In [1]:
import os
from itertools import permutations

import pandas as pd
from datasets import load_dataset
from dotenv import load_dotenv
from openai import OpenAI

from core.run_evaluation import eval_function

#Choose API provider 
load_dotenv()
#Make sure the tokens are in a .env file in the root folder.
openai_key = os.getenv('OPENAIAPI')
groq_key = os.getenv('GROQ')
runpod_key = os.getenv('RUNPOD')
local_key= 'token'

#The urls should point to the /v1 endpoint for openAi endpoint compatibility Example: https://api.groq.com/openai/v1

groq_url = "https://api.groq.com/openai/v1"
vllm_url = "https://api.runpod.ai/v2/vllm-0p8rpij3cqypla/openai/v1"
local_url= "http://100.72.55.38:1234/v1"

client = OpenAI(api_key=openai_key)
#client = OpenAI(api_key=groq_key, base_url=groq_url)
#client= OpenAI(api_key=runpod_key, base_url=vllm_url)
#client= OpenAI(api_key=local_key, base_url=local_url)


#Choose model to test from provider
model = "gpt-4o-mini"
#model= "ft:gpt-4o-mini-2024-07-18:personal:esgemma:ACU8c8ph:ckpt-step-104"
#model= "gpt-4o"
#model= "llama-3.1-8b-instant"
#model= "llama-3.1-70b-versatile"
#model= "esgemma"
#model= 'unigoe/esgemma-2-2b-it/esgemma-2-2b-it.Q4_K_M.gguf'
#model= 'lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF'

#Choose Dataset that should be testet
test_dataset = load_dataset('chris7374/esg-net-zero', revision='test')
df = test_dataset['train'].to_pandas()


In [2]:
#In case when different prompt structures should be testet. Add Prompt Structure Format. In this case we only use one.
prompt_structures = [
    "CIEKX"
]

all_prompt_structures = [
    "CIEKX", "CIEXK", "CIKEX", "CIKXE", "CIXEK", "CIXKE",
    "CEIKX", "CEIXK", "CEKIX", "CEKXI", "CEXIK", "CEXKI",
    "CKIEX", "CKIXE", "CKEIX", "CKEXI", "CKXIE", "CKXEI",
    "CXIEK", "CXIKE", "CXEIK", "CXEKI", "CXKIE", "CXKEI"
]

In [3]:
prompts_to_test = []

In [4]:
#Prompt Schema 0
prompts_to_test.append(
    {
    'instruction' : """You are an expert ESG (Environmental, Social, and Governance) analyst. Your primary task is to analyze and categorize company wide emissions reduction targets. Your task is to classify texts according to whether they belong to ESG and, if necessary, extract a date. The possible classes are "Carbon neutral(ity)", "Emissions reduction target", "Net zero" and "No target".""",
    'example' : """"""
    }
)



In [13]:
#Prompt Schema 1
prompts_to_test.append(
    {
    'instruction' : """You are an expert ESG (Environmental, Social, and Governance) analyst, who conducts ESG research by analyzing texts to identify the presence of emission reduction targets.  Your primary task is to classify identified targets into one of four predefined classes and determine the target year for the emission reduction target. The possible classes are "Carbon neutral(ity)", "Emissions reduction target", "Net zero" and "No target".""",
    'example' : """"""
    }
)


In [14]:
#Prompt Schema 2
prompts_to_test.append(
    {
    'instruction' : """You are an expert ESG (Environmental, Social, and Governance) analyst, who conducts ESG research by analyzing texts to identify the presence of sustainability targets.  Your primary task is to classify identified targets into one of four predefined classes and determine the target year for the sustainability target. The possible classes are "Carbon neutral(ity)", "Emissions reduction target", "Net zero" and "No target".""",
    'example' : """"""
    }
)

In [15]:
#Prompt Schema 3
prompts_to_test.append(
    {
    'instruction' : """You are an expert ESG (Environmental, Social, and Governance) analyst, who conducts ESG research by analyzing texts to identify the presence of climate balance targets.  Your primary task is to classify identified targets into one of four predefined classes and determine the target year for the climate balance target. The possible classes are "Carbon neutral(ity)", "Emissions reduction target", "Net zero" and "No target".""",
    'example' : """"""
    }
)

In [16]:
#Prompt Schema 4
prompts_to_test.append(
    {
    'instruction' : """You are an expert ESG (Environmental, Social, and Governance) analyst, who conducts ESG research by analyzing texts to identify the presence of climate balance targets.  Your primary task is to classify identified targets into one of four predefined classes and determine the target year for the climate balance target. The possible classes are "Carbon neutral(ity)", "Emissions reduction target", "Net zero" and "No target".""",
    'example' : """
### Examples:
Input: "Our company aims to achieve carbon neutrality by 2040 through a combination of emissions reduction and carbon offsets."
Output:
<answer>
<classification>
<end_target>Carbon neutral(ity)</end_target>
</classification>
<extraction>
<end_target_year>2040</end_target_year>
</extraction>
<quote>company aims to achieve carbon neutrality by 2040</quote>
</answer>
Input: "We're committed to reducing our greenhouse gas emissions by 50% compared to 2015 levels by the year 2030. Our
journey to 2030 is focused on first reducing our scope 1, 2, and 3 greenhouse gas emissions by 50 percent compared with 2015, and investing in high-quality carbon removal solutions for the remaining emissions"
Output:
<answer>
<classification>
<end_target>Emissions reduction target</end_target>
</classification>
<extraction>
<end_target_year>2030</end_target_year>
</extraction>
<quote>reducing our greenhouse gas emissions by 50% compared to 2015 levels by the year 2030</quote>
</answer>
Input: "Our long-term vision is to achieve net zero emissions across our entire value chain by 2050."
Output:
<answer>
<classification>
<end_target>Net zero</end_target>
</classification>
<extraction>
<end_target_year>2050</end_target_year>
</extraction>
<quote>long-term vision is to achieve net zero emissions (...) by 2050.</quote>
</answer>
Input: "We recognize the importance of environmental sustainability and are continuously working to improve our operations by ensuring our car fleet to be carbon neutral in 2060"
Output:
<answer>
<classification>
<end_target>No target</end_target>
</classification>
<extraction>
<end_target_year>No target</end_target_year>
</extraction>
<quote>None</quote>
</answer>"""
    }
)

In [17]:
#Prompt Schema 5
prompts_to_test.append(
    {
    'instruction' : """You are an expert ESG (Environmental, Social, and Governance) analyst, who conducts ESG research by analyzing texts to identify the presence of climate balance targets.  
Your primary task is to classify identified targets into one of four predefined classes and determine the target year for the climate balance target.
The possible classes are "Carbon neutral(ity)", "Emissions reduction target", "Net zero" and "No target". If multiple classes appear in the text, select the single best fit. If they are more or less equal, net zero has the highest priority, followed by carbon neutrality, followed by emissions reduction.
""",
    'example' : """
### Examples:
Input: "Our company aims to achieve carbon neutrality by 2040 through a combination of emissions reduction and carbon offsets."
Output:
<answer>
<classification>
<end_target>Carbon neutral(ity)</end_target>
</classification>
<extraction>
<end_target_year>2040</end_target_year>
</extraction>
<quote>company aims to achieve carbon neutrality by 2040</quote>
</answer>
Input: "We're committed to reducing our greenhouse gas emissions by 50% compared to 2015 levels by the year 2030. Our
journey to 2030 is focused on first reducing our scope 1, 2, and 3 greenhouse gas emissions by 50 percent compared with 2015, and investing in high-quality carbon removal solutions for the remaining emissions"
Output:
<answer>
<classification>
<end_target>Emissions reduction target</end_target>
</classification>
<extraction>
<end_target_year>2030</end_target_year>
</extraction>
<quote>reducing our greenhouse gas emissions by 50% compared to 2015 levels by the year 2030</quote>
</answer>
Input: "Our long-term vision is to achieve net zero emissions across our entire value chain by 2050."
Output:
<answer>
<classification>
<end_target>Net zero</end_target>
</classification>
<extraction>
<end_target_year>2050</end_target_year>
</extraction>
<quote>long-term vision is to achieve net zero emissions (...) by 2050.</quote>
</answer>
Input: "We recognize the importance of environmental sustainability and are continuously working to improve our operations by ensuring our car fleet to be carbon neutral in 2060"
Output:
<answer>
<classification>
<end_target>No target</end_target>
</classification>
<extraction>
<end_target_year>No target</end_target_year>
</extraction>
<quote>None</quote>
</answer>"""
    }
)

In [18]:
#Prompt Schema 6
prompts_to_test.append(
    {
    'instruction' : """You are an expert ESG (Environmental, Social, and Governance) analyst, who conducts ESG research by analyzing texts to identify the presence of climate balance targets.  
Your primary task is to classify identified targets into one of four predefined classes and determine the target year for the climate balance target.
The possible classes are "Carbon neutral(ity)", "Emissions reduction target", "Net zero" and "No target". If multiple classes appear in the text, select the single best fit. """,
    'example' : """
### Examples:
Input: "Our company aims to achieve carbon neutrality by 2040 through a combination of emissions reduction and carbon offsets."
Output:
<answer>
<classification>
<end_target>Carbon neutral(ity)</end_target>
</classification>
<extraction>
<end_target_year>2040</end_target_year>
</extraction>
<quote>company aims to achieve carbon neutrality by 2040</quote>
</answer>
Input: "We're committed to reducing our greenhouse gas emissions by 50% compared to 2015 levels by the year 2030. Our
journey to 2030 is focused on first reducing our scope 1, 2, and 3 greenhouse gas emissions by 50 percent compared with 2015, and investing in high-quality carbon removal solutions for the remaining emissions"
Output:
<answer>
<classification>
<end_target>Emissions reduction target</end_target>
</classification>
<extraction>
<end_target_year>2030</end_target_year>
</extraction>
<quote>reducing our greenhouse gas emissions by 50% compared to 2015 levels by the year 2030</quote>
</answer>
Input: "Our long-term vision is to achieve net zero emissions across our entire value chain by 2050."
Output:
<answer>
<classification>
<end_target>Net zero</end_target>
</classification>
<extraction>
<end_target_year>2050</end_target_year>
</extraction>
<quote>long-term vision is to achieve net zero emissions (...) by 2050.</quote>
</answer>
Input: "We recognize the importance of environmental sustainability and are continuously working to improve our operations by ensuring our car fleet to be carbon neutral in 2060"
Output:
<answer>
<classification>
<end_target>No target</end_target>
</classification>
<extraction>
<end_target_year>No target</end_target_year>
</extraction>
<quote>None</quote>
</answer>"""
    }
)

In [19]:
#Prompt Schema 7
prompts_to_test.append(
    {
    'instruction' : """You are an expert ESG (Environmental, Social, and Governance) analyst, who conducts ESG research by analyzing texts to identify the presence of climate balance targets.  
Your primary task is to classify identified targets into one of four predefined classes and determine the target year for the climate balance target.
The possible classes are "Carbon neutral(ity)", "Emissions reduction target", "Net zero" and "No target". If multiple classes exist in the text, select the single best fit.
""",
    'example' : """
### Examples:
Input: "Our company aims to achieve carbon neutrality by 2040 through a combination of emissions reduction and carbon offsets."
Output:
<answer>
<classification>
<end_target>Carbon neutral(ity)</end_target>
</classification>
<extraction>
<end_target_year>2040</end_target_year>
</extraction>
<quote>company aims to achieve carbon neutrality by 2040</quote>
</answer>
Input: "We're committed to reducing our greenhouse gas emissions by 50% compared to 2015 levels by the year 2030. Our
journey to 2030 is focused on first reducing our scope 1, 2, and 3 greenhouse gas emissions by 50 percent compared with 2015, and investing in high-quality carbon removal solutions for the remaining emissions"
Output:
<answer>
<classification>
<end_target>Emissions reduction target</end_target>
</classification>
<extraction>
<end_target_year>2030</end_target_year>
</extraction>
<quote>reducing our greenhouse gas emissions by 50% compared to 2015 levels by the year 2030</quote>
</answer>
Input: "Our long-term vision is to achieve net zero emissions across our entire value chain by 2050."
Output:
<answer>
<classification>
<end_target>Net zero</end_target>
</classification>
<extraction>
<end_target_year>2050</end_target_year>
</extraction>
<quote>long-term vision is to achieve net zero emissions (...) by 2050.</quote>
</answer>
Input: "We recognize the importance of environmental sustainability and are continuously working to improve our operations by ensuring our car fleet to be carbon neutral in 2060"
Output:
<answer>
<classification>
<end_target>No target</end_target>
</classification>
<extraction>
<end_target_year>No target</end_target_year>
</extraction>
<quote>None</quote>
</answer>"""
    }
)

In [20]:
#Prompt Schema 8
prompts_to_test.append(
    {
    'instruction' : """You are an expert ESG (Environmental, Social, and Governance) analyst, who conducts ESG research by analyzing texts to identify the presence of climate balance targets.  
Your primary task is to classify identified targets into one of four predefined classes and determine the target year for the climate balance target. Only consider overall climate balance targets, meaning that they are company wide.
The possible classes are "Carbon neutral(ity)", "Emissions reduction target", "Net zero" and "No target". If multiple classes exist in the text, net zero has the highest priority, followed by carbon neutrality, which is followed by emissions reduction target.
""",
    'example' : """
### Examples:
Input: "Our company aims to achieve carbon neutrality by 2040 through a combination of emissions reduction and carbon offsets."
Output:
<answer>
<classification>
<end_target>Carbon neutral(ity)</end_target>
</classification>
<extraction>
<end_target_year>2040</end_target_year>
</extraction>
<quote>company aims to achieve carbon neutrality by 2040</quote>
</answer>
Input: "We're committed to reducing our greenhouse gas emissions by 50% compared to 2015 levels by the year 2030. Our
journey to 2030 is focused on first reducing our scope 1, 2, and 3 greenhouse gas emissions by 50 percent compared with 2015, and investing in high-quality carbon removal solutions for the remaining emissions"
Output:
<answer>
<classification>
<end_target>Emissions reduction target</end_target>
</classification>
<extraction>
<end_target_year>2030</end_target_year>
</extraction>
<quote>reducing our greenhouse gas emissions by 50% compared to 2015 levels by the year 2030</quote>
</answer>
Input: "Our long-term vision is to achieve net zero emissions across our entire value chain by 2050."
Output:
<answer>
<classification>
<end_target>Net zero</end_target>
</classification>
<extraction>
<end_target_year>2050</end_target_year>
</extraction>
<quote>long-term vision is to achieve net zero emissions (...) by 2050.</quote>
</answer>
Input: "We recognize the importance of environmental sustainability and are continuously working to improve our operations by ensuring our car fleet to be carbon neutral in 2060"
Output:
<answer>
<classification>
<end_target>No target</end_target>
</classification>
<extraction>
<end_target_year>No target</end_target_year>
</extraction>
<quote>None</quote>
</answer>"""
    }
)

In [11]:
#Prompt Schema 9
prompts_to_test.append(
    {'instruction':"""
You are an expert ESG (Environmental, Social, and Governance) analyst who conducts ESG research by analyzing texts to identify the presence of climate balance targets. Your primary task is to classify identified targets into one of four predefined classes and determine the target year for the climate balance target. Only consider overall climate balance targets, meaning that they are company-wide.
The possible classes are “Carbon neutral(ity)”, “Emissions reduction target”, “Net zero”, and “No target”.
Each class has equal importance, and the correct classification should reflect the most explicit target mentioned in the text. In cases where multiple classes are present:
	•	“Net zero” should only be prioritized if explicitly mentioned as a company’s overarching target.
	•	“Carbon neutral(ity)” takes precedence over “Emissions reduction target” only if it is the primary focus of the text.
	•	“Emissions reduction target” should be classified if it is directly stated and not overshadowed by “Net zero” or “Carbon neutral(ity)” commitments.
	•	If no explicit target is mentioned, classify as “No target”.
	Ensure the classification is based on explicit information from the text, without assuming that one target implies another unless clearly stated.
""", 
    'example' : """
### Examples:
Input: "Our company aims to achieve carbon neutrality by 2040 through a combination of emissions reduction and carbon offsets."
Output:
<answer>
<classification>
<end_target>Carbon neutral(ity)</end_target>
</classification>
<extraction>
<end_target_year>2040</end_target_year>
</extraction>
<quote>company aims to achieve carbon neutrality by 2040</quote>
</answer>
Input: "We're committed to reducing our greenhouse gas emissions by 50% compared to 2015 levels by the year 2030. Our
journey to 2030 is focused on first reducing our scope 1, 2, and 3 greenhouse gas emissions by 50 percent compared with 2015, and investing in high-quality carbon removal solutions for the remaining emissions"
Output:
<answer>
<classification>
<end_target>Emissions reduction target</end_target>
</classification>
<extraction>
<end_target_year>2030</end_target_year>
</extraction>
<quote>reducing our greenhouse gas emissions by 50% compared to 2015 levels by the year 2030</quote>
</answer>
Input: "Our long-term vision is to achieve net zero emissions across our entire value chain by 2050."
Output:
<answer>
<classification>
<end_target>Net zero</end_target>
</classification>
<extraction>
<end_target_year>2050</end_target_year>
</extraction>
<quote>long-term vision is to achieve net zero emissions (...) by 2050.</quote>
</answer>
Input: "We recognize the importance of environmental sustainability and are continuously working to improve our operations by ensuring our car fleet to be carbon neutral in 2060"
Output:
<answer>
<classification>
<end_target>No target</end_target>
</classification>
<extraction>
<end_target_year>No target</end_target_year>
</extraction>
<quote>None</quote>
</answer>
"""
}
)

In [5]:
for i, element in enumerate(prompts_to_test):
    instruction = element['instruction']
    example = element['example']
    eval_function(client=client,model=model,instruction=instruction,example=example, iteration=i, ground_truth_dataframe=df,target_column='end_target', target_year_column='end_target_year', context_column='custom_text', prompt_structures=prompt_structures, save_to_docx=True)

Structure: 1/1
1/100
2/100
3/100
4/100
5/100
6/100
7/100
8/100
9/100
10/100
11/100
12/100
13/100
14/100
15/100
16/100
17/100
18/100
19/100
20/100
21/100
22/100
23/100
24/100
25/100
26/100
27/100
28/100
29/100
30/100
31/100
32/100
33/100
34/100
35/100
36/100
37/100
38/100
39/100
40/100
41/100
42/100
43/100
44/100
45/100
46/100
47/100
48/100
49/100
50/100
51/100
52/100
53/100
54/100
55/100
56/100
57/100
58/100
59/100
60/100
61/100
62/100
63/100
64/100
65/100
66/100
67/100
68/100
69/100
70/100
71/100
72/100
73/100
74/100
75/100
76/100
77/100
78/100
79/100
80/100
81/100
82/100
83/100
84/100
85/100
86/100
87/100
88/100
89/100
90/100
91/100
92/100
93/100
94/100
95/100
96/100
97/100
98/100
99/100
100/100
Results saved to results\iteration_0.docx


Precision: Precision measures the accuracy of positive predictions made by the model. It answers the question, "Of all the instances that the model predicted as positive, how many were actually positive?" <br>
Recall: Recall measures the model's ability to identify all actual positive instances in the dataset. It answers the question, "Of all the actual positive instances, how many did the model correctly identify as positive?" <br> <br>
High Recall, Low Precision: A model with high recall may cast a wide net, capturing nearly all positive instances, but at the cost of also capturing a lot of false positives (i.e., instances incorrectly predicted as positive). <br>
High Precision, Low Recall: A model with high precision is very selective, predicting positive only when it is very confident, but it may miss many actual positive instances (leading to false negatives).