# Install required libraries and import them

In [1]:
!pip install transformers==4.28.0
!pip install datasets
!pip install fastapi nest-asyncio pyngrok uvicorn
!pip install pydantic
!pip install openai tiktoken
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer
import numpy as np
from sklearn.metrics import classification_report,f1_score,confusion_matrix
from fastapi import FastAPI, Body, Request
from fastapi.middleware.cors import CORSMiddleware
import nest_asyncio
from pyngrok import ngrok
import uvicorn
from transformers import TextClassificationPipeline
from pydantic import BaseModel
import pandas as pd
import openai
import tiktoken

Collecting transformers==4.28.0
  Downloading transformers-4.28.0-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m28.4 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers==4.28.0)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m72.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers
    Found existing installation: tokenizers 0.15.1
    Uninstalling tokenizers-0.15.1:
      Successfully uninstalled tokenizers-0.15.1
  Attempting uninstall: transformers
    Found existing installation: transformers 4.35.2
    Uninstalling transformers-4.35.2:
      Successfully uninstalled transformers-4.35.2
Successfully installed tokenizers-0.13.3 transformers-4.28.0
Collecting datasets
  Download

## Initialize global variables

In [2]:
from typing import List

# FastAPI application object; every endpoint and startup hook below is registered on it.
app = FastAPI(title="Climate Change Models on FastAPI")
# CORS is left fully open: any origin, HTTP method and header is accepted.
# NOTE(review): a wildcard origin together with allow_credentials=True is very
# permissive - confirm this is intended outside of a notebook/demo setting.
app.add_middleware(
    CORSMiddleware,
    allow_origins=['*'],
    allow_credentials=True,
    allow_methods=['*'],
    allow_headers=['*'],
)
# Module-level placeholders for every model bundle (tokenizer, model,
# TrainingArguments, Trainer). All start as None and are populated by the
# startup hooks defined further down.

# Climate Detection
climate_change_detection_tokenizer = climate_change_detection_model = None
climate_change_detection_arg = climate_change_detection_trainer = None

# Climate Sentiment
climate_change_sentiment_tokenizer = climate_change_sentiment_model = None
climate_change_sentiment_arg = climate_change_sentiment_trainer = None

# Climate Specificity
climate_change_specificity_tokenizer = climate_change_specificity_model = None
climate_change_specificity_arg = climate_change_specificity_trainer = None

# Climate Commitments and Actions
climate_change_commitments_actions_tokenizer = climate_change_commitments_actions_model = None
climate_change_commitments_actions_arg = climate_change_commitments_actions_trainer = None

# Climate Classification on TCFD Recommendations
climate_change_tcfd_tokenizer = climate_change_tcfd_model = None
climate_change_tcfd_arg = climate_change_tcfd_trainer = None

# Our model, based on the ClimateBert Climate Detection model
climate_change_our_tokenizer = climate_change_our_model = None
climate_change_our_arg = climate_change_our_trainer = None

# Define Request Types
class RequestBodyMultipleObjects(BaseModel):
    """Request body model holding an optional list of objects.

    NOTE(review): no endpoint visible in this section uses this model -
    confirm whether it is still needed.
    """
    objects: List | None = None  # list payload; None when the field is omitted

# pipe = None

#Loading Climate Detection model

@app.on_event("startup")
def load_detection_model():
  """Load the ClimateBert climate-detection checkpoint at application startup.

  Populates the module-level detection tokenizer, model, TrainingArguments
  and Trainer globals that the detection endpoints read.
  """
  global climate_change_detection_tokenizer, climate_change_detection_model
  global climate_change_detection_arg, climate_change_detection_trainer

  checkpoint = "climatebert/distilroberta-base-climate-detector"
  climate_change_detection_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
  climate_change_detection_model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

  # "label" is the Trainer output directory.
  climate_change_detection_arg = TrainingArguments(
    output_dir="label",
    seed=19,
    learning_rate=5e-5,
    num_train_epochs=4,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
  )
  climate_change_detection_trainer = Trainer(
    args=climate_change_detection_arg,
    model=climate_change_detection_model,
    tokenizer=climate_change_detection_tokenizer,
  )

#Loading Climate Sentiment model

@app.on_event("startup")
def load_sentiment_model():
  """Load the ClimateBert climate-sentiment checkpoint at application startup.

  Populates the module-level sentiment tokenizer, model, TrainingArguments
  and Trainer globals that the sentiment endpoints read.
  """
  global climate_change_sentiment_tokenizer, climate_change_sentiment_model
  global climate_change_sentiment_arg, climate_change_sentiment_trainer

  checkpoint = "climatebert/distilroberta-base-climate-sentiment"
  climate_change_sentiment_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
  climate_change_sentiment_model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

  # "label" is the Trainer output directory.
  climate_change_sentiment_arg = TrainingArguments(
    output_dir="label",
    seed=19,
    learning_rate=5e-5,
    num_train_epochs=4,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
  )
  climate_change_sentiment_trainer = Trainer(
    args=climate_change_sentiment_arg,
    model=climate_change_sentiment_model,
    tokenizer=climate_change_sentiment_tokenizer,
  )

#Loading Climate Specificity model

@app.on_event("startup")
def load_specificity_model():
  """Load the ClimateBert climate-specificity checkpoint at application startup.

  Populates the module-level specificity tokenizer, model, TrainingArguments
  and Trainer globals that the specificity endpoints read.
  """
  global climate_change_specificity_tokenizer, climate_change_specificity_model
  global climate_change_specificity_arg, climate_change_specificity_trainer

  checkpoint = "climatebert/distilroberta-base-climate-specificity"
  climate_change_specificity_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
  climate_change_specificity_model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

  # "label" is the Trainer output directory.
  climate_change_specificity_arg = TrainingArguments(
    output_dir="label",
    seed=19,
    learning_rate=5e-5,
    num_train_epochs=4,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
  )
  climate_change_specificity_trainer = Trainer(
    args=climate_change_specificity_arg,
    model=climate_change_specificity_model,
    tokenizer=climate_change_specificity_tokenizer,
  )

#Loading Climate Commitments and Actions model

@app.on_event("startup")
def load_commitments_model():
  """Load the ClimateBert climate-commitment checkpoint at application startup.

  Populates the module-level commitments/actions tokenizer, model,
  TrainingArguments and Trainer globals that the commitments endpoints read.
  """
  global climate_change_commitments_actions_tokenizer, climate_change_commitments_actions_model
  global climate_change_commitments_actions_arg, climate_change_commitments_actions_trainer

  checkpoint = "climatebert/distilroberta-base-climate-commitment"
  climate_change_commitments_actions_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
  climate_change_commitments_actions_model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

  # "label" is the Trainer output directory.
  climate_change_commitments_actions_arg = TrainingArguments(
    output_dir="label",
    seed=19,
    learning_rate=5e-5,
    num_train_epochs=4,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
  )
  climate_change_commitments_actions_trainer = Trainer(
    args=climate_change_commitments_actions_arg,
    model=climate_change_commitments_actions_model,
    tokenizer=climate_change_commitments_actions_tokenizer,
  )

#Loading Climate TCFD Recommendations model

@app.on_event("startup")
def load_tcfd_model():
  """Load the ClimateBert TCFD-recommendations checkpoint at application startup.

  Populates the module-level TCFD tokenizer, model, TrainingArguments and
  Trainer globals that the TCFD endpoints read.
  """
  global climate_change_tcfd_tokenizer, climate_change_tcfd_model
  global climate_change_tcfd_arg, climate_change_tcfd_trainer

  checkpoint = "climatebert/distilroberta-base-climate-tcfd"
  climate_change_tcfd_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
  climate_change_tcfd_model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

  # "label" is the Trainer output directory.
  climate_change_tcfd_arg = TrainingArguments(
    output_dir="label",
    seed=19,
    learning_rate=5e-5,
    num_train_epochs=4,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
  )
  climate_change_tcfd_trainer = Trainer(
    args=climate_change_tcfd_arg,
    model=climate_change_tcfd_model,
    tokenizer=climate_change_tcfd_tokenizer,
  )

#Loading our model for climate change

@app.on_event("startup")
def load_our_model():
  """Load the project's own fine-tuned detection model at application startup.

  Reads tokenizer and weights from the Google Drive path below, moves the
  model to the GPU, and populates the module-level "our" tokenizer, model,
  TrainingArguments and Trainer globals.
  """
  global climate_change_our_tokenizer
  global climate_change_our_model
  global climate_change_our_arg
  global climate_change_our_trainer

  # Single source for the checkpoint location so tokenizer and model cannot drift apart.
  model_dir = "/content/drive/MyDrive/DS-Environment-Project/Models/climate-change-model-based-on-climate-detector-further-trained-on-paragraphs"
  climate_change_our_tokenizer = AutoTokenizer.from_pretrained(model_dir)
  climate_change_our_model = AutoModelForSequenceClassification.from_pretrained(model_dir)
  # One device move is enough; calling .cuda() and then .to("cuda") was redundant.
  climate_change_our_model.to("cuda")
  climate_change_our_arg = TrainingArguments(
    "label",  # Trainer output directory
    learning_rate=5e-5,
    num_train_epochs=4,
    per_device_eval_batch_size=32,
    per_device_train_batch_size=32,
    seed=19
  )
  climate_change_our_trainer = Trainer(
    model=climate_change_our_model,
    args=climate_change_our_arg,
    tokenizer=climate_change_our_tokenizer,
  )








        on_event is deprecated, use lifespan event handlers instead.

        Read more about it in the
        [FastAPI docs for Lifespan Events](https://fastapi.tiangolo.com/advanced/events/).
        
  @app.on_event("startup")
        on_event is deprecated, use lifespan event handlers instead.

        Read more about it in the
        [FastAPI docs for Lifespan Events](https://fastapi.tiangolo.com/advanced/events/).
        
  @app.on_event("startup")
        on_event is deprecated, use lifespan event handlers instead.

        Read more about it in the
        [FastAPI docs for Lifespan Events](https://fastapi.tiangolo.com/advanced/events/).
        
  @app.on_event("startup")
        on_event is deprecated, use lifespan event handlers instead.

        Read more about it in the
        [FastAPI docs for Lifespan Events](https://fastapi.tiangolo.com/advanced/events/).
        
  @app.on_event("startup")
        on_event is deprecated, use lifespan event handlers instead.

      

# API Endpoints and handlers

In [3]:
# ChatGPT function.
from openai import OpenAI
import os
def get_completion(prompt, model="gpt-3.5-turbo"):
  """Send `prompt` as a single user message to the OpenAI chat API.

  Args:
    prompt: Text sent as the content of the only (user) message.
    model: Name of the chat model to query.

  Returns:
    The message content of the first choice in the response.
  """
  # The key is read from the OPENAI_API_KEY environment variable.
  api_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
  chat_response = api_client.chat.completions.create(
    model=model,
    messages=[{"role": "user", "content": prompt}],
    temperature=0,
  )
  first_choice = chat_response.choices[0]
  return first_choice.message.content

## Prediction on tasks

In [4]:
#Climate Detection
@app.post("/climate-detect")
def get_label_detect(input_data, chosen_model):
  """Classify whether `input_data` is climate-related.

  `chosen_model` selects the classifier:
    - "auto-detect": the ClimateBert detector when splitting the text on "."
      yields more than one piece, otherwise our own model;
    - "our-model": our own fine-tuned model;
    - "climatebert": the ClimateBert detector;
    - anything else: ChatGPT via get_completion.

  Returns the first pipeline result for the local models, or the parsed
  ChatGPT reply.
  """
  import ast
  import json

  climatebert = (climate_change_detection_model, climate_change_detection_tokenizer)
  ours = (climate_change_our_model, climate_change_our_tokenizer)

  if chosen_model == "auto-detect":
    model, tokenizer = climatebert if len(input_data.split(".")) > 1 else ours
  elif chosen_model == "our-model":
    model, tokenizer = ours
  elif chosen_model == "climatebert":
    model, tokenizer = climatebert
  else:
    prompt = 'You are the sustainability, environment, and climate change expert. Is the following text about sustainability, the environment, or climate change? Answer only with yes if the text is sustainability, environment or climate change related or no if not and also give a score of confidence from 0 to 1. The output should be formatted in JSON, with the attribute label for the label and score for the confidence score. The output should be without \n signs and without escaped signs. Example output: {"label":"yes","score":0.999975323677063} \n\n'
    reply = get_completion(prompt + input_data)
    # The reply is untrusted model output: parse it as data rather than
    # executing it with eval(). JSON is what the prompt asks for; the
    # literal_eval fallback still accepts a Python-style dict (single quotes).
    try:
      return json.loads(reply)
    except json.JSONDecodeError:
      return ast.literal_eval(reply)

  pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, device="cuda:0")
  return pipe(input_data)[0]

In [5]:
#Climate Sentiment
@app.post("/climate-sentiment")
def get_label_sentiment(input_data, chosen_model):
  """Classify the climate sentiment of `input_data`.

  With chosen_model == "climatebert" the ClimateBert sentiment pipeline is
  used and its first result is returned; any other value sends the text to
  ChatGPT via get_completion and returns the parsed reply.
  """
  import ast
  import json

  if chosen_model == "climatebert":
    pipe = TextClassificationPipeline(model=climate_change_sentiment_model, tokenizer=climate_change_sentiment_tokenizer, device="cuda:0")
    return pipe(input_data)[0]

  prompt = 'You are the sustainability, environment, and climate change expert. Does the following text indicate risk, is neutral or indicates an opportunity about sustainability, the environment, or climate change? Answer only with risk if the text indicates risk, answer only with neutral if the text is neutral or with opportunity if the text indicates an opportunity and also give a score of confidence from 0 to 1. The output should be formatted in JSON, with the attribute label for the label and score for the confidence score. The output should be without \n signs and without escaped signs. Example output: {"label":"yes","score":0.999975323677063} \n\n'
  reply = get_completion(prompt + input_data)
  # The reply is untrusted model output: parse it as data rather than
  # executing it with eval(). JSON is what the prompt asks for; the
  # literal_eval fallback still accepts a Python-style dict (single quotes).
  try:
    return json.loads(reply)
  except json.JSONDecodeError:
    return ast.literal_eval(reply)

In [6]:
#Climate Specificity
@app.post("/climate-specificity")
def get_label_specificity(input_data, chosen_model):
  """Classify whether `input_data` is specific or non-specific.

  With chosen_model == "climatebert" the ClimateBert specificity pipeline is
  used and its first result is returned; any other value sends the text to
  ChatGPT via get_completion and returns the parsed reply.
  """
  import ast
  import json

  if chosen_model == "climatebert":
    pipe = TextClassificationPipeline(model=climate_change_specificity_model, tokenizer=climate_change_specificity_tokenizer, device="cuda:0")
    return pipe(input_data)[0]

  prompt = 'You are the sustainability, environment, and climate change expert. Is the following text specific about sustainability, environment, and climate change or not? Answer only with non-specific if the text is not specific and with specific if the text is specific and also give a score of confidence from 0 to 1. The output should be formatted in JSON, with the attribute label for the label and score for the confidence score. The output should be without \n signs and without escaped signs. Example output: {"label":"yes","score":0.999975323677063} \n\n'
  reply = get_completion(prompt + input_data)
  # The reply is untrusted model output: parse it as data rather than
  # executing it with eval(). JSON is what the prompt asks for; the
  # literal_eval fallback still accepts a Python-style dict (single quotes).
  try:
    return json.loads(reply)
  except json.JSONDecodeError:
    return ast.literal_eval(reply)

In [7]:
#Climate Commitments and Actions
@app.post("/climate-commitments-actions")
def get_label_commitments_actions(input_data, chosen_model):
  """Classify whether `input_data` is about climate commitments and actions.

  With chosen_model == "climatebert" the ClimateBert commitments pipeline is
  used and its first result is returned; any other value sends the text to
  ChatGPT via get_completion and returns the parsed reply.
  """
  import ast
  import json

  if chosen_model == "climatebert":
    pipe = TextClassificationPipeline(model=climate_change_commitments_actions_model, tokenizer=climate_change_commitments_actions_tokenizer, device="cuda:0")
    return pipe(input_data)[0]

  prompt = 'You are the sustainability, environment, and climate change expert. Is the following text about climate commitments and actions or not? Answer only with no if the text is not about climate commitments and actions and with yes if the text is about climate commitments and actions and also give a score of confidence from 0 to 1. The output should be formatted in JSON, with the attribute label for the label and score for the confidence score. The output should be without \n signs and without escaped signs. Example output: {"label":"yes","score":0.999975323677063} \n\n'
  reply = get_completion(prompt + input_data)
  # The reply is untrusted model output: parse it as data rather than
  # executing it with eval(). JSON is what the prompt asks for; the
  # literal_eval fallback still accepts a Python-style dict (single quotes).
  try:
    return json.loads(reply)
  except json.JSONDecodeError:
    return ast.literal_eval(reply)

In [8]:
#Climate Classification on TCFD Recommendations
@app.post("/climate-tcfd")
def get_label_tcfd(input_data, chosen_model):
  """Classify `input_data` into a TCFD recommendation category.

  With chosen_model == "climatebert" the ClimateBert TCFD pipeline is used
  and its first result is returned; any other value sends the text to
  ChatGPT via get_completion and returns the parsed reply.
  """
  import ast
  import json

  if chosen_model == "climatebert":
    pipe = TextClassificationPipeline(model=climate_change_tcfd_model, tokenizer=climate_change_tcfd_tokenizer, device="cuda:0")
    return pipe(input_data)[0]

  prompt = 'You are the sustainability, environment, and climate change expert. Is the following text about metrics, strategy, risk, governance or is not climate change-related? Answer only with none if the text is not climate-related, answer only with metrics if the text is about metrics for sustainability, environment, and climate change, answer only with strategy if the text is about strategy for sustainability, environment, and climate change, answer only with risk if the text is about risk for sustainability, environment, and climate change and answer only with governance if the text is about governance for sustainability, environment, and climate change and also give a score of confidence from 0 to 1. The output should be formatted in JSON, with the attribute label for the label and score for the confidence score. The output should be without \n signs and without escaped signs. Example output: {"label":"yes","score":0.999975323677063} \n\n'
  reply = get_completion(prompt + input_data)
  # The reply is untrusted model output: parse it as data rather than
  # executing it with eval(). JSON is what the prompt asks for; the
  # literal_eval fallback still accepts a Python-style dict (single quotes).
  try:
    return json.loads(reply)
  except json.JSONDecodeError:
    return ast.literal_eval(reply)

## Training of the models with data from our dataset

In [9]:
#Training on Climate Detection task - ClimateBert model
@app.post("/train/climate-detect")
async def train_detection(request: Request):
  """Fine-tune the ClimateBert detection model on rows posted by the client.

  The JSON body must contain an 'entries' field holding (text, label) rows,
  either as a Python-literal string or as an already-decoded list. Labels
  "yes"/"no" are mapped to 1/0 before training.

  Returns the 'entries' value exactly as received.
  """
  import ast

  json_object = await request.json()
  entries = json_object['entries']
  # SECURITY: 'entries' comes straight from the request body. eval() here would
  # let any caller run arbitrary Python; literal_eval only accepts literals.
  rows = ast.literal_eval(entries) if isinstance(entries, str) else entries
  df = pd.DataFrame(data=rows, columns=["text", "label"])
  df["label"] = df["label"].replace({"yes": 1, "no": 0})
  train_ds = Dataset.from_pandas(df)
  train_encoded_ds = train_ds.map(
    lambda t: climate_change_detection_tokenizer(t['text'], truncation=True, padding=True),
    batched=True,
    load_from_cache_file=False,
  )
  # A fresh Trainer is needed to attach the training set; it wraps the shared
  # global model object.
  trainer = Trainer(
    model=climate_change_detection_model,
    args=climate_change_detection_arg,
    tokenizer=climate_change_detection_tokenizer,
    train_dataset=train_encoded_ds,
  )
  # NOTE(review): train() is a blocking call inside an async handler, so the
  # server cannot serve other requests until it finishes.
  trainer.train()
  return entries

In [10]:
#Training on Climate Detection task - Our model
@app.post("/train/climate-detect-our")
async def train_detection_our(request: Request):
  """Fine-tune our own detection model on rows posted by the client.

  The JSON body must contain an 'entries' field holding (text, label) rows,
  either as a Python-literal string or as an already-decoded list. Labels
  "yes"/"no" are mapped to 1/0 before training.

  Returns the 'entries' value exactly as received.
  """
  import ast

  json_object = await request.json()
  entries = json_object['entries']
  # SECURITY: 'entries' comes straight from the request body. eval() here would
  # let any caller run arbitrary Python; literal_eval only accepts literals.
  rows = ast.literal_eval(entries) if isinstance(entries, str) else entries
  df = pd.DataFrame(data=rows, columns=["text", "label"])
  df["label"] = df["label"].replace({"yes": 1, "no": 0})
  train_ds = Dataset.from_pandas(df)
  train_encoded_ds = train_ds.map(
    lambda t: climate_change_our_tokenizer(t['text'], truncation=True, padding=True),
    batched=True,
    load_from_cache_file=False,
  )
  # A fresh Trainer is needed to attach the training set; it wraps the shared
  # global model object.
  trainer = Trainer(
    model=climate_change_our_model,
    args=climate_change_our_arg,
    tokenizer=climate_change_our_tokenizer,
    train_dataset=train_encoded_ds,
  )
  # NOTE(review): train() is a blocking call inside an async handler, so the
  # server cannot serve other requests until it finishes.
  trainer.train()
  return entries

In [11]:
#Training on Climate Sentiment task
@app.post("/train/climate-sentiment")
async def train_sentiment(request: Request):
  """Fine-tune the ClimateBert sentiment model on rows posted by the client.

  The JSON body must contain an 'entries' field holding (text, label) rows,
  either as a Python-literal string or as an already-decoded list. Labels
  "risk"/"neutral"/"opportunity" are mapped to 0/1/2 before training.

  Returns the 'entries' value exactly as received.
  """
  import ast

  json_object = await request.json()
  entries = json_object['entries']
  # SECURITY: 'entries' comes straight from the request body. eval() here would
  # let any caller run arbitrary Python; literal_eval only accepts literals.
  rows = ast.literal_eval(entries) if isinstance(entries, str) else entries
  df = pd.DataFrame(data=rows, columns=["text", "label"])
  df["label"] = df["label"].replace({"risk": 0, "neutral": 1, "opportunity": 2})
  train_ds = Dataset.from_pandas(df)
  train_encoded_ds = train_ds.map(
    lambda t: climate_change_sentiment_tokenizer(t['text'], truncation=True, padding=True),
    batched=True,
    load_from_cache_file=False,
  )
  # A fresh Trainer is needed to attach the training set; it wraps the shared
  # global model object.
  trainer = Trainer(
    model=climate_change_sentiment_model,
    args=climate_change_sentiment_arg,
    tokenizer=climate_change_sentiment_tokenizer,
    train_dataset=train_encoded_ds,
  )
  # NOTE(review): train() is a blocking call inside an async handler, so the
  # server cannot serve other requests until it finishes.
  trainer.train()
  return entries

In [12]:
#Training on Climate Specificity task
@app.post("/train/climate-specificity")
async def train_specificity(request: Request):
  """Fine-tune the ClimateBert specificity model on rows posted by the client.

  The JSON body must contain an 'entries' field holding (text, label) rows,
  either as a Python-literal string or as an already-decoded list. Labels
  "non-specific"/"specific" are mapped to 0/1 before training.

  Returns the 'entries' value exactly as received.
  """
  import ast

  json_object = await request.json()
  entries = json_object['entries']
  # SECURITY: 'entries' comes straight from the request body. eval() here would
  # let any caller run arbitrary Python; literal_eval only accepts literals.
  rows = ast.literal_eval(entries) if isinstance(entries, str) else entries
  df = pd.DataFrame(data=rows, columns=["text", "label"])
  df["label"] = df["label"].replace({"non-specific": 0, "specific": 1})
  train_ds = Dataset.from_pandas(df)
  train_encoded_ds = train_ds.map(
    lambda t: climate_change_specificity_tokenizer(t['text'], truncation=True, padding=True),
    batched=True,
    load_from_cache_file=False,
  )
  # A fresh Trainer is needed to attach the training set; it wraps the shared
  # global model object.
  trainer = Trainer(
    model=climate_change_specificity_model,
    args=climate_change_specificity_arg,
    tokenizer=climate_change_specificity_tokenizer,
    train_dataset=train_encoded_ds,
  )
  # NOTE(review): train() is a blocking call inside an async handler, so the
  # server cannot serve other requests until it finishes.
  trainer.train()
  return entries

In [13]:
#Training on Climate TCFD task
@app.post("/train/climate-tcfd")
async def train_tcfd(request: Request):
  """Fine-tune the ClimateBert TCFD model on rows posted by the client.

  The JSON body must contain an 'entries' field holding (text, label) rows,
  either as a Python-literal string or as an already-decoded list. Labels
  "none"/"metrics"/"strategy"/"risk"/"governance" are mapped to 0..4 before
  training.

  Returns the 'entries' value exactly as received.
  """
  import ast

  label_ids = {"none": 0, "metrics": 1, "strategy": 2, "risk": 3, "governance": 4}
  json_object = await request.json()
  entries = json_object['entries']
  # SECURITY: 'entries' comes straight from the request body. eval() here would
  # let any caller run arbitrary Python; literal_eval only accepts literals.
  rows = ast.literal_eval(entries) if isinstance(entries, str) else entries
  df = pd.DataFrame(data=rows, columns=["text", "label"])
  df["label"] = df["label"].replace(label_ids)
  train_ds = Dataset.from_pandas(df)
  train_encoded_ds = train_ds.map(
    lambda t: climate_change_tcfd_tokenizer(t['text'], truncation=True, padding=True),
    batched=True,
    load_from_cache_file=False,
  )
  # A fresh Trainer is needed to attach the training set; it wraps the shared
  # global model object.
  trainer = Trainer(
    model=climate_change_tcfd_model,
    args=climate_change_tcfd_arg,
    tokenizer=climate_change_tcfd_tokenizer,
    train_dataset=train_encoded_ds,
  )
  # NOTE(review): train() is a blocking call inside an async handler, so the
  # server cannot serve other requests until it finishes.
  trainer.train()
  return entries

In [14]:
#Training on Climate Commitments Actions task
@app.post("/train/climate-commitments-actions")
async def train_commitments(request: Request):
  """Fine-tune the ClimateBert commitments/actions model on posted rows.

  The JSON body must contain an 'entries' field holding (text, label) rows,
  either as a Python-literal string or as an already-decoded list. Labels
  "no"/"yes" are mapped to 0/1 before training.

  Returns the 'entries' value exactly as received.
  """
  import ast

  json_object = await request.json()
  entries = json_object['entries']
  # SECURITY: 'entries' comes straight from the request body. eval() here would
  # let any caller run arbitrary Python; literal_eval only accepts literals.
  rows = ast.literal_eval(entries) if isinstance(entries, str) else entries
  df = pd.DataFrame(data=rows, columns=["text", "label"])
  df["label"] = df["label"].replace({"no": 0, "yes": 1})
  train_ds = Dataset.from_pandas(df)
  train_encoded_ds = train_ds.map(
    lambda t: climate_change_commitments_actions_tokenizer(t['text'], truncation=True, padding=True),
    batched=True,
    load_from_cache_file=False,
  )
  # A fresh Trainer is needed to attach the training set; it wraps the shared
  # global model object.
  trainer = Trainer(
    model=climate_change_commitments_actions_model,
    args=climate_change_commitments_actions_arg,
    tokenizer=climate_change_commitments_actions_tokenizer,
    train_dataset=train_encoded_ds,
  )
  # NOTE(review): train() is a blocking call inside an async handler, so the
  # server cannot serve other requests until it finishes.
  trainer.train()
  return entries

## Predicting whole dataset - either imported or from our dataset

In [15]:
#Predicting whole dataset on Climate Detection task - ClimateBert model
@app.post("/dataset/climate-detect")
async def dataset_detection(request: Request):
  """Predict climate-detection labels for every text posted by the client.

  The JSON body must contain an 'entries' field holding the texts, either as
  a Python-literal string or as an already-decoded list.

  Returns a list with one "no"/"yes" label per input row.
  """
  import ast

  labels = ("no", "yes")  # index = predicted class id
  json_object = await request.json()
  entries = json_object['entries']
  # SECURITY: 'entries' comes straight from the request body. eval() here would
  # let any caller run arbitrary Python; literal_eval only accepts literals.
  rows = ast.literal_eval(entries) if isinstance(entries, str) else entries
  df = pd.DataFrame(data=rows, columns=["text"])
  # Series.map replaces the deprecated DataFrame.applymap; the frame has a single column.
  df["text"] = df["text"].map(str)
  test_ds = Dataset.from_pandas(df)
  test_encoded_ds = test_ds.map(
    lambda t: climate_change_detection_tokenizer(t['text'], truncation=True, padding=True),
    batched=True,
    load_from_cache_file=False,
  )
  logits = climate_change_detection_trainer.predict(test_encoded_ds).predictions
  # Direct indexing keeps the output aligned with the input: an unexpected
  # class id raises instead of silently shortening the result.
  return [labels[class_id] for class_id in np.argmax(logits, axis=-1)]

In [16]:
#Predicting whole dataset on Climate sentiment task - ClimateBert model
@app.post("/dataset/climate-sentiment")
async def dataset_sentiment(request: Request):
  """Predict climate-sentiment labels for every text posted by the client.

  The JSON body must contain an 'entries' field holding the texts, either as
  a Python-literal string or as an already-decoded list.

  Returns a list with one "risk"/"neutral"/"opportunity" label per input row.
  """
  import ast

  labels = ("risk", "neutral", "opportunity")  # index = predicted class id
  json_object = await request.json()
  entries = json_object['entries']
  # SECURITY: 'entries' comes straight from the request body. eval() here would
  # let any caller run arbitrary Python; literal_eval only accepts literals.
  rows = ast.literal_eval(entries) if isinstance(entries, str) else entries
  df = pd.DataFrame(data=rows, columns=["text"])
  # Series.map replaces the deprecated DataFrame.applymap; the frame has a single column.
  df["text"] = df["text"].map(str)
  test_ds = Dataset.from_pandas(df)
  test_encoded_ds = test_ds.map(
    lambda t: climate_change_sentiment_tokenizer(t['text'], truncation=True, padding=True),
    batched=True,
    load_from_cache_file=False,
  )
  logits = climate_change_sentiment_trainer.predict(test_encoded_ds).predictions
  # Direct indexing keeps the output aligned with the input: an unexpected
  # class id raises instead of silently shortening the result.
  return [labels[class_id] for class_id in np.argmax(logits, axis=-1)]

In [17]:
#Predicting whole dataset on Climate specificity task - ClimateBert model
@app.post("/dataset/climate-specificity")
async def dataset_specificity(request: Request):
  """Predict specificity labels for every text posted by the client.

  The JSON body must contain an 'entries' field holding the texts, either as
  a Python-literal string or as an already-decoded list.

  Returns a list with one "non-specific"/"specific" label per input row.
  """
  import ast

  labels = ("non-specific", "specific")  # index = predicted class id
  json_object = await request.json()
  entries = json_object['entries']
  # SECURITY: 'entries' comes straight from the request body. eval() here would
  # let any caller run arbitrary Python; literal_eval only accepts literals.
  rows = ast.literal_eval(entries) if isinstance(entries, str) else entries
  df = pd.DataFrame(data=rows, columns=["text"])
  # Series.map replaces the deprecated DataFrame.applymap; the frame has a single column.
  df["text"] = df["text"].map(str)
  test_ds = Dataset.from_pandas(df)
  test_encoded_ds = test_ds.map(
    lambda t: climate_change_specificity_tokenizer(t['text'], truncation=True, padding=True),
    batched=True,
    load_from_cache_file=False,
  )
  logits = climate_change_specificity_trainer.predict(test_encoded_ds).predictions
  # Direct indexing keeps the output aligned with the input: an unexpected
  # class id raises instead of silently shortening the result.
  return [labels[class_id] for class_id in np.argmax(logits, axis=-1)]

In [18]:
#Predicting whole dataset on Climate commitments task - ClimateBert model
@app.post("/dataset/climate-commitments")
# NOTE(review): this handler reuses the name `dataset_specificity` from the
# specificity endpoint. The route still works because it is registered by the
# decorator, but the module-level name is rebound; consider renaming.
async def dataset_specificity(request: Request):
  """Predict commitments/actions labels for every text posted by the client.

  The JSON body must contain an 'entries' field holding the texts, either as
  a Python-literal string or as an already-decoded list.

  Returns a list with one "no"/"yes" label per input row.
  """
  import ast

  labels = ("no", "yes")  # index = predicted class id
  json_object = await request.json()
  entries = json_object['entries']
  # SECURITY: 'entries' comes straight from the request body. eval() here would
  # let any caller run arbitrary Python; literal_eval only accepts literals.
  rows = ast.literal_eval(entries) if isinstance(entries, str) else entries
  df = pd.DataFrame(data=rows, columns=["text"])
  # Series.map replaces the deprecated DataFrame.applymap; the frame has a single column.
  df["text"] = df["text"].map(str)
  test_ds = Dataset.from_pandas(df)
  test_encoded_ds = test_ds.map(
    lambda t: climate_change_commitments_actions_tokenizer(t['text'], truncation=True, padding=True),
    batched=True,
    load_from_cache_file=False,
  )
  logits = climate_change_commitments_actions_trainer.predict(test_encoded_ds).predictions
  # Direct indexing keeps the output aligned with the input: an unexpected
  # class id raises instead of silently shortening the result.
  return [labels[class_id] for class_id in np.argmax(logits, axis=-1)]

In [19]:
#Predicting whole dataset on Climate TCFD task - ClimateBert model
@app.post("/dataset/climate-tcfd")
# NOTE(review): this handler reuses the name `dataset_specificity` from the
# specificity endpoint. The route still works because it is registered by the
# decorator, but the module-level name is rebound; consider renaming.
async def dataset_specificity(request: Request):
  """Predict TCFD category labels for every text posted by the client.

  The JSON body must contain an 'entries' field holding the texts, either as
  a Python-literal string or as an already-decoded list.

  Returns a list with one of "none"/"metrics"/"strategy"/"risk"/"governance"
  per input row.
  """
  import ast

  labels = ("none", "metrics", "strategy", "risk", "governance")  # index = predicted class id
  json_object = await request.json()
  entries = json_object['entries']
  # SECURITY: 'entries' comes straight from the request body. eval() here would
  # let any caller run arbitrary Python; literal_eval only accepts literals.
  rows = ast.literal_eval(entries) if isinstance(entries, str) else entries
  df = pd.DataFrame(data=rows, columns=["text"])
  # Series.map replaces the deprecated DataFrame.applymap; the frame has a single column.
  df["text"] = df["text"].map(str)
  test_ds = Dataset.from_pandas(df)
  test_encoded_ds = test_ds.map(
    lambda t: climate_change_tcfd_tokenizer(t['text'], truncation=True, padding=True),
    batched=True,
    load_from_cache_file=False,
  )
  logits = climate_change_tcfd_trainer.predict(test_encoded_ds).predictions
  # Direct indexing keeps the output aligned with the input: an unexpected
  # class id raises instead of silently shortening the result.
  return [labels[class_id] for class_id in np.argmax(logits, axis=-1)]

## Evaluation of models

In [20]:
#Evaluating on Climate Detection task - ClimateBert model
@app.post("/evaluate/climate-detect")
# NOTE(review): every evaluation handler in this section is named
# `evaluate_detection`; routes still register, but the name is rebound each time.
async def evaluate_detection(request: Request):
  """Return the macro-F1 of the ClimateBert detection model.

  Evaluation runs on the 'test' split of climatebert/climate_detection.
  The request body is not read.
  """
  # Only the test split is scored, so only that split is tokenised.
  test_split = load_dataset("climatebert/climate_detection")['test']
  encoded_test = test_split.map(
    lambda t: climate_change_detection_tokenizer(t['text'], truncation=True, padding=True),
    batched=True,
    load_from_cache_file=False,
  )
  logits = climate_change_detection_trainer.predict(encoded_test).predictions
  y_pred = np.argmax(logits, axis=-1)
  return f1_score(encoded_test['label'], y_pred, average='macro')

In [21]:
#Evaluating on Climate commitments_actions task - ClimateBert model
@app.post("/evaluate/climate-commitments-actions")
# NOTE(review): every evaluation handler in this section is named
# `evaluate_detection`; routes still register, but the name is rebound each time.
async def evaluate_detection(request: Request):
  """Return the macro-F1 of the ClimateBert commitments/actions model.

  Evaluation runs on the 'test' split of climatebert/climate_commitments_actions.
  The request body is not read.
  """
  # Only the test split is scored, so only that split is tokenised.
  test_split = load_dataset("climatebert/climate_commitments_actions")['test']
  encoded_test = test_split.map(
    lambda t: climate_change_commitments_actions_tokenizer(t['text'], truncation=True, padding=True),
    batched=True,
    load_from_cache_file=False,
  )
  logits = climate_change_commitments_actions_trainer.predict(encoded_test).predictions
  y_pred = np.argmax(logits, axis=-1)
  return f1_score(encoded_test['label'], y_pred, average='macro')

In [22]:
#Evaluating on Climate tcfd task - Our model
@app.post("/evaluate/climate-tcfd")
async def evaluate_detection(request: Request):
  """Return the macro-averaged F1 score on the tcfd_recommendations test split.

  Scores `climate_change_tcfd_trainer` on the `test` split of the
  `climatebert/tcfd_recommendations` dataset.

  Args:
    request: Incoming HTTP request; not read by this handler.

  Returns:
    The macro-averaged F1 score as a float.
  """
  # Only the test split is scored, so tokenize just that split instead of
  # re-tokenizing every split on each request (caching is disabled here).
  test_split = load_dataset("climatebert/tcfd_recommendations")['test']
  encoded_test = test_split.map(
      lambda batch: climate_change_tcfd_tokenizer(batch['text'], truncation=True, padding=True),
      batched=True,
      load_from_cache_file=False,
  )
  # NOTE(review): predict() is a blocking call inside an async handler, so the
  # event loop is stalled while the evaluation runs.
  logits = climate_change_tcfd_trainer.predict(encoded_test).predictions
  # assumes predictions is a 2-D (examples x classes) array -- TODO confirm
  predicted_labels = np.argmax(logits, axis=-1)
  return float(f1_score(encoded_test['label'], predicted_labels, average='macro'))

In [23]:
#Evaluating on Climate Detection task - Our model
@app.post("/evaluate/climate-detect-our")
async def evaluate_detection(request: Request):
  """Return the macro-averaged F1 score on the climate_detection test split.

  Scores `climate_change_our_trainer` on the `test` split of the
  `climatebert/climate_detection` dataset.

  Args:
    request: Incoming HTTP request; not read by this handler.

  Returns:
    The macro-averaged F1 score as a float.
  """
  # Only the test split is scored, so tokenize just that split instead of
  # re-tokenizing every split on each request (caching is disabled here).
  test_split = load_dataset("climatebert/climate_detection")['test']
  encoded_test = test_split.map(
      lambda batch: climate_change_our_tokenizer(batch['text'], truncation=True, padding=True),
      batched=True,
      load_from_cache_file=False,
  )
  # NOTE(review): predict() is a blocking call inside an async handler, so the
  # event loop is stalled while the evaluation runs.
  logits = climate_change_our_trainer.predict(encoded_test).predictions
  # assumes predictions is a 2-D (examples x classes) array -- TODO confirm
  predicted_labels = np.argmax(logits, axis=-1)
  return float(f1_score(encoded_test['label'], predicted_labels, average='macro'))

In [24]:
#Evaluating on Climate Sentiment task
@app.post("/evaluate/climate-sentiment")
async def evaluate_detection(request: Request):
  """Return the macro-averaged F1 score on the climate_sentiment test split.

  Scores `climate_change_sentiment_trainer` on the `test` split of the
  `climatebert/climate_sentiment` dataset.

  Args:
    request: Incoming HTTP request; not read by this handler.

  Returns:
    The macro-averaged F1 score as a float.
  """
  # Only the test split is scored, so tokenize just that split instead of
  # re-tokenizing every split on each request (caching is disabled here).
  test_split = load_dataset("climatebert/climate_sentiment")['test']
  encoded_test = test_split.map(
      lambda batch: climate_change_sentiment_tokenizer(batch['text'], truncation=True, padding=True),
      batched=True,
      load_from_cache_file=False,
  )
  # NOTE(review): predict() is a blocking call inside an async handler, so the
  # event loop is stalled while the evaluation runs.
  logits = climate_change_sentiment_trainer.predict(encoded_test).predictions
  # assumes predictions is a 2-D (examples x classes) array -- TODO confirm
  predicted_labels = np.argmax(logits, axis=-1)
  return float(f1_score(encoded_test['label'], predicted_labels, average='macro'))

In [25]:
#Evaluating on Climate Specificity task
@app.post("/evaluate/climate-specificity")
async def evaluate_detection(request: Request):
  """Return the macro-averaged F1 score on the climate_specificity test split.

  Scores `climate_change_specificity_trainer` on the `test` split of the
  `climatebert/climate_specificity` dataset.

  Args:
    request: Incoming HTTP request; not read by this handler.

  Returns:
    The macro-averaged F1 score as a float.
  """
  # Only the test split is scored, so tokenize just that split instead of
  # re-tokenizing every split on each request (caching is disabled here).
  test_split = load_dataset("climatebert/climate_specificity")['test']
  encoded_test = test_split.map(
      lambda batch: climate_change_specificity_tokenizer(batch['text'], truncation=True, padding=True),
      batched=True,
      load_from_cache_file=False,
  )
  # NOTE(review): predict() is a blocking call inside an async handler, so the
  # event loop is stalled while the evaluation runs.
  logits = climate_change_specificity_trainer.predict(encoded_test).predictions
  # assumes predictions is a 2-D (examples x classes) array -- TODO confirm
  predicted_labels = np.argmax(logits, axis=-1)
  return float(f1_score(encoded_test['label'], predicted_labels, average='macro'))

# Set up and start the web server

In [26]:
# Read the ngrok auth token from Google Drive so it is never stored in the notebook.
with open('/content/drive/MyDrive/ClimateCognize/ngrok-auth-token.txt', 'r') as file:
    # readline() keeps the trailing newline; strip it (and any stray
    # whitespace) so the token is passed to ngrok exactly as issued.
    token = file.readline().strip()

if not token:
    # Fail here with a clear message instead of an obscure ngrok error later.
    raise ValueError("ngrok auth token file is empty")

ngrok.set_auth_token(token)



In [27]:
encoding = tiktoken.get_encoding("gpt2")

# SECURITY: the OpenAI API key must never be written into the notebook in plain
# text -- anyone with access to the notebook (or its history) could read and use
# it. Any key that was previously embedded here should be revoked and rotated.
# The key is taken from the OPENAI_API_KEY environment variable when it is
# already set; otherwise it is requested through a hidden prompt.
# For other approaches (e.g. reading the key from a file kept on another storage
# solution), see the link provided in the description above.
import os
from getpass import getpass

_openai_api_key = os.environ.get('OPENAI_API_KEY', '').strip()
if not _openai_api_key:
    _openai_api_key = getpass('OpenAI API key: ').strip()
if not _openai_api_key:
    raise RuntimeError("No OpenAI API key provided; set OPENAI_API_KEY or enter it when prompted")

# Expose the key both through the environment and on the openai module, as before.
os.environ['OPENAI_API_KEY'] = _openai_api_key
openai.api_key = _openai_api_key

In [None]:
# Single source of truth for the local port shared by the tunnel and the server.
SERVER_PORT = 8000

# Open the public ngrok tunnel on the reserved domain and report its URL.
ngrok_tunnel = ngrok.connect(
    SERVER_PORT,
    domain="social-glowworm-infinitely.ngrok-free.app",
)
print('Public URL:', ngrok_tunnel.public_url)

# Applied before uvicorn.run so the server can be started from inside a
# notebook cell; uvicorn.run then blocks until the server is stopped.
nest_asyncio.apply()
uvicorn.run(app, port=SERVER_PORT)

INFO:     Started server process [1001]
INFO:     Waiting for application startup.


Public URL: https://social-glowworm-infinitely.ngrok-free.app


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/1.19k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.38M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/4.48k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/887 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/329M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.19k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.38M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/4.48k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/947 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/329M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.19k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.38M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/4.48k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/895 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/329M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.19k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.38M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/4.48k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/890 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/329M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.38M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/4.48k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/980 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/329M [00:00<?, ?B/s]

INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


INFO:     92.53.48.105:0 - "POST /climate-tcfd?input_data=708-773&chosen_model=chatgpt HTTP/1.1" 200 OK
