# ESG Classification Task - Training

In [None]:
pip install --upgrade openai

In [None]:
import pandas as pd
import json
import openai

In [None]:
import os

import openai

# Placeholder only: substitute a real key locally and never commit it.
os.environ["OPENAI_API_KEY"] = "your-openai-api-key"

# The environment variable is inherited by the `!openai ...` CLI calls, but the
# pre-1.0 `openai` package reads it only once, at import time. When the package
# was imported before this cell ran, the key must also be set on the module.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [None]:
# Load the labelled corpus. Later cells read its 'text' and 'mod_sub_topic'
# columns and drop 'domain', 'mod_topic', 'finbert_topic' and 'finbert_sub_topic'.
# NOTE(review): "dataset-path" looks like a placeholder for the real CSV path.
dataset = pd.read_csv("dataset-path")

In [None]:
# Draw a reproducible sample of 500 rows from every sub-topic and stack the
# samples in group order under a fresh 0..n-1 index.
# The groups are iterated explicitly instead of using `groupby(...).apply(...)`:
# newer pandas releases exclude the grouping column from the frames passed to
# `apply`, which would drop 'mod_sub_topic' -- the label column used below.
# `sample` raises ValueError if a sub-topic has fewer than 500 rows.
d_500 = pd.concat(
    [
        group.sample(n=500, random_state=42)
        for _, group in dataset.groupby('mod_sub_topic')
    ],
    ignore_index=True,
)
d_500

In [None]:
# Keep only the article text and its sub-topic label, renamed to the
# prompt/completion pair written to the fine-tuning files.
columns_to_drop = ['domain', 'mod_topic', 'finbert_topic', 'finbert_sub_topic']
d_500 = d_500.drop(columns=columns_to_drop).rename(
    columns={'text': 'prompt', 'mod_sub_topic': 'completion'}
)
d_500

In [None]:
# Append the ' ->' separator to every prompt and prefix every completion with
# a single space, in one step.
d_500 = d_500.assign(
    prompt=d_500['prompt'] + ' ->',
    completion=' ' + d_500['completion'],
)

d_500

In [None]:
!pip install scikit-learn

In [None]:
from sklearn.model_selection import train_test_split

# Inputs and labels for the split
X, y = d_500['prompt'], d_500['completion']

# Hold out 20% for validation, stratified on the label, with a fixed seed
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [None]:
# Turn the training prompts and labels into single-column frames
d_x_train = X_train.to_frame(name='prompt')
d_y_train = y_train.to_frame(name='completion')

# Join them side by side on their shared index
d_500_train = pd.concat([d_x_train, d_y_train], axis=1)
d_500_train

In [None]:
# Turn the validation prompts and labels into single-column frames
d_x_val = X_val.to_frame(name='prompt')
d_y_val = y_val.to_frame(name='completion')

# Join them side by side on their shared index
d_500_val = pd.concat([d_x_val, d_y_val], axis=1)
d_500_val

In [None]:
# Write both splits as JSON Lines (one record per line)
for _frame, _path in (
    (d_500_train, "d_500_train.jsonl"),
    (d_500_val, "d_500_val.jsonl"),
):
    _frame.to_json(_path, orient="records", lines=True)

In [None]:
# Start an `ada` fine-tune on the plain prompt/label files (-t / -v), asking for
# classification metrics over the 9 sub-topic classes.
!openai api fine_tunes.create -t "d_500_train.jsonl" -v "d_500_val.jsonl" --compute_classification_metrics --classification_n_classes 9 -m ada

In [None]:
# Independent copies of both splits for the coded-label variant
d_500_code_train, d_500_code_val = d_500_train.copy(), d_500_val.copy()

In [None]:
# Code word that replaces each sub-topic label (labels carry a leading space)
label_codes = {
    ' Climate Change': ' baz',
    ' Resource Stewardship': ' qux',
    ' Environmental Opportunities': ' roc',
    ' Human Capital': ' tuv',
    ' Product Liability': ' dap',
    ' Social Opportunities': ' stu',
    ' Corporate Governance': ' klo',
    ' Business Ethics': ' xya',
    ' Non-ESG': ' nop',
}

# Apply the same substitutions, in the same order, to both splits
for _frame in (d_500_code_train, d_500_code_val):
    for _label, _code in label_codes.items():
        _frame.loc[_frame['completion'] == _label, 'completion'] = _code

In [None]:
# Write the coded-label splits as JSON Lines (one record per line)
for _frame, _path in (
    (d_500_code_train, "d_500_code_train.jsonl"),
    (d_500_code_val, "d_500_code_val.jsonl"),
):
    _frame.to_json(_path, orient="records", lines=True)

In [None]:
# Start an `ada` fine-tune on the files whose labels were replaced by code words,
# asking for classification metrics over the 9 classes.
!openai api fine_tunes.create -t "d_500_code_train.jsonl" -v "d_500_code_val.jsonl" --compute_classification_metrics --classification_n_classes 9 -m ada

In [None]:
# Independent copies of both splits for the instruction-prompt variant
d_500_prompt_train, d_500_prompt_val = d_500_train.copy(), d_500_val.copy()

In [None]:
# Candidate labels and the instruction placed in front of every example
sub_topics = [
    " Climate Change",
    " Resource Stewardship",
    " Environmental Opportunities",
    " Human Capital",
    " Product Liability",
    " Social Opportunities",
    " Corporate Governance",
    " Business Ethics",
    " Non-ESG",
]
prompt = "Classify the following text into one of the following classes:"


def concatenate_prompt(row):
    """Return the instruction, the label list and the row's 'prompt' text in triple quotes."""
    body = row['prompt']
    return f"{prompt} {sub_topics} Text:\n'''{body}'''"


# Wrap every training prompt, strip each ' ->' occurrence from the result and
# append a single ' ->' separator at the very end
wrapped_prompts = d_500_prompt_train.apply(concatenate_prompt, axis=1)
d_500_prompt_train['prompt'] = wrapped_prompts.str.replace(' ->', '') + ' ->'

In [None]:
# Candidate labels and the instruction placed in front of every example
sub_topics = [
    " Climate Change",
    " Resource Stewardship",
    " Environmental Opportunities",
    " Human Capital",
    " Product Liability",
    " Social Opportunities",
    " Corporate Governance",
    " Business Ethics",
    " Non-ESG",
]
prompt = "Classify the following text into one of the following classes:"


def concatenate_prompt(row):
    """Return the instruction, the label list and the row's 'prompt' text in triple quotes."""
    body = row['prompt']
    return f"{prompt} {sub_topics} Text:\n'''{body}'''"


# Wrap every validation prompt, strip each ' ->' occurrence from the result and
# append a single ' ->' separator at the very end
wrapped_prompts = d_500_prompt_val.apply(concatenate_prompt, axis=1)
d_500_prompt_val['prompt'] = wrapped_prompts.str.replace(' ->', '') + ' ->'

In [None]:
# Write the instruction-prompt splits as JSON Lines (one record per line)
for _frame, _path in (
    (d_500_prompt_train, "d_500_prompt_train.jsonl"),
    (d_500_prompt_val, "d_500_prompt_val.jsonl"),
):
    _frame.to_json(_path, orient="records", lines=True)

In [None]:
# Start an `ada` fine-tune on the files whose prompts carry the classification
# instruction, asking for classification metrics over the 9 classes.
!openai api fine_tunes.create -t "d_500_prompt_train.jsonl" -v "d_500_prompt_val.jsonl" --compute_classification_metrics --classification_n_classes 9 -m ada

In [None]:
# Independent copies of both splits for the instruction-prompt + coded-label variant
d_500_prompt_code_train, d_500_prompt_code_val = d_500_train.copy(), d_500_val.copy()

In [None]:
# Code words offered as classes and the instruction placed in front of every example
sub_topics = [
    " baz",
    " qux",
    " roc",
    " tuv",
    " dap",
    " stu",
    " klo",
    " xya",
    " nop",
]
prompt = "Classify the following text into one of the following classes:"


def concatenate_prompt(row):
    """Return the instruction, the code-word list and the row's 'prompt' text in triple quotes."""
    body = row['prompt']
    return f"{prompt} {sub_topics} Text:\n'''{body}'''"


# Wrap every training prompt, strip each ' ->' occurrence from the result and
# append a single ' ->' separator at the very end
wrapped_prompts = d_500_prompt_code_train.apply(concatenate_prompt, axis=1)
d_500_prompt_code_train['prompt'] = wrapped_prompts.str.replace(' ->', '') + ' ->'

In [None]:
# Code word that replaces each sub-topic label (labels carry a leading space).
label_codes = {
    ' Climate Change': ' baz',
    ' Resource Stewardship': ' qux',
    ' Environmental Opportunities': ' roc',
    ' Human Capital': ' tuv',
    ' Product Liability': ' dap',
    ' Social Opportunities': ' stu',
    ' Corporate Governance': ' klo',
    ' Business Ethics': ' xya',
    ' Non-ESG': ' nop',
}

# Both splits are encoded from the one mapping so that validation labels always
# use exactly the same codes as training. In particular ' Business Ethics' must
# become ' xya' in the validation split too; otherwise that class stays
# unencoded there and the 9-class validation metrics are computed against a
# label the model was never trained to emit.
for _frame in (d_500_prompt_code_train, d_500_prompt_code_val):
    for _label, _code in label_codes.items():
        _frame.loc[_frame['completion'] == _label, 'completion'] = _code

In [None]:
# Code words offered as classes and the instruction placed in front of every example
sub_topics = [
    " baz",
    " qux",
    " roc",
    " tuv",
    " dap",
    " stu",
    " klo",
    " xya",
    " nop",
]
prompt = "Classify the following text into one of the following classes:"


def concatenate_prompt(row):
    """Return the instruction, the code-word list and the row's 'prompt' text in triple quotes."""
    body = row['prompt']
    return f"{prompt} {sub_topics} Text:\n'''{body}'''"


# Wrap every validation prompt, strip each ' ->' occurrence from the result and
# append a single ' ->' separator at the very end
wrapped_prompts = d_500_prompt_code_val.apply(concatenate_prompt, axis=1)
d_500_prompt_code_val['prompt'] = wrapped_prompts.str.replace(' ->', '') + ' ->'

In [None]:
# Write the instruction-prompt + coded-label splits as JSON Lines (one record per line)
for _frame, _path in (
    (d_500_prompt_code_train, "d_500_prompt_code_train.jsonl"),
    (d_500_prompt_code_val, "d_500_prompt_code_val.jsonl"),
):
    _frame.to_json(_path, orient="records", lines=True)

In [None]:
# Start an `ada` fine-tune on the files that combine the classification
# instruction with code-word labels, asking for classification metrics over the 9 classes.
!openai api fine_tunes.create -t "d_500_prompt_code_train.jsonl" -v "d_500_prompt_code_val.jsonl" --compute_classification_metrics --classification_n_classes 9 -m ada

# Zero-Shot Prompting

In [None]:
# Remove the ' ->' fine-tuning separator (matched literally, every occurrence)
# so the zero-shot prompts contain only the article text
d_500_val['prompt'] = d_500_val['prompt'].str.replace(' ->', '', regex=False)

In [None]:
def _extract_json_object(text):
    """Parse the JSON value contained in *text*.

    The whole string is tried first. If that fails, the span from the first
    '{' to the last '}' is tried, so replies that wrap the JSON in prose or in
    a Markdown code fence still parse.

    Raises:
        json.JSONDecodeError: if neither attempt yields valid JSON.
    """
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        start, end = text.find("{"), text.rfind("}")
        if start == -1 or end <= start:
            raise
        return json.loads(text[start:end + 1])


def generate_completion(prompt):
    """Classify one news text with gpt-3.5-turbo using a clues/reasoning/class prompt.

    Uses the pre-1.0 `openai.ChatCompletion` interface.

    Args:
        prompt: The article text to classify; it is inserted into the user message.

    Returns:
        A dict with the keys "clues", "reasoning" and "class", each holding a
        one-element list with the corresponding field of the model's JSON reply.

    Raises:
        json.JSONDecodeError: if the reply contains no parseable JSON.
        KeyError: if the parsed reply lacks one of the three expected fields.
    """
    system_content = """
    You are an ESG news articles classifier. There are nine classes with these definitions:\n
    1. Climate Change: Pertains to news articles discussing topics such as carbon emissions reduction initiatives, the carbon footprint of products, climate change vulnerabilities, and financial initiatives or instruments designed to mitigate the impact of climate change. This can include policies or corporate strategies targeting climate change.\n
    2. Resource Stewardship: Involves articles highlighting how companies manage natural resources and waste. It encompasses a range of issues including but not limited to water conservation, biodiversity, sustainable land use, responsible raw material sourcing, toxic emissions reduction, and effective waste management, including electronic waste.\n
    3. Environmental Opportunities: Includes articles that focus on the potential opportunities arising from environmental conservation efforts. It covers green technology innovations, renewable energy initiatives, sustainable building projects, and financial investments targeting environmental sustainability.\n
    4. Human Capital: This class encompasses news items related to labor management, employee welfare, and workforce development. It can include articles about health & safety protocols, human capital development programs, and supply chain labor standards, including diversity, equity, and inclusion initiatives in the workplace.\n
    5. Product Liability: Relates to articles discussing aspects of product safety and quality, including chemical safety and consumer financial protection. It includes topics such as privacy and data security issues, and socially responsible investment, which might impact product liability.\n
    6. Social Opportunities: This category covers articles focusing on the societal benefits generated through corporate initiatives. It includes news on community financing, enhancing healthcare access, nutrition and health opportunities, educational initiatives, and investments aimed at social development.\n
    7. Corporate Governance: Deals with articles related to the structural and strategic management aspects of corporations. This includes topics such as ownership and control dynamics, board composition and performance, executive remuneration, accounting transparency, reorganization, and significant executive team changes, including the hiring or resignation of C-level executives and directors.\n
    8. Business Ethics: Encompasses articles on the ethical considerations of business operations. It includes subjects such as tax transparency, anti-corruption measures, fraud prevention, and adherence to ethical business practices and regulations.\n
    9. Non-ESG: This class is for articles that do not fit into any of the above ESG categories. It can include a wide range of topics not directly related to environmental, social, and governance issues.\n
    You need to classify the news articles into one of the classes
    """

    user_content = f"""
    First, list CLUES (i.e., keywords, phrases, contextual information, semantic relations, semantic meaning, tones,
    references) that support the class determination of input.
    Second, deduce the diagnostic REASONING process from premises (i.e., clues, input) that supports the INPUT
    class determination (limit the number of words to 130).
    Third, based on clues, reasoning and input, determine the CLASS of INPUT.
    
    Construct the output into a json in this format:
    {{"clues": "",
      "reasoning": "",
      "class": ""}}

    \nINPUT: `{prompt}`
    """

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-0613",
        messages=[
            {"role": "system", "content": system_content},
            {"role": "user", "content": user_content}
        ]
    )

    # The assistant message carries the JSON document as a plain string
    reply_text = completion.choices[0].message["content"]
    inner_json = _extract_json_object(reply_text)

    # One-element lists, matching the shape the calling loop stores per row
    return {
        "clues": [inner_json["clues"]],
        "reasoning": [inner_json["reasoning"]],
        "class": [inner_json["class"]],
    }

# Load your prompts DataFrame
prompts_df = d_500_val.copy()

# One result dict per classified row. Collecting them in a list means the
# results frame no longer has to exist before the loop starts.
result_rows = []

# The loop variable is not called `prompt`, so the module-level `prompt`
# instruction string used by `concatenate_prompt` is left untouched.
for index, row in prompts_df.iloc[:900].iterrows():
    article_text = row['prompt']
    output = generate_completion(article_text)
    output["prompt"] = article_text
    result_rows.append(output)
    # Progress report for the current row
    print(f"Index: {index}")
    print(f"Prompt: {article_text}")
    print(f"Output: {output}")

# Build the results DataFrame once and save it to a CSV file
results_df_carp = pd.DataFrame(result_rows)
results_df_carp.to_csv('results_df_carp.csv', index=False)

# News Retrieval (NYT)

In [None]:
import requests
import time
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

# Replace 'your-api-key' with your actual API key
API_KEY = 'your-api-key'

def get_articles(year, month):
    """Fetch one month of the NYT Archive API and flatten each article.

    Args:
        year: Four-digit year of the archive month.
        month: Month number (1-12) of the archive month.

    Returns:
        A list with one dict per article holding 'abstract', 'web_url',
        'snippet', 'lead_paragraph', 'headline', 'print_headline', 'pub_date'
        and 'keywords' (the values of keywords named 'organizations' or 'subject').

    Raises:
        requests.HTTPError: if the API answers with an error status
            (for example when the rate limit is exceeded).
        requests.Timeout: if the API does not answer in time.
    """
    url = f'https://api.nytimes.com/svc/archive/v1/{year}/{month}.json'
    # The key goes in as a query parameter so it is URL-encoded by requests;
    # the timeout keeps a stalled connection from hanging the whole download.
    response = requests.get(url, params={'api-key': API_KEY}, timeout=60)
    # Fail with the HTTP status instead of a KeyError on an error payload.
    response.raise_for_status()
    data = response.json()

    articles = []
    for article in data['response']['docs']:
        # `keywords` and `headline` may be absent or null on some records.
        keywords = [
            keyword['value']
            for keyword in article.get('keywords') or []
            if keyword['name'] in ('organizations', 'subject')
        ]
        headline = article.get('headline') or {}
        articles.append({
            'abstract': article.get('abstract'),
            'web_url': article.get('web_url'),
            'snippet': article.get('snippet'),
            'lead_paragraph': article.get('lead_paragraph'),
            'headline': headline.get('main'),
            'print_headline': headline.get('print_headline'),
            'keywords': keywords,
            'pub_date': article.get('pub_date')
        })
    return articles

def _write_year_csv(year, rows):
    """Store *rows* in '/df_nyt_<year>.csv' (empty strings become NaN) and return the frame."""
    frame = pd.DataFrame(rows)
    frame.replace('', np.nan, inplace=True)
    # NOTE(review): this writes to the filesystem root, while later cells read
    # from "NYT/df_nyt_<year>.csv" -- confirm the intended directory.
    frame.to_csv(f'/df_nyt_{year}.csv', index=False)
    return frame


start_date = datetime(2003, 1, 1)
end_date = datetime(2022, 12, 31)

articles = {}
current_year = start_date.year
date = start_date
while date <= end_date:
    print(f"Getting articles for {date.year}-{date.month}")

    # One archive request per calendar month
    year_articles = get_articles(date.year, date.month)

    # On entering a new year, flush the finished year to disk first
    if current_year != date.year:
        df = _write_year_csv(current_year, articles[current_year])
        current_year = date.year

    # Accumulate this month's articles under the current year
    articles.setdefault(current_year, []).extend(year_articles)

    # Step to the next month, pausing 12 seconds to stay under the rate limit
    date += relativedelta(months=1)
    time.sleep(12)

# The final year is never followed by a year change, so flush it here
df = _write_year_csv(current_year, articles[current_year])

# ESG Pipeline

In [None]:
!pip install spacy
!python3 -m spacy download en_core_web_md

In [None]:
# Load one archive CSV per year, newest (2022) first down to 2003
(
    df_nyt_2022, df_nyt_2021, df_nyt_2020, df_nyt_2019, df_nyt_2018,
    df_nyt_2017, df_nyt_2016, df_nyt_2015, df_nyt_2014, df_nyt_2013,
    df_nyt_2012, df_nyt_2011, df_nyt_2010, df_nyt_2009, df_nyt_2008,
    df_nyt_2007, df_nyt_2006, df_nyt_2005, df_nyt_2004, df_nyt_2003,
) = (pd.read_csv(f"NYT/df_nyt_{year}.csv") for year in range(2022, 2002, -1))

In [None]:
# Yearly frames in descending order: position i holds the frame for year 2022 - i
dfs = [
    df_nyt_2022, df_nyt_2021, df_nyt_2020, df_nyt_2019, df_nyt_2018,
    df_nyt_2017, df_nyt_2016, df_nyt_2015, df_nyt_2014, df_nyt_2013,
    df_nyt_2012, df_nyt_2011, df_nyt_2010, df_nyt_2009, df_nyt_2008,
    df_nyt_2007, df_nyt_2006, df_nyt_2005, df_nyt_2004, df_nyt_2003,
]

In [None]:
import ast

# Define the function to parse keywords
def parse_keywords(keyword_str):
    """Turn the stringified keyword list stored in the CSV back into a list.

    Args:
        keyword_str: The cell value, expected to be the text of a Python list
            literal such as "['Apple Inc', 'Computers']".

    Returns:
        The parsed list, or an empty list when the value cannot be parsed
        (for example NaN or malformed text) or parses to something other than
        a list, so callers can always iterate over keywords.
    """
    try:
        parsed = ast.literal_eval(keyword_str)
    except (SyntaxError, ValueError, TypeError):
        return []
    # A bare string or number literal would otherwise be iterated character
    # by character (or not be iterable at all) by the caller.
    return parsed if isinstance(parsed, list) else []

# Parse the stringified keyword lists once per yearly frame: the result does
# not depend on the company, so it is done before the company loop.
for df in dfs:
    df['parsed_keywords'] = df['keywords'].apply(parse_keywords)

# Loop over each company
for index, row in df_companies.iterrows():
    # NOTE(review): these are literal placeholders, so `row` is never read and
    # every iteration rebuilds the same `symbol_df`. Presumably both values
    # should come from columns of `row` -- confirm the column names.
    company_name = "company-name"
    symbol = "symbol"

    # From every yearly frame (2022 down to 2003), keep the rows that have at
    # least one keyword containing the company name
    all_rows = [
        df[df['parsed_keywords'].apply(
            lambda keywords: any(company_name in keyword for keyword in keywords)
        )]
        for df in dfs
    ]

    # Concatenate all the rows into a single dataframe for the current company
    globals()[f"{symbol.lower()}_df"] = pd.concat(all_rows)

# Now, you will have dataframes named like aapl_df, msft_df, etc., each containing data from 2022 to 2003.

In [None]:
# Build the classifier input as "<headline>. <lead paragraph>"
headline_col = company_df['headline']
lead_col = company_df['lead_paragraph']
company_df['text'] = headline_col.str.cat(lead_col, sep='. ')
company_df.head()

In [None]:
# Fill NaN values with an empty string.
# The result is assigned back to the column: `fillna(..., inplace=True)` on a
# column selected from the frame is a chained assignment, which pandas warns
# about and which does not update the frame when copy-on-write is enabled.
company_df['text'] = company_df['text'].fillna("")

In [None]:
import spacy

# Load the pre-trained language model
nlp = spacy.load('en_core_web_md')


def identify_organizations(text):
    """Return the text of every entity in *text* that the model labels "ORG"."""
    return [entity.text for entity in nlp(text).ents if entity.label_ == "ORG"]


# Run the extraction on each 'text' value and keep the lists in 'ner_result'
company_df['ner_result'] = company_df['text'].apply(identify_organizations)

In [None]:
# Keep only rows whose NER output is a list containing the entry 'company_name'.
# NOTE(review): 'company_name' is a quoted literal here, presumably a
# placeholder for the actual organisation name -- confirm.
mentions_company = company_df['ner_result'].apply(
    lambda ner_result: isinstance(ner_result, list) and 'company_name' in ner_result
)
company_df = company_df[mentions_company]
company_df

In [None]:
from transformers import BertTokenizer, BertForSequenceClassification, pipeline

# The model weights and the tokenizer come from the same checkpoint
esg_checkpoint = 'yiyanghkust/finbert-esg-9-categories'
finbert = BertForSequenceClassification.from_pretrained(esg_checkpoint, num_labels=9)
tokenizer = BertTokenizer.from_pretrained(esg_checkpoint)
esg_classifier = pipeline("text-classification", model=finbert, tokenizer=tokenizer)

# Smoke test on a single sentence
sample_sentence = 'For 2002, our total net emissions were approximately 60 million metric tons of CO2 equivalents for all businesses and operations we have ﬁnancial interests in, based on its equity share in those businesses and operations.'
results = esg_classifier(sample_sentence)
print(results) # [{'label': 'Climate Change', 'score': 0.9955655932426453}]

In [None]:
# Add empty columns for the ESG sub-topic label and its score
company_df = company_df.assign(sub_label_text=None, sub_score_text=None)
company_df.head()

In [None]:
# Classify every article text and record the top label and its score.
for index, row in company_df.iterrows():
    if isinstance(row['text'], str):
        # Use the FinBERT ESG text-classification pipeline here, not the spaCy
        # `nlp` object: the pipeline returns a list of {'label', 'score'}
        # dicts, whereas indexing a spaCy Doc yields a Token that has no
        # 'label' or 'score' entries.
        results_concat = esg_classifier(row['text'])
        company_df.at[index, 'sub_label_text'] = results_concat[0]['label']
        company_df.at[index, 'sub_score_text'] = results_concat[0]['score']

In [None]:
# Discard the rows that were labelled 'Non-ESG'
is_esg = company_df['sub_label_text'].ne('Non-ESG')
company_df = company_df.loc[is_esg]
company_df

In [None]:
# Renumber the remaining rows 0..n-1; the previous index is discarded rather
# than kept as a column.
company_df = company_df.reset_index(drop=True)
company_df.head()

In [None]:
# Candidate labels and the instruction placed in front of every article.
# NOTE(review): the fine-tuning prompts were built from label lists whose
# entries carry a leading space (or from code words), so this list renders
# differently inside the prompt -- confirm it matches the format the chosen
# fine-tuned model was trained on.
sub_topics = [
    "Climate Change",
    "Resource Stewardship",
    "Environmental Opportunities",
    "Human Capital",
    "Product Liability",
    "Social Opportunities",
    "Corporate Governance",
    "Business Ethics",
    "Non-ESG",
]
prompt = "Classify the following text into one of the following classes:"


def concatenate_prompt(row):
    """Return the instruction, the label list, the row's 'text' in triple quotes and the ' ->' separator."""
    body = row['text']
    return f"{prompt} {sub_topics} Text:\n'''{body}''' ->"


# Store the finished prompt of every row in a new 'prompt_text' column
company_df['prompt_text'] = company_df.apply(concatenate_prompt, axis=1)

In [None]:
# Request settings shared by every call; only the prompt changes per row
completion_settings = dict(
    model="model-id",
    temperature=0,
    max_tokens=3,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0,
)

# Query the completion model for the first `x` rows
for index in company_df.iloc[0:x].index:
    prompt = company_df.loc[index, 'prompt_text']

    response = openai.Completion.create(prompt=prompt, **completion_settings)

    print(response)

    # Keep the first choice's text, without surrounding whitespace
    company_df.loc[index, 'prompt_ada_result'] = response['choices'][0]['text'].strip()

In [None]:
from transformers import AutoModelForSequenceClassification
# Loads the finbert-tone checkpoint as a sequence-classification model.
# NOTE(review): `model` is not passed to the pipeline below, which is given the
# checkpoint name instead -- this explicit load looks redundant; confirm.
model = AutoModelForSequenceClassification.from_pretrained("yiyanghkust/finbert-tone")

# Use a pipeline as a high-level helper
from transformers import pipeline

# Text-classification pipeline built from the same checkpoint name
finbert_tone = pipeline("text-classification", model="yiyanghkust/finbert-tone")

In [None]:
# Score the tone of every article text and record the top label and its score.
# The loop iterates the same frame it writes to, so each `index` refers to an
# existing row of `company_df` instead of a label taken from a different frame.
for index, row in company_df.iterrows():
    if isinstance(row['text'], str):
        results_concat = finbert_tone(row['text'])
        company_df.at[index, 'finbert_sentiment'] = results_concat[0]['label']
        company_df.at[index, 'finbert_sentiment_score'] = results_concat[0]['score']