In [2]:
!pip install tika



In [56]:
!pip install ace-tools

Collecting ace-tools
  Obtaining dependency information for ace-tools from https://files.pythonhosted.org/packages/27/c4/402d3ae2ecbfe72fbdcb2769f55580f1c54a3ca110c44e1efc034516a499/ace_tools-0.0-py3-none-any.whl.metadata
  Downloading ace_tools-0.0-py3-none-any.whl.metadata (300 bytes)
Downloading ace_tools-0.0-py3-none-any.whl (1.1 kB)
Installing collected packages: ace-tools
Successfully installed ace-tools-0.0


In [37]:
import pandas as pd
df = pd.read_csv('data.csv')
df

Unnamed: 0,CompanyName,Industry,Sector,Year,URL
0,Amgen Inc.,Drug Manufacturers - Major,Healthcare,2022,https://www.responsibilityreports.com/HostedDa...
1,Amgen Inc.,Drug Manufacturers - Major,Healthcare,2021,https://www.responsibilityreports.com/HostedDa...
2,Amgen Inc.,Drug Manufacturers - Major,Healthcare,2020,https://www.responsibilityreports.com/HostedDa...
3,AstraZeneca plc,Drug Manufacturers - Major,Healthcare,2022,https://www.responsibilityreports.com/HostedDa...
4,AstraZeneca plc,Drug Manufacturers - Major,Healthcare,2021,https://www.responsibilityreports.com/HostedDa...
5,AstraZeneca plc,Drug Manufacturers - Major,Healthcare,2020,https://www.responsibilityreports.com/HostedDa...
6,Bayer AG,Drug Manufacturers - Major,Healthcare,2022,https://www.responsibilityreports.com/HostedDa...
7,Bayer AG,Drug Manufacturers - Major,Healthcare,2021,https://www.responsibilityreports.com/HostedDa...
8,Bayer AG,Drug Manufacturers - Major,Healthcare,2020,https://www.responsibilityreports.com/HostedDa...
9,Johnson & Johnson,Drug Manufacturers - Major,Healthcare,2022,https://www.responsibilityreports.com/HostedDa...


In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline
from tika import parser
import re


tokenizer = AutoTokenizer.from_pretrained("nbroad/ESG-BERT")

model = AutoModelForSequenceClassification.from_pretrained("nbroad/ESG-BERT")

# Create the pipeline for text classification
classifier = pipeline('text-classification', model=model, tokenizer=tokenizer)

max_length = 1024
model_checkpoint = "prajjwal1/bert-medium"
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint)
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)

tokenizer.model_max_length = max_length
model.config.max_position_embeddings = max_length
model.base_model.embeddings.position_ids = torch.arange(max_length).expand((1, -1))
model.base_model.embeddings.token_type_ids = torch.zeros(max_length).expand((1, -1))
orig_pos_emb = model.base_model.embeddings.position_embeddings.weight
model.base_model.embeddings.position_embeddings.weight = torch.nn.Parameter(torch.cat((orig_pos_emb, orig_pos_emb)))

  return torch.load(checkpoint_file, map_location="cpu")
Xformers is not installed correctly. If you want to use memorry_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.
Some weights of the model checkpoint at prajjwal1/bert-medium were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassifica

In [4]:
# Create a Class to parse PDF
class PDFParser:
    def __init__(self, file_path):
        self.file_path = file_path
        self.raw = parser.from_file(self.file_path)
        self.text = self.raw['content']

    def get_text(self):
        return self.text

    def get_text_clean(self):
        text = self.text
        text = re.sub(r'\n', ' ', text)
        text = re.sub(r'\s+', ' ', text)
        return text

    def get_text_clean_list(self):
        text = self.get_text_clean()
        text_list = text.split('.')
        return text_list

In [5]:
def preprocess_text(sentences):
    # Preprocess and split text into manageable parts
    processed_texts = []
    for sentence in sentences:
        tokens = tokenizer.encode(sentence, add_special_tokens=True)
        if len(tokens) > 512:
            # If the sentence exceeds the limit, split it into chunks
            start = 0
            while start < len(tokens):
                chunk = tokens[start:start + 511]  # reserve space for special tokens
                processed_texts.append(tokenizer.decode(chunk))
                start += 511
        else:
            processed_texts.append(sentence)
    return processed_texts

In [39]:
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd
from transformers import pipeline

# Assuming PDFParser and classifier are already defined and imported correctly

def process_company_report(company_name, industry, sector, year, url):
    try:
        pp = PDFParser(url)
        sentences = pp.get_text_clean_list()
        print(f"Processing {company_name} report for {year} with {len(sentences):,d} sentences.")
        results = []

        batch_size = 10
        current_batch = []

        for sentence in sentences:
            # Tokenize the sentence and check its length
            tokens = tokenizer.encode(sentence, add_special_tokens=True, truncation=True, max_length=512)
            truncated_sentence = tokenizer.decode(tokens, skip_special_tokens=True)

            # Accumulate sentences until the batch is full
            if len(current_batch) < batch_size:
                current_batch.append(truncated_sentence)
            else:
                # When batch is full, process it
                if current_batch:
                    results.extend(classifier(current_batch))
                current_batch = [truncated_sentence]  # Start a new batch with the current sentence

        # Process the last batch if it's not empty
        if current_batch:
            results.extend(classifier(current_batch))

        df = pd.DataFrame(results)
        # Add the additional columns for output format
        df['CompanyName'] = company_name
        df['Industry'] = industry
        df['Sector'] = sector
        df['Year'] = year
        return df
    except Exception as e:
        print(f"Failed to process {company_name} for {year} from {url}: {e}")
        return pd.DataFrame()  # Return an empty DataFrame in case of failure

def run_classifier_parallel(company_data_df):
    all_data = []
    company_info = company_data_df[['CompanyName', 'Industry', 'Sector', 'Year', 'URL']].values.tolist()
    with ThreadPoolExecutor() as executor:
        # Submit tasks with the additional information
        future_to_url = {
            executor.submit(process_company_report, company_name, industry, sector, year, url): 
            (company_name, industry, sector, year, url) 
            for company_name, industry, sector, year, url in company_info
        }
        
        for future in as_completed(future_to_url):
            df = future.result()
            if not df.empty:
                all_data.append(df)

    # Concatenate all results into a single DataFrame
    return pd.concat(all_data, ignore_index=True) if all_data else pd.DataFrame()

# Assuming 'df' is the DataFrame with columns 'Company Name', 'Industry', 'Sector', 'Year', and 'URL'
result_data = run_classifier_parallel(df)

print(result_data)


2024-11-07 22:15:14,070 [ThreadPoolEx] [INFO ]  Retrieving https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/a/NASDAQ_AMGN_2022.pdf to /var/folders/3n/ym5c9__567b2nsmfm2wz5_br0000gn/T/hosteddata-responsibilityreportarchive-a-nasdaq_amgn_2022.pdf.
2024-11-07 22:15:14,071 [ThreadPoolEx] [INFO ]  Retrieving https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/a/NASDAQ_AMGN_2021.pdf to /var/folders/3n/ym5c9__567b2nsmfm2wz5_br0000gn/T/hosteddata-responsibilityreportarchive-a-nasdaq_amgn_2021.pdf.
2024-11-07 22:15:14,071 [ThreadPoolEx] [INFO ]  Retrieving https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/a/NASDAQ_AMGN_2020.pdf to /var/folders/3n/ym5c9__567b2nsmfm2wz5_br0000gn/T/hosteddata-responsibilityreportarchive-a-nasdaq_amgn_2020.pdf.
2024-11-07 22:15:14,072 [ThreadPoolEx] [INFO ]  Retrieving https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/a/LSE_AZN_2022.pdf to /var/folders/3n/ym5c

Processing Amgen Inc. report for 2022 with 2,128 sentences.
Processing Hologic Inc. report for 2022 with 3,656 sentences.
Processing AstraZeneca plc report for 2021 with 1,090 sentences.
Processing Amgen Inc. report for 2020 with 1,446 sentences.
Processing Bayer AG report for 2020 with 6,866 sentences.
Processing Bayer AG report for 2021 with 8,174 sentences.
Processing Johnson & Johnson report for 2020 with 3,502 sentences.
Processing AstraZeneca plc report for 2022 with 1,040 sentences.
Processing AstraZeneca plc report for 2020 with 1,968 sentences.
Processing Amgen Inc. report for 2021 with 1,924 sentences.
Processing Hologic Inc. report for 2021 with 1,100 sentences.
Processing Bayer AG report for 2022 with 9,091 sentences.
Processing Johnson & Johnson report for 2021 with 4,003 sentences.
Processing Johnson & Johnson report for 2022 with 3,993 sentences.


2024-11-07 22:20:09,113 [ThreadPoolEx] [INFO ]  Retrieving https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/h/NASDAQ_HOLX_2020.pdf to /var/folders/3n/ym5c9__567b2nsmfm2wz5_br0000gn/T/hosteddata-responsibilityreportarchive-h-nasdaq_holx_2020.pdf.


Processing Hologic Inc. report for 2020 with 1,197 sentences.


2024-11-07 22:20:12,990 [ThreadPoolEx] [INFO ]  Retrieving https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/a/NYSE_AVNS_2022.pdf to /var/folders/3n/ym5c9__567b2nsmfm2wz5_br0000gn/T/hosteddata-responsibilityreportarchive-a-nyse_avns_2022.pdf.


Processing Avanos Medical, Inc. report for 2022 with 212 sentences.


2024-11-07 22:20:19,786 [ThreadPoolEx] [INFO ]  Retrieving https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/a/NYSE_AVNS_2021.pdf to /var/folders/3n/ym5c9__567b2nsmfm2wz5_br0000gn/T/hosteddata-responsibilityreportarchive-a-nyse_avns_2021.pdf.


Processing Avanos Medical, Inc. report for 2021 with 232 sentences.


2024-11-07 22:21:12,225 [ThreadPoolEx] [INFO ]  Retrieving https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/a/NYSE_AVNS_2020.pdf to /var/folders/3n/ym5c9__567b2nsmfm2wz5_br0000gn/T/hosteddata-responsibilityreportarchive-a-nyse_avns_2020.pdf.


Processing Avanos Medical, Inc. report for 2020 with 225 sentences.


2024-11-07 22:21:21,759 [ThreadPoolEx] [INFO ]  Retrieving https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/b/NYSE_BSX_2022.pdf to /var/folders/3n/ym5c9__567b2nsmfm2wz5_br0000gn/T/hosteddata-responsibilityreportarchive-b-nyse_bsx_2022.pdf.


Processing Boston Scientific Corporation report for 2022 with 3,596 sentences.


2024-11-07 22:21:32,242 [ThreadPoolEx] [INFO ]  Retrieving https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/b/NYSE_BSX_2021.pdf to /var/folders/3n/ym5c9__567b2nsmfm2wz5_br0000gn/T/hosteddata-responsibilityreportarchive-b-nyse_bsx_2021.pdf.


Processing Boston Scientific Corporation report for 2021 with 1,472 sentences.


2024-11-07 22:22:23,895 [ThreadPoolEx] [INFO ]  Retrieving https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/b/NYSE_BSX_2020.pdf to /var/folders/3n/ym5c9__567b2nsmfm2wz5_br0000gn/T/hosteddata-responsibilityreportarchive-b-nyse_bsx_2020.pdf.


Processing Boston Scientific Corporation report for 2020 with 921 sentences.


2024-11-07 22:23:58,828 [ThreadPoolEx] [INFO ]  Retrieving https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/C/ASX_COH_2022.pdf to /var/folders/3n/ym5c9__567b2nsmfm2wz5_br0000gn/T/hosteddata-responsibilityreportarchive-c-asx_coh_2022.pdf.


Processing Cochlear Limited report for 2022 with 900 sentences.


2024-11-07 22:24:20,015 [ThreadPoolEx] [INFO ]  Retrieving https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/C/ASX_COH_2021.pdf to /var/folders/3n/ym5c9__567b2nsmfm2wz5_br0000gn/T/hosteddata-responsibilityreportarchive-c-asx_coh_2021.pdf.


Processing Cochlear Limited report for 2021 with 1,168 sentences.


2024-11-07 22:25:39,528 [ThreadPoolEx] [INFO ]  Retrieving https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/C/ASX_COH_2020.pdf to /var/folders/3n/ym5c9__567b2nsmfm2wz5_br0000gn/T/hosteddata-responsibilityreportarchive-c-asx_coh_2020.pdf.


Processing Cochlear Limited report for 2020 with 2,436 sentences.


2024-11-07 22:26:28,989 [ThreadPoolEx] [INFO ]  Retrieving https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/c/NYSE_CI_2022.pdf to /var/folders/3n/ym5c9__567b2nsmfm2wz5_br0000gn/T/hosteddata-responsibilityreportarchive-c-nyse_ci_2022.pdf.


Processing CIGNA Corporation report for 2022 with 1,996 sentences.


2024-11-07 22:26:40,506 [ThreadPoolEx] [INFO ]  Retrieving https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/c/NYSE_CI_2021.pdf to /var/folders/3n/ym5c9__567b2nsmfm2wz5_br0000gn/T/hosteddata-responsibilityreportarchive-c-nyse_ci_2021.pdf.


Processing CIGNA Corporation report for 2021 with 585 sentences.


2024-11-07 22:27:41,579 [ThreadPoolEx] [INFO ]  Retrieving https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/c/NYSE_CI_2020.pdf to /var/folders/3n/ym5c9__567b2nsmfm2wz5_br0000gn/T/hosteddata-responsibilityreportarchive-c-nyse_ci_2020.pdf.


Processing CIGNA Corporation report for 2020 with 8,984 sentences.


2024-11-07 22:28:10,805 [ThreadPoolEx] [INFO ]  Retrieving https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/c/NYSE_CVS_2022.pdf to /var/folders/3n/ym5c9__567b2nsmfm2wz5_br0000gn/T/hosteddata-responsibilityreportarchive-c-nyse_cvs_2022.pdf.


Processing CVS Health report for 2022 with 432 sentences.


2024-11-07 22:29:30,241 [ThreadPoolEx] [INFO ]  Retrieving https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/c/NYSE_CVS_2021.pdf to /var/folders/3n/ym5c9__567b2nsmfm2wz5_br0000gn/T/hosteddata-responsibilityreportarchive-c-nyse_cvs_2021.pdf.


Processing CVS Health report for 2021 with 761 sentences.


2024-11-07 22:29:42,800 [ThreadPoolEx] [INFO ]  Retrieving https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/c/NYSE_CVS_2020.pdf to /var/folders/3n/ym5c9__567b2nsmfm2wz5_br0000gn/T/hosteddata-responsibilityreportarchive-c-nyse_cvs_2020.pdf.
2024-11-07 22:29:47,209 [ThreadPoolEx] [INFO ]  Retrieving https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/e/NYSE_ELV_2022.pdf to /var/folders/3n/ym5c9__567b2nsmfm2wz5_br0000gn/T/hosteddata-responsibilityreportarchive-e-nyse_elv_2022.pdf.


Failed to process CVS Health for 2020 from https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/c/NYSE_CVS_2020.pdf: expected string or bytes-like object, got 'NoneType'
Processing Elevance Health, Inc. report for 2022 with 1,010 sentences.


2024-11-07 22:30:07,644 [ThreadPoolEx] [INFO ]  Retrieving https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/e/NYSE_ELV_2021.pdf to /var/folders/3n/ym5c9__567b2nsmfm2wz5_br0000gn/T/hosteddata-responsibilityreportarchive-e-nyse_elv_2021.pdf.


Processing Elevance Health, Inc. report for 2021 with 740 sentences.


2024-11-07 22:31:00,829 [ThreadPoolEx] [INFO ]  Retrieving https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/e/NYSE_ELV_2020.pdf to /var/folders/3n/ym5c9__567b2nsmfm2wz5_br0000gn/T/hosteddata-responsibilityreportarchive-e-nyse_elv_2020.pdf.


Processing Elevance Health, Inc. report for 2020 with 213 sentences.


2024-11-07 22:31:45,796 [ThreadPoolEx] [INFO ]  Retrieving https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/u/NYSE_UNH_2022.pdf to /var/folders/3n/ym5c9__567b2nsmfm2wz5_br0000gn/T/hosteddata-responsibilityreportarchive-u-nyse_unh_2022.pdf.


Processing UnitedHealth Group Inc. report for 2022 with 1,861 sentences.


2024-11-07 22:32:14,286 [ThreadPoolEx] [INFO ]  Retrieving https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/u/NYSE_UNH_2021.pdf to /var/folders/3n/ym5c9__567b2nsmfm2wz5_br0000gn/T/hosteddata-responsibilityreportarchive-u-nyse_unh_2021.pdf.


Processing UnitedHealth Group Inc. report for 2021 with 1,355 sentences.


2024-11-07 22:32:52,880 [ThreadPoolEx] [INFO ]  Retrieving https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/u/NYSE_UNH_2020.pdf to /var/folders/3n/ym5c9__567b2nsmfm2wz5_br0000gn/T/hosteddata-responsibilityreportarchive-u-nyse_unh_2020.pdf.


Processing UnitedHealth Group Inc. report for 2020 with 888 sentences.
                                              label     score  \
0                          Access_And_Affordability  0.340901   
1                              Competitive_Behavior  0.279085   
2                          Access_And_Affordability  0.656968   
3      Management_Of_Legal_And_Regulatory_Framework  0.540538   
4                         Business_Model_Resilience  0.309790   
...                                             ...       ...   
81160            Physical_Impacts_Of_Climate_Change  0.910186   
81161  Management_Of_Legal_And_Regulatory_Framework  0.648359   
81162                              Customer_Welfare  0.269527   
81163            Physical_Impacts_Of_Climate_Change  0.910186   
81164   Employee_Engagement_Inclusion_And_Diversity  0.393710   

             CompanyName                    Industry      Sector  Year  
0        AstraZeneca plc  Drug Manufacturers - Major  Healthcare  2021  
1 

In [41]:
grouped_data = result_data.groupby(['CompanyName', 'Industry', 'Sector', 'Year', 'label']).score.mean().reset_index()

# Pivot the data so each unique label becomes a column, with scores as values
pivoted_data = grouped_data.pivot(index=['CompanyName', 'Industry', 'Sector', 'Year'], columns='label', values='score').reset_index()

# Rename columns to remove multi-index structure created by pivot
pivoted_data.columns.name = None  # Remove the index name

pivoted_data

Unnamed: 0,CompanyName,Industry,Sector,Year,Access_And_Affordability,Air_Quality,Business_Ethics,Business_Model_Resilience,Competitive_Behavior,Critical_Incident_Risk_Management,...,Labor_Practices,Management_Of_Legal_And_Regulatory_Framework,Physical_Impacts_Of_Climate_Change,Product_Design_And_Lifecycle_Management,Product_Quality_And_Safety,Selling_Practices_And_Product_Labeling,Supply_Chain_Management,Systemic_Risk_Management,Waste_And_Hazardous_Materials_Management,Water_And_Wastewater_Management
0,Amgen Inc.,Drug Manufacturers - Major,Healthcare,2020,0.543071,0.661859,0.768287,0.399884,0.313991,0.611724,...,0.559824,0.528019,0.766068,0.65342,0.563373,0.473794,0.243049,0.51172,0.914585,0.94634
1,Amgen Inc.,Drug Manufacturers - Major,Healthcare,2021,0.554402,0.919901,0.685301,0.42088,0.271933,0.59399,...,0.686468,0.524149,0.700338,0.667017,0.673004,0.423807,0.216817,0.486892,0.755648,0.81278
2,Amgen Inc.,Drug Manufacturers - Major,Healthcare,2022,0.537965,0.470437,0.69717,0.400031,0.295552,0.568351,...,0.533106,0.494548,0.805487,0.662221,0.685324,0.374829,0.317656,0.474715,0.910131,0.883247
3,AstraZeneca plc,Drug Manufacturers - Major,Healthcare,2020,0.51356,0.812245,0.843195,0.468163,0.270528,0.667499,...,0.749569,0.452209,0.797712,0.6565,0.462833,0.512877,0.794841,0.452684,0.920139,0.91562
4,AstraZeneca plc,Drug Manufacturers - Major,Healthcare,2021,0.565152,0.577569,0.821552,0.424597,0.242517,0.594196,...,0.683276,0.419751,0.847874,0.651687,0.444614,0.535195,0.770781,0.448741,0.873677,0.843332
5,AstraZeneca plc,Drug Manufacturers - Major,Healthcare,2022,0.560903,0.785161,0.797112,0.434232,0.265389,0.204364,...,0.741022,0.468208,0.777058,0.65443,0.529257,0.270332,0.784577,0.434794,0.892388,0.853952
6,"Avanos Medical, Inc.",Medical Appliances & Equipment,Healthcare,2020,0.47238,,0.649706,0.328571,0.32376,0.799637,...,0.787273,0.496993,,0.716462,0.632381,0.645623,0.744499,0.385688,0.895167,0.772111
7,"Avanos Medical, Inc.",Medical Appliances & Equipment,Healthcare,2021,0.441834,,0.676229,0.298662,0.386835,0.74021,...,0.842935,0.486709,0.610708,0.617583,0.644806,0.645623,0.749767,0.385688,0.913885,0.732053
8,"Avanos Medical, Inc.",Medical Appliances & Equipment,Healthcare,2022,0.440919,,0.71748,0.390583,0.339683,0.863162,...,0.842493,0.565937,,0.66624,0.641077,0.645623,0.749725,0.341507,0.845486,0.845534
9,Bayer AG,Drug Manufacturers - Major,Healthcare,2020,0.549171,0.725959,0.664956,0.372115,0.363582,0.727791,...,0.711013,0.463574,0.639328,0.72033,0.627021,0.5595,0.696585,0.550496,0.31986,0.913034


# Data Cleaning

In [44]:
pivoted_data = pivoted_data.fillna(0)
pivoted_data = pivoted_data.round(2)

pivoted_data

Unnamed: 0,CompanyName,Industry,Sector,Year,Access_And_Affordability,Air_Quality,Business_Ethics,Business_Model_Resilience,Competitive_Behavior,Critical_Incident_Risk_Management,...,Labor_Practices,Management_Of_Legal_And_Regulatory_Framework,Physical_Impacts_Of_Climate_Change,Product_Design_And_Lifecycle_Management,Product_Quality_And_Safety,Selling_Practices_And_Product_Labeling,Supply_Chain_Management,Systemic_Risk_Management,Waste_And_Hazardous_Materials_Management,Water_And_Wastewater_Management
0,Amgen Inc.,Drug Manufacturers - Major,Healthcare,2020,0.54,0.66,0.77,0.4,0.31,0.61,...,0.56,0.53,0.77,0.65,0.56,0.47,0.24,0.51,0.91,0.95
1,Amgen Inc.,Drug Manufacturers - Major,Healthcare,2021,0.55,0.92,0.69,0.42,0.27,0.59,...,0.69,0.52,0.7,0.67,0.67,0.42,0.22,0.49,0.76,0.81
2,Amgen Inc.,Drug Manufacturers - Major,Healthcare,2022,0.54,0.47,0.7,0.4,0.3,0.57,...,0.53,0.49,0.81,0.66,0.69,0.37,0.32,0.47,0.91,0.88
3,AstraZeneca plc,Drug Manufacturers - Major,Healthcare,2020,0.51,0.81,0.84,0.47,0.27,0.67,...,0.75,0.45,0.8,0.66,0.46,0.51,0.79,0.45,0.92,0.92
4,AstraZeneca plc,Drug Manufacturers - Major,Healthcare,2021,0.57,0.58,0.82,0.42,0.24,0.59,...,0.68,0.42,0.85,0.65,0.44,0.54,0.77,0.45,0.87,0.84
5,AstraZeneca plc,Drug Manufacturers - Major,Healthcare,2022,0.56,0.79,0.8,0.43,0.27,0.2,...,0.74,0.47,0.78,0.65,0.53,0.27,0.78,0.43,0.89,0.85
6,"Avanos Medical, Inc.",Medical Appliances & Equipment,Healthcare,2020,0.47,0.0,0.65,0.33,0.32,0.8,...,0.79,0.5,0.0,0.72,0.63,0.65,0.74,0.39,0.9,0.77
7,"Avanos Medical, Inc.",Medical Appliances & Equipment,Healthcare,2021,0.44,0.0,0.68,0.3,0.39,0.74,...,0.84,0.49,0.61,0.62,0.64,0.65,0.75,0.39,0.91,0.73
8,"Avanos Medical, Inc.",Medical Appliances & Equipment,Healthcare,2022,0.44,0.0,0.72,0.39,0.34,0.86,...,0.84,0.57,0.0,0.67,0.64,0.65,0.75,0.34,0.85,0.85
9,Bayer AG,Drug Manufacturers - Major,Healthcare,2020,0.55,0.73,0.66,0.37,0.36,0.73,...,0.71,0.46,0.64,0.72,0.63,0.56,0.7,0.55,0.32,0.91


In [45]:
pivoted_data.to_csv('esg_data.csv', index=False) 

In [53]:
import pandas as pd


# Define the categories
environmental_labels = ['Air_Quality', 'Ecological_Impacts', 'Energy_Management', 'GHG_Emissions',
                        'Physical_Impacts_Of_Climate_Change', 'Product_Design_And_Lifecycle_Management',
                        'Waste_And_Hazardous_Materials_Management', 'Water_And_Wastewater_Management']

social_labels = ['Access_And_Affordability', 'Customer_Privacy', 'Customer_Welfare',
                 'Employee_Engagement_Inclusion_And_Diversity', 'Employee_Health_And_Safety',
                 'Human_Rights_And_Community_Relations', 'Labor_Practices']

governance_labels = ['Business_Ethics', 'Business_Model_Resilience', 'Competitive_Behavior',
                     'Critical_Incident_Risk_Management', 'Data_Security', 'Director_Removal',
                     'Management_Of_Legal_And_Regulatory_Framework', 'Product_Quality_And_Safety',
                     'Selling_Practices_And_Product_Labeling', 'Supply_Chain_Management', 'Systemic_Risk_Management']

# Sum the scores in each category and then divide by the total number of non-NaN entries for each row to get the average
pivoted_data['E_Score'] = pivoted_data[environmental_labels].sum(axis=1) 
pivoted_data['S_Score'] = pivoted_data[social_labels].sum(axis=1) 
pivoted_data['G_Score'] = pivoted_data[governance_labels].sum(axis=1) 
pivoted_data['ESG_Score'] = pivoted_data['E_Score'] + pivoted_data['S_Score'] + pivoted_data['G_Score']

# Display the updated DataFrame
pivoted_data1 = pivoted_data[['CompanyName', 'Year', 'E_Score', 'S_Score', 'G_Score', 'ESG_Score']]
pivoted_data1

Unnamed: 0,CompanyName,Year,E_Score,S_Score,G_Score,ESG_Score
0,Amgen Inc.,2020,5.66,3.96,5.96,15.58
1,Amgen Inc.,2021,5.5,3.54,5.39,14.43
2,Amgen Inc.,2022,5.83,3.32,5.44,14.59
3,AstraZeneca plc,2020,6.08,4.04,6.01,16.13
4,AstraZeneca plc,2021,6.05,4.05,5.44,15.54
5,AstraZeneca plc,2022,6.23,4.11,5.12,15.46
6,"Avanos Medical, Inc.",2020,4.54,4.23,5.59,14.36
7,"Avanos Medical, Inc.",2021,4.98,4.17,5.76,14.91
8,"Avanos Medical, Inc.",2022,3.83,4.07,5.89,13.79
9,Bayer AG,2020,5.46,3.82,6.05,15.33


In [54]:
pivoted_data

Unnamed: 0,CompanyName,Industry,Sector,Year,Access_And_Affordability,Air_Quality,Business_Ethics,Business_Model_Resilience,Competitive_Behavior,Critical_Incident_Risk_Management,...,Product_Quality_And_Safety,Selling_Practices_And_Product_Labeling,Supply_Chain_Management,Systemic_Risk_Management,Waste_And_Hazardous_Materials_Management,Water_And_Wastewater_Management,E_Score,S_Score,G_Score,ESG_Score
0,Amgen Inc.,Drug Manufacturers - Major,Healthcare,2020,0.54,0.66,0.77,0.4,0.31,0.61,...,0.56,0.47,0.24,0.51,0.91,0.95,5.66,3.96,5.96,15.58
1,Amgen Inc.,Drug Manufacturers - Major,Healthcare,2021,0.55,0.92,0.69,0.42,0.27,0.59,...,0.67,0.42,0.22,0.49,0.76,0.81,5.5,3.54,5.39,14.43
2,Amgen Inc.,Drug Manufacturers - Major,Healthcare,2022,0.54,0.47,0.7,0.4,0.3,0.57,...,0.69,0.37,0.32,0.47,0.91,0.88,5.83,3.32,5.44,14.59
3,AstraZeneca plc,Drug Manufacturers - Major,Healthcare,2020,0.51,0.81,0.84,0.47,0.27,0.67,...,0.46,0.51,0.79,0.45,0.92,0.92,6.08,4.04,6.01,16.13
4,AstraZeneca plc,Drug Manufacturers - Major,Healthcare,2021,0.57,0.58,0.82,0.42,0.24,0.59,...,0.44,0.54,0.77,0.45,0.87,0.84,6.05,4.05,5.44,15.54
5,AstraZeneca plc,Drug Manufacturers - Major,Healthcare,2022,0.56,0.79,0.8,0.43,0.27,0.2,...,0.53,0.27,0.78,0.43,0.89,0.85,6.23,4.11,5.12,15.46
6,"Avanos Medical, Inc.",Medical Appliances & Equipment,Healthcare,2020,0.47,0.0,0.65,0.33,0.32,0.8,...,0.63,0.65,0.74,0.39,0.9,0.77,4.54,4.23,5.59,14.36
7,"Avanos Medical, Inc.",Medical Appliances & Equipment,Healthcare,2021,0.44,0.0,0.68,0.3,0.39,0.74,...,0.64,0.65,0.75,0.39,0.91,0.73,4.98,4.17,5.76,14.91
8,"Avanos Medical, Inc.",Medical Appliances & Equipment,Healthcare,2022,0.44,0.0,0.72,0.39,0.34,0.86,...,0.64,0.65,0.75,0.34,0.85,0.85,3.83,4.07,5.89,13.79
9,Bayer AG,Drug Manufacturers - Major,Healthcare,2020,0.55,0.73,0.66,0.37,0.36,0.73,...,0.63,0.56,0.7,0.55,0.32,0.91,5.46,3.82,6.05,15.33


In [55]:
pivoted_data.to_csv('esg_data.csv', index=False)