In [1]:
import os
import numpy as np
import pandas as pd
import pathlib


## Dict

In [2]:
# Reading the single file which has Question and principles
ques = pd.read_excel(r"C:\Users\Dell\Desktop\esg_q_principle.xlsx")

In [3]:
ques

Unnamed: 0,Element Name,Principles
0,CorporateIdentityNumber,
1,NameOfTheCompany,
2,YearOfIncorporation,
3,AddressOfRegisteredOfficeOfCompany,
4,AddressOfCorporateOfficeOfCompany,
...,...,...
509,RemarksOnApprovalByAuditCommittee,Principle 9
510,AmountOfRelatedPartyTransactionDuringTheReport...,Principle 9
511,AmountOfRelatedPartyTransaction,Principle 9
512,DetailsOfOtherRelatedPartyTransaction,Principle 9


In [4]:
# classifying All the questions in ESG (Environment, Social, Governance) category based on Principles
category = ques['Principles'].apply(lambda x: 'Environment' if (x == "Principle 2" or x=="Principle 6") else x)
category = category.apply(lambda x: 'Social' if (x == "Principle 3" or x=="Principle 4" or x=="Principle 5") else x)
category = category.apply(lambda x: 'Governance' if (x == "Principle 1" or x=="Principle 7" or x=="Principle 8" or x=="Principle 9") else x)
ques["esg_category"] = category

In [5]:
ques[190:]

Unnamed: 0,Element Name,Principles,esg_category
190,DoesTheEntityProvideTransitionAssistanceProgra...,Principle 3,Social
191,PercentageOfHealthAndSafetyPracticesOfValueCha...,Principle 4,Social
192,PercentageOfWorkingConditionsOfValueChainPartn...,Principle 4,Social
193,DetailsOfAnyCorrectiveActionTakenOrUnderwayToA...,Principle 4,Social
194,DescribeTheProcessesForIdentifyingKeyStakehold...,Principle 4,Social
...,...,...,...
509,RemarksOnApprovalByAuditCommittee,Principle 9,Governance
510,AmountOfRelatedPartyTransactionDuringTheReport...,Principle 9,Governance
511,AmountOfRelatedPartyTransaction,Principle 9,Governance
512,DetailsOfOtherRelatedPartyTransaction,Principle 9,Governance


In [6]:
# map element name to category through dictionary
ques_non_null = ques.dropna()# 120 were null hence removed while making the dictionary

dc_ques_category = {}
for element, category, principle in zip(ques_non_null["Element Name"].str.lower(), ques_non_null["esg_category"], ques_non_null["Principles"]):
    dc_ques_category[element] = [principle, category]

In [8]:
dc_ques_category   # this dictionary contains questions as key and esg category as value

{'percentageofrandd': ['Principle 2', 'Environment'],
 'detailsofimprovementsinenvironmentalandsocialimpactsduetorandd': ['Principle 2',
  'Environment'],
 'percentageofcapex': ['Principle 2', 'Environment'],
 'detailsofimprovementsinenvironmentalandsocialimpactsduetocapex': ['Principle 2',
  'Environment'],
 'doestheentityhaveproceduresinplaceforsustainablesourcing': ['Principle 2',
  'Environment'],
 'describetheprocessesinplacetosafelyreclaimyourproductsforreusingrecyclinganddisposingattheendoflifeforplasticsincludingpackagingexplanatorytextblock': ['Principle 2',
  'Environment'],
 'describetheprocessesinplacetosafelyreclaimyourproductsforreusingrecyclinganddisposingattheendoflifeforewasteexplanatorytextblock': ['Principle 2',
  'Environment'],
 'describetheprocessesinplacetosafelyreclaimyourproductsforreusingrecyclinganddisposingattheendoflifeforhazardouswasteexplanatorytextblock': ['Principle 2',
  'Environment'],
 'describetheprocessesinplacetosafelyreclaimyourproductsforreusing

## Adding principles column and  esg_category in all 169 files

In [9]:
# Accessing each xbrl file and adding Principle and esg_category column and labeling them according to the standard unique questions
path = r"C:\Users\Dell\Desktop\esg xbrl files copy"   # Path to the xbrl file folder

directory = pathlib.Path(path)
txt_files = directory.glob("*.xlsx") 
i=0
for file in txt_files:
    df = pd.read_excel(file)
    df["Principles"] = df["Element Name"].str.lower().apply(lambda x: dc_ques_category[x][0] if x in dc_ques_category.keys() else np.nan)
    df["esg_category"] = df["Element Name"].str.lower().apply(lambda x: dc_ques_category[x][1] if x in dc_ques_category.keys() else np.nan)
    df.to_excel(file, index=False)  # Saving the modified xbrl's

## bunch of files

In [11]:
path = r"C:\Users\Dell\Desktop\esg xbrl files copy"
directory = pathlib.Path(path)
file_path = directory.glob("*.xlsx")

# Function to calculate ESG SCORE
def esg_score(category):
    """
    Calculate the ESG (Environmental, Social, and Governance) score for a given category.

    Args:
        category (str): The category for which the ESG score is calculated.
        Select Category from : ["Environment", "Social", "Governance"]

    Returns:
        float: The calculated ESG score for the specified category, ranging between 0 and 1.
    """
    total = len(df[df.esg_category==category].Score)       # total questions of a category
    sum_of_1 = df[df.esg_category==category].Score.sum()   # count of 1's in score column from the total questions of particular category
    score = round(sum_of_1/total, 3)                       # score will be between 0 and 1.
    return score

# The final dataframe that has company identification no.(CIN), company name, E,S,G SCORE
final_df = pd.DataFrame(columns=["CIN", "COMPANY_NAME", "E_SCORE", "S_SCORE", "G_SCORE"])   

for file in file_path:
    df = pd.read_excel(file)
    
    # Calculating E,S,G SCORE for each class
    E_score = esg_score("Environment")
    S_score = esg_score("Social")
    G_score = esg_score("Governance")
    
    try:
        cin = df[df["Element Name"]=="CorporateIdentityNumber"]["Fact Value"][0] # Extracting CIN
    except Exception:
        cin = np.nan
        print(file)
    try:
        company_name = df[df["Element Name"]=="NameOfTheCompany"]["Fact Value"][1] # Extracting Company Name
    except:
        company_name = np.nan
    
    # Appending data for one company at a time in the final df
    data_dc = {"CIN":cin, "COMPANY_NAME":company_name, "E_SCORE":E_score, "S_SCORE":S_score, "G_SCORE":G_score}
    final_df.loc[len(final_df)] = data_dc
final_df

  score = round(sum_of_1/total, 3)                       # score will be between 0 and 1.


C:\Users\Dell\Desktop\esg xbrl files copy\br_rallis_2021_2022_20220800055.xlsx


Unnamed: 0,CIN,COMPANY_NAME,E_SCORE,S_SCORE,G_SCORE
0,L31300KA1987PLC013543,3M INDIA LIMITED,0.215,0.133,0.520
1,L65922RJ2011PLC034297,Aavas Financiers Limited,0.170,0.317,0.253
2,L32202KA1949PLC032923,ABB INDIA LIMITED,0.581,0.484,0.591
3,L51100GJ1993PLC019067,Adani Enterprises Limited (“AEL or the Company”),0.583,0.493,0.613
4,L40106GJ2015PLC082007,Adani Green Energy Limited,0.287,0.272,0.530
...,...,...,...,...,...
164,L29150TG1930PLC000576,VST INDUSTRIES LIMITED,0.443,0.383,0.436
165,L27100GJ1995PLC025609,WELSPUN CORP LIMITED,0.386,0.415,0.566
166,L17110GJ1985PLC033271,Welspun India Limited,0.413,0.393,0.451
167,L32102KA1945PLC020800,Wipro Limited,0.385,0.341,0.747


def esg_score(category):
    total = len(df[df.esg_category==category].Score)
    sum_of_1 = df[df.esg_category==category].Score.sum()
    score = round(sum_of_1/total, 3)
    return score

E_score = esg_score("Environment")
S_score = esg_score("Social")
G_score = esg_score("Governance")
print(E_score, S_score, G_score)

#cin and company name
cin = df[df["Element Name"]=="CorporateIdentityNumber"]["Fact Value"][0]

company_name = df[df["Element Name"]=="NameOfTheCompany"]["Fact Value"][1]

final_df = pd.DataFrame(columns=["CIN", "COMPANY_NAME", "E_SCORE", "S_SCORE", "G_SCORE"])

test_dc = {"CIN":"cin123", "COMPANY_NAME":"Tata", "E_SCORE":1, "S_SCORE":2, "G_SCORE":3}

final_df.loc[len(final_df)] = test_dc

In [264]:
final_df.to_excel(r"C:\Users\Dell\Desktop\esg_score.xlsx", index=False)