# Determining the MSCI-ESG Rating 

## Semantic similarity computation

In [67]:
import semantic_eval

# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2 # notebook will reload external python modules;
%matplotlib inline # make figures appear inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
# Run the entry point of the local semantic_eval module and keep its return
# value for the dataframe step; the stage messages it prints appear below.
results = semantic_eval.main()

Preprocessing...
Loading model...
Embedding...
Calculating similarity...
Classifying sentences...


In [11]:
# Turn the raw results into a dataframe (one row per key term) and display it.
# sort='avg_score' presumably orders rows by average similarity -- the table
# below is in descending avg_score order; confirm against semantic_eval.
res_df = semantic_eval.get_results_dataframe(results, sort='avg_score')
res_df

Unnamed: 0,key_term,avg_score,max_score,sentences
0,Opposition des parties prenantes,0.194199,0.426371,[procèsverbal de la séance du 12 décembre 2022...
1,Opportunités Environnementals,0.173399,0.500211,[ils ont obtenu laccord ce matin dune p...
2,Changement climatique,0.140987,0.33624,[les municipaux claude uldry alexandre \nd...
3,pollution et déchets,0.140179,0.438968,[ m le président donne lecture de la lettre d...
4,Responsabilité du produit,0.117823,0.311558,[appel \n\n87 conseillères et conseillers so...
5,capital humain,0.108223,0.389666,"[4, 5, cette tâche est effectuée au droit des ..."
6,Gouvernance d'entreprise,0.093679,0.299914,[seance du conseil communal du 30 janvier 2023...
7,Opportunités sociales,0.0927,0.283866,[dans ces conditions difficiles ils peuvent im...
8,Comportement d'entreprise,0.084962,0.23259,[pour arriver à cette somme la participation d...
9,Capital Naturel,0.077399,0.417943,[ne pouvant agir partout en même \ntemps lors ...


In [9]:
# Save the table as CSV without the index column.
# NOTE: to_csv does not create missing directories, so ./results must already
# exist relative to the notebook's working directory.
res_df.to_csv('./results/semantic_eval.csv', index=False)

## Sentiment Analysis

In [69]:
from transformers import pipeline
from tqdm import tqdm

# Text-classification pipeline used by compute_rating; the same checkpoint id
# supplies both the model weights and the tokenizer.
SENTIMENT_CHECKPOINT = "cmarkea/distilcamembert-base-sentiment"
analyzer = pipeline(
    task='text-classification',
    model=SENTIMENT_CHECKPOINT,
    tokenizer=SENTIMENT_CHECKPOINT,
)


# Letter bands for a 0-10 score, listed from best to worst. Each letter maps to
# [lower, upper]; neighbouring bands share an edge, and because lookups scan in
# this order a score exactly on an edge resolves to the better letter.
# NOTE(review): with 1-5 star labels doubled by compute_rating the minimum
# score is 2, so the 'CCC' band looks unreachable -- confirm this is intended.
_MSCI_LETTERS = ('AAA', 'AA', 'A', 'BBB', 'BB', 'B', 'CCC')
_MSCI_EDGES = (10, 8.571, 7.143, 5.714, 4.286, 2.857, 1.429, 0)
MSCI_rating = {
    letter: [lower, upper]
    for letter, upper, lower in zip(_MSCI_LETTERS, _MSCI_EDGES, _MSCI_EDGES[1:])
}

def compute_rating(sentence, analyzer, return_all_scores=False):
    """Convert the classifier output for one sentence into a numeric rating.

    The number rating is read from the pipeline result: every label is
    expected to start with an integer token (e.g. "4 stars"), which is parsed
    with ``int`` and doubled. With labels out of 5 this gives a rating out of 10.

    Parameters
    ----------
    sentence
        Text passed unchanged to ``analyzer``.
    analyzer
        Callable invoked as ``analyzer(sentence, return_all_scores=...)``.
    return_all_scores
        If False, ``result[0]`` is read as a single ``{'label': ...}`` dict and
        its star count is used. If True, ``result[0]`` is read as an iterable
        of ``{'label', 'score'}`` dicts and the score-weighted sum of the star
        counts is used instead.

    Returns
    -------
    int or float
        Twice the star count (top-label mode) or twice the score-weighted sum
        of star counts (all-scores mode).
    """
    def stars_of(prediction):
        # "4 stars" -> 4
        return int(prediction['label'].split()[0])

    output = analyzer(sentence, return_all_scores=return_all_scores)
    first = output[0]

    if not return_all_scores:
        return stars_of(first) * 2

    weighted_stars = sum(stars_of(p) * p['score'] for p in first)
    return weighted_stars * 2


def get_msci_rating(expected_rating, rating_bands=None):
    """Return the letter whose [lower, upper] band contains ``expected_rating``.

    Parameters
    ----------
    expected_rating
        Numeric score to classify.
    rating_bands
        Optional mapping ``letter -> [lower, upper]``. When omitted, the
        module-level ``MSCI_rating`` table is used, as before.

    Returns
    -------
    str or None
        The first letter, in the mapping's iteration order, whose inclusive
        band contains the score. Because both ends are inclusive, a score
        sitting exactly on a shared edge resolves to whichever band is listed
        first. ``None`` when no band matches (out-of-range score or NaN).
    """
    bands = MSCI_rating if rating_bands is None else rating_bands
    for letter, (lower, upper) in bands.items():
        if lower <= expected_rating <= upper:
            return letter
    # Make the "no matching band" outcome explicit rather than falling off the end.
    return None

# Start from empty, correctly-typed columns: float NaN for scores and None for
# letters, so storing float averages never has to upcast an integer column.
res_df['rating'] = float('nan')
res_df['MSCI_rating'] = None

# Walk the rows by index label so the .loc writes below target the same row the
# sentences were read from. tqdm wraps the items directly and is given the row
# count, so it can draw a real progress bar instead of a bare iteration counter.
for row_label, sentences in tqdm(res_df['sentences'].items(), total=len(res_df)):
    if len(sentences) == 0:
        # Nothing to score: keep NaN / None rather than dividing by zero.
        continue
    rating = 0
    for sentence in sentences:
        rating += compute_rating(sentence, analyzer, return_all_scores=False)
    avg_rating = rating / len(sentences)
    # Round only the stored score. The letter is chosen from the exact average,
    # because the band edges have three decimals and a two-decimal rounding
    # could push a score across an edge.
    res_df.loc[row_label, 'rating'] = round(avg_rating, 2)
    res_df.loc[row_label, 'MSCI_rating'] = get_msci_rating(avg_rating)

10it [00:12,  1.27s/it]


The following result is obtained by taking, for each sentence, the single star rating (out of 5) predicted by the model and multiplying it by 2 to get a rating out of 10. We then average these ratings over the sentences of each category.

In [70]:
# Print a caption, then display the table with the 'rating' and 'MSCI_rating'
# columns filled in by the rating loop. The caption has no placeholders, so a
# plain string literal is used.
print("\t\t\t MSCI-ESG Rating for CCPV-230130. Sorted by most popular topics (avg_score)")
res_df

			 MSCI-ESG Rating for CCPV-230130. Sorted by most popular topics (avg_score)


Unnamed: 0,key_term,avg_score,max_score,sentences,rating,MSCI_rating
0,Opposition des parties prenantes,0.194199,0.426371,[procèsverbal de la séance du 12 décembre 2022...,6.13,A
1,Opportunités Environnementals,0.173399,0.500211,[ils ont obtenu laccord ce matin dune p...,6.86,A
2,Changement climatique,0.140987,0.33624,[les municipaux claude uldry alexandre \nd...,6.14,A
3,pollution et déchets,0.140179,0.438968,[ m le président donne lecture de la lettre d...,5.45,BBB
4,Responsabilité du produit,0.117823,0.311558,[appel \n\n87 conseillères et conseillers so...,6.94,A
5,capital humain,0.108223,0.389666,"[4, 5, cette tâche est effectuée au droit des ...",7.45,AA
6,Gouvernance d'entreprise,0.093679,0.299914,[seance du conseil communal du 30 janvier 2023...,6.71,A
7,Opportunités sociales,0.0927,0.283866,[dans ces conditions difficiles ils peuvent im...,6.44,A
8,Comportement d'entreprise,0.084962,0.23259,[pour arriver à cette somme la participation d...,4.0,BB
9,Capital Naturel,0.077399,0.417943,[ne pouvant agir partout en même \ntemps lors ...,6.86,A


Alternatively, the model can output a probability for each star rating out of 5. In this case, we multiply each star rating by its probability, sum the products, and multiply the result by 2 to obtain an expected rating out of 10. We then average these expected ratings over the sentences of each category.

In [63]:
# NOTE(review): the expected-rating figures shown under this cell require the
# rating loop to call compute_rating(..., return_all_scores=True); the loop as
# written passes False -- confirm and re-run it in that mode before this cell.
# The caption has no placeholders, so a plain string literal is used.
print("\t\t\t MSCI-ESG Expected Rating for CCPV-230130. Sorted by most popular topics (avg_score)")
res_df

			 MSCI-ESG Expected Rating for CCPV-230130. Sorted by most popular topics (avg_score)


Unnamed: 0,key_term,avg_score,max_score,sentences,rating,MSCI_rating
0,Opposition des parties prenantes,0.194199,0.426371,[procèsverbal de la séance du 12 décembre 2022...,6.176016,A
1,Opportunités Environnementals,0.173399,0.500211,[ils ont obtenu laccord ce matin dune p...,6.555781,A
2,Changement climatique,0.140987,0.33624,[les municipaux claude uldry alexandre \nd...,6.112144,A
3,pollution et déchets,0.140179,0.438968,[ m le président donne lecture de la lettre d...,5.692588,BBB
4,Responsabilité du produit,0.117823,0.311558,[appel \n\n87 conseillères et conseillers so...,6.6924,A
5,capital humain,0.108223,0.389666,"[4, 5, cette tâche est effectuée au droit des ...",6.945482,A
6,Gouvernance d'entreprise,0.093679,0.299914,[seance du conseil communal du 30 janvier 2023...,6.213063,A
7,Opportunités sociales,0.0927,0.283866,[dans ces conditions difficiles ils peuvent im...,6.532227,A
8,Comportement d'entreprise,0.084962,0.23259,[pour arriver à cette somme la participation d...,5.141696,BBB
9,Capital Naturel,0.077399,0.417943,[ne pouvant agir partout en même \ntemps lors ...,6.69397,A
