# Playground

## Import Libraries

In [1]:
import pandas as pd
import plotly.graph_objs as go
import plotly.offline as offline
import os
import numpy as np
from plotly.offline import plot
from pathlib import Path

In [2]:
# Switch Plotly into offline notebook mode before any figure is displayed.
# NOTE(review): connected=True is expected to load plotly.js from the CDN instead of
# embedding it in the notebook — confirm against the installed Plotly version.
offline.init_notebook_mode(connected=True)

In [3]:
# Locate the workbook: start from the parent of the current working directory,
# then climb three more levels to the folder that holds 'sp100.xlsx'.
path = Path(os.getcwd()).parent
XLSX_PATH = str(path.parent.parent.parent / 'sp100.xlsx')

## Load, merge and filter the company data

In [4]:
# total number of companies
total_companies_count = 100

# Columns to read from each workbook sheet (sheet name -> columns to keep).
# Every sheet includes 'company_id', which is the join key used below.
COLS_TO_USE = {
    'companies': ['company_id', 'company_name', 'SP100'],
    'financials': ['company_id', 'Revenue_num_tradingview'],
    'reporting': ['company_id', 'last_reporting_year'],
    'rat_1': ['company_id', 'criteria_1_1', 'criteria_1_2'],
    'rat_2': ['company_id', 'criteria_2_1', 'criteria_2_2', 'assurance_type', 'verification_public', 'cdp_report_public'],
    'rat_3': ['company_id', 'criteria_3_1'],
}

# Read every sheet, in dict order, restricted to the listed columns.
all_df = [
    pd.read_excel(XLSX_PATH, sheet_name=sheetname, engine='openpyxl', usecols=cols)
    for sheetname, cols in COLS_TO_USE.items()
]

# Left-join each remaining sheet onto the first one ('companies') by company_id.
merged_df = all_df[0]
for right_df in all_df[1:]:
    merged_df = merged_df.merge(right_df, how="left", on="company_id")

# Keep only the rows flagged SP100 == 1. Further conditions can be AND-ed
# into filter_cond if needed.
cond1 = (merged_df['SP100'] == 1)
filter_cond = cond1
merged_df = merged_df.loc[filter_cond]

# NOTE: the per-sector revenue / company-count aggregations are disabled here;
# they grouped by a 'Sector1' column that is not among the columns loaded above.



In [5]:
def extract_stats_notables(cond, extract="stat", df=None):
    """Count the companies matching ``cond`` and list the largest ones that do not.

    Parameters
    ----------
    cond : boolean mask
        Row selector passed to ``df.loc``; rows where it is True are "selected".
    extract : str, default "stat"
        ``"stat"`` returns only the count, ``"notables"`` returns only the
        notables table, any other value returns both as a tuple.
    df : pandas.DataFrame, optional
        Frame to analyse. It must provide the columns 'company_id',
        'company_name' and 'Revenue_num_tradingview'. Defaults to the
        notebook-level ``merged_df``.

    Returns
    -------
    The number of non-null 'company_id' values among the selected rows, and/or
    a DataFrame with the columns ['company_id', 'company_name'] holding up to
    five non-selected companies with the highest 'Revenue_num_tradingview'.

    Side effects
    ------------
    Prints the notables table on every call.
    """
    if df is None:
        df = merged_df

    selection_df = df.loc[cond]
    selection_count = selection_df['company_id'].count()

    # Companies whose id is NOT in the selection, i.e. those missing the criterion.
    selection_notables = df[~df.company_id.isin(selection_df['company_id'])]

    # Explicit copy: the column assignment below would otherwise write into a
    # slice of another frame and raise pandas' SettingWithCopyWarning.
    notables_list = (
        selection_notables
        .sort_values('Revenue_num_tradingview', ascending=False)
        .iloc[:5, :]
        .copy()
    )
    notables_list['company_id'] = pd.to_numeric(notables_list['company_id'], downcast='integer')
    notables_list_names = notables_list[['company_id', 'company_name']]
    print(notables_list_names)

    if extract == "stat":
        return selection_count
    elif extract == "notables":
        return notables_list_names
    else:
        return selection_count, notables_list_names

In [6]:
# Boolean masks over merged_df, one per transparency criterion.
# Masks are combined with `|` (element-wise OR), the documented pandas operator
# for boolean Series, rather than `+`.

# Companies reporting GHG emissions (last reporting year is 2019 or later)
cond_public_reporting = merged_df['last_reporting_year'] >= 2019

# Companies reporting 2020 GHG emissions
cond_2020_reporting = merged_df['last_reporting_year'] == 2020

# Companies reporting at least 2 years for scope 1 and 2
cond_2_years = merged_df['criteria_1_1'] == 1

# Companies verifying scope 1 and 2
cond_verif_s1s2 = merged_df['criteria_1_2'] == 'full'

# Companies reporting partly or fully scope 3
cond_part_s3 = (merged_df['criteria_2_1'] == 'partly') | (merged_df['criteria_2_1'] == 'full')

# Companies reporting fully scope 3
cond_full_s3 = (merged_df['criteria_2_1'] == 'full')

# Companies verifying fully scope 3
cond_verif_full_s3 = (merged_df['criteria_2_2'] == 'full')

# Companies making their verification report public (for 2019 or 2020)
cond_verif_public = (merged_df['verification_public'] == 2019) | (merged_df['verification_public'] == 2020)

# Companies with limited verification
cond_limited_verif = (merged_df['assurance_type'] == 'limited')

# Companies making their CDP report public (reads the 'cdp_report_public' column)
cond_cdp_public = (merged_df['cdp_report_public'] == 'yes')

# Companies reporting to CDP (criteria_3_1 > 0); this is the mask charted as
# "questionnaire disclosed to CDP"
cond_report_cdp = (merged_df['criteria_3_1'] > 0)


## Build the chart

In [7]:
# One row per charted criterion:
#   (boolean mask, label when the criterion is met, label when it is missed).
# Keeping the three together keeps cond_list, x_cat and fail_cat aligned by
# construction. cond_verif_public, cond_limited_verif and cond_cdp_public are
# not charted.
criteria = [
    (
        cond_public_reporting,
        'Some <b>public disclosures</b> exist about GHG emissions',
        'No <b>public disclosures</b> exist about GHG emissions',
    ),
    (
        cond_2020_reporting,
        '<b>Last year</b> emissions are reported as of September 2021',
        '<b>Last year</b> emissions are not publicly reported as of September 2021',
    ),
    (
        cond_2_years,
        '<b>2 years</b> of data are available',
        '<b>2 years</b> of data are not available',
    ),
    (
        cond_verif_s1s2,
        'Scope 1-2 emissions are reported <b>and</b> verified',
        'Scope 1-2 emissions are not both reported <b>and</b> verified',
    ),
    (
        cond_part_s3,
        'Scope 3 emissions are <b>partly</b> reported',
        'Scope 3 emissions are not publicly reported, not even <b>partly</b>',
    ),
    (
        cond_full_s3,
        '<b>All emissions</b> (S1-S2-S3) are reported',
        '<b>Not all emissions</b> (S1-S2-S3) are publicly reported',
    ),
    (
        cond_verif_full_s3,
        'All emissions (S1-S2-S3) are reported <b>and</b> verified',
        'All emissions (S1-S2-S3) are not both publicly reported <b>and</b> verified',
    ),
    (
        cond_report_cdp,
        '2020 Climate change questionnaire disclosed to <b>CDP</b>',
        '2020 Climate change questionnaire are not submitted to <b>CDP</b>',
    ),
]
cond_list = [cond for cond, _, _ in criteria]
x_cat = [met_label for _, met_label, _ in criteria]
fail_cat = [missed_label for _, _, missed_label in criteria]

# Evaluate each criterion once. Any `extract` value other than "stat" or
# "notables" makes extract_stats_notables return (count, notables) together,
# so the work (and the printed table) is no longer done twice per criterion.
stats_and_notables = [extract_stats_notables(cond, "both") for cond in cond_list]
companies_count = [count for count, _ in stats_and_notables]
notables_list = [names for _, names in stats_and_notables]

# Positions into x_cat / companies_count.
# order_cat is the category order handed to the chart's y axis; the *_idx
# lists assign each criterion to one of the four bar groups.
order_cat = [0, 7, 2, 3, 1, 4, 5, 6]
disclosure_idx = [0, 2, 7]
completeness_idx = [4, 5]
verif_idx = [3, 6]
timeliness_idx = [1]

     company_id             company_name
16           17  Berkshire Hathaway Inc.
100         101                    Tesla
34           35                  Danaher
    company_id             company_name
98          99                  Walmart
16          17  Berkshire Hathaway Inc.
6            7     Alphabet Inc. Google
32          33                   Costco
82          83        Schlumberger Ltd.
    company_id                 company_name
16          17      Berkshire Hathaway Inc.
32          33                       Costco
46          47             General Electric
80          81        Raytheon Technologies
24          25  Charter Communications Inc.
    company_id             company_name
16          17  Berkshire Hathaway Inc.
41          42              Exxon Mobil
32          33                   Costco
44          45       Ford Motor Company
25          26                  Chevron
    company_id             company_name
87          88        Texas Instruments
16          

In [8]:
# Bar colours; the trace definitions pick entries 0-3, one per bar group.
color_palette = [
    "#382A3D",
    "#70547A",
    "#AB80BA",
    "#E5ACFA",
]

In [9]:
import plotly.graph_objects as go


def _make_category_trace(name, indices, color):
    """Build the horizontal bar trace for one group of criteria.

    Parameters
    ----------
    name : str
        Legend entry for the trace.
    indices : list of int
        Positions into ``x_cat`` (bar labels) and ``companies_count`` (bar lengths).
    color : str
        Colour used for both the bar fill and its 1px outline.

    Returns
    -------
    plotly.graph_objects.Bar
    """
    return go.Bar(
        y=[x_cat[idx] for idx in indices],
        x=[companies_count[idx] for idx in indices],
        name=name,
        # NOTE(review): the raw company count is labelled with '%'. That is only
        # a true percentage if exactly 100 companies pass the SP100 filter — confirm.
        text=[str(companies_count[idx]) + '%' for idx in indices],
        textposition='auto',
        orientation='h',
        marker=dict(
            color=color,
            opacity=1,
            line=dict(
                color=color,
                width=1)),
    )


# One trace per criterion group, each with its own palette entry.
trace_disclosure = _make_category_trace('Disclosure', disclosure_idx, color_palette[0])
trace_timeliness = _make_category_trace('Timeliness', timeliness_idx, color_palette[1])
trace_verification = _make_category_trace('Verification', verif_idx, color_palette[2])
# The variable name keeps its original spelling so existing references still resolve.
trace_completeteness = _make_category_trace('Completeness', completeness_idx, color_palette[3])

data = [trace_disclosure, trace_timeliness, trace_verification, trace_completeteness]


In [10]:
# Single footnote annotation, positioned in paper coordinates (x=0, y=-0.109)
# and drawn without an arrow.
annotations = [
    {
        'xref': 'paper',
        'yref': 'paper',
        'x': 0,
        'y': -0.109,
        'text': 'Information based on public data as of September 2021.',
        'font': {'family': 'Arial', 'size': 10, 'color': 'rgb(150,150,150)'},
        'showarrow': False,
    }
]

In [11]:
# Resolve the logo image relative to the working directory: parent of the cwd,
# three more levels up, then down into the Django static folder.
path = Path(os.getcwd()).parent
logo_path = str(
    path.parent.parent.parent.joinpath(
        'django_project', 'django_project', 'static', 'django_project',
        'images', 'logos', 'logo_66.png',
    )
)

In [12]:
# Display the resolved logo path as a quick sanity check.
# NOTE(review): no later cell visible here reads logo_path — confirm it is still needed.
logo_path
# Expected shape: <base dir>/django_project/django_project/static/django_project/images/logos/logo_66.png

'/Users/vincentmanier/Documents/django_project/django_project/django_project/static/django_project/images/logos/logo_66.png'

In [13]:
# Figure layout: grouped horizontal bars, legend under the plot, y-axis
# categories in the explicit order given by order_cat.
layout = go.Layout(
    barmode='group',
    # The nested title.text / title.font form replaces the deprecated
    # top-level `titlefont` attribute.
    title=dict(
        text='Carbon Footprint Transparency of Major US Companies',
        font=dict(family='Arial', size=25),
    ),
    showlegend=True,
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=-0.2,
        xanchor="left",
        x=0.5,
    ),
    yaxis=dict(
        showgrid=False,
        showline=False,
        showticklabels=True,
        #domain=[0, 0.85],
        categoryorder='array',
        categoryarray=[x_cat[i] for i in order_cat],
    ),
    xaxis=dict(
        zeroline=False,
        showline=False,
        showticklabels=True,
        showgrid=True,
    ),
    margin=dict(l=100, r=20, t=70, b=70),
    paper_bgcolor='rgb(248, 248, 255)',
    plot_bgcolor='rgb(248, 248, 255)',
    annotations=annotations,
)

fig = go.Figure(data=data,
                layout=layout)

fig.show()

In [14]:
# Options handed to plot() for the HTML export.
config = dict(displaylogo=False, responsive=True)

name_fig = 'transparency_analysis'

# Output locations, relative to the current working directory.
image_target = "../../../django_project/static/django_project/images/charts/{}/{}.jpeg".format(
    'general',
    name_fig)
html_target = '../../../django_project/templates/django_project/home/charts/html_exports/{}/{}.html'.format(
    'general',
    name_fig)

# Static image export at 1500x500, scale 1.
fig.write_image(image_target, scale=1, width=1500, height=500)

# HTML export; auto_open=False keeps it from opening in a browser.
# Left as the last expression so the cell still shows plot()'s return value.
plot(fig, config=config, filename=html_target, auto_open=False)

'../../../django_project/templates/django_project/home/charts/html_exports/general/transparency_analysis.html'

In [15]:
# Table of the top 5 companies by revenue that miss each criterion

In [16]:
import json

# Map each "criterion missed" label to its notables table, serialised in
# pandas' 'split' orientation. Positions are taken over the length of x_cat.
notables_dict = {
    fail_cat[i]: notables_list[i].to_dict('split')
    for i in range(len(x_cat))
}


In [17]:
# Inspect the label -> notables mapping before it is written to disk.
notables_dict

{'No <b>public disclosures</b> exist about GHG emissions': {'index': [16,
   100,
   34],
  'columns': ['company_id', 'company_name'],
  'data': [[17, 'Berkshire Hathaway Inc.'], [101, 'Tesla'], [35, 'Danaher']]},
 '<b>Last year</b> emissions are not publicly reported as of September 2021': {'index': [98,
   16,
   6,
   32,
   82],
  'columns': ['company_id', 'company_name'],
  'data': [[99, 'Walmart'],
   [17, 'Berkshire Hathaway Inc.'],
   [7, 'Alphabet Inc. Google'],
   [33, 'Costco'],
   [83, 'Schlumberger Ltd.']]},
 '<b>2 years</b> of data are not available': {'index': [16, 32, 46, 80, 24],
  'columns': ['company_id', 'company_name'],
  'data': [[17, 'Berkshire Hathaway Inc.'],
   [33, 'Costco'],
   [47, 'General Electric'],
   [81, 'Raytheon Technologies'],
   [25, 'Charter Communications Inc.']]},
 'Scope 1-2 emissions are not both reported <b>and</b> verified': {'index': [16,
   41,
   32,
   44,
   25],
  'columns': ['company_id', 'company_name'],
  'data': [[17, 'Berkshire H

In [18]:
# Write the label -> notables mapping as JSON into the Django static data folder
# (path is relative to the current working directory).
name_json = 'top5_transp_miss_cut'
filename = "../../../django_project/static/django_project/data/{}.json".format(
    name_json)
# The context manager closes the file even if json.dump raises, which the
# manual open()/close() pair did not guarantee.
with open(filename, "w") as file_to_save:
    json.dump(notables_dict, file_to_save)