# Playground

## Import Libraries

In [28]:
import pandas as pd
import plotly.graph_objs as go
import plotly.offline as offline
import os
import numpy as np
from plotly.offline import plot
from pathlib import Path

In [29]:
# Initialise plotly's offline notebook mode; must run before offline.iplot() is used below.
offline.init_notebook_mode(connected=True)

# Heatmap of GHG Emissions Scopes by Sector

In [30]:
# Notebook-wide switch: when True, only rows whose 'scope3_completeness'
# column equals "full" are kept, and the exported figure is named
# 'real_scope_heatmap' instead of 'reported_scope_heatmap'.
scope3_completeness = True

In [31]:
# The workbook lives three directory levels above the current working
# directory (parent of cwd, then two more levels up).
path = Path.cwd().parent
XLSX_PATH = str(path.parent.parent / 'sp100.xlsx')

# Sheet name -> columns to read from that sheet. Every sheet carries
# 'company_id', which is the join key used below.
COLS_TO_USE = {
    'ghg_quant': ['company_id', 'ghg_scope_1', 'ghg_loc_scope_2', 'ghg_mkt_scope_2',
                  'ghg_scope3_total', 'reporting_year', "Source"],
    'grouping': ['company_id', 'Sector1'],
    'companies': ['company_id', 'SP100'],
    'ghg_qual': ['company_id', 'scope3_completeness'],
}

# Read each sheet once, restricted to the columns listed for it.
all_df = [
    pd.read_excel(XLSX_PATH, sheet_name=sheetname, engine='openpyxl', usecols=cols)
    for sheetname, cols in COLS_TO_USE.items()
]

# Left-join every remaining sheet onto the first one ('ghg_quant') by company.
merged_df = all_df[0]
for right_df in all_df[1:]:
    merged_df = pd.merge(left=merged_df, right=right_df, how="left", on="company_id")

# Keep only final (not provisional) 2019 figures for SP100 members.
filter_cond = (
    (merged_df['reporting_year'] == 2019)
    & (merged_df['Source'] == 'Final')
    & (merged_df['SP100'] == 1)
)
if scope3_completeness:
    # Optionally restrict to companies flagged as reporting scope 3 in full.
    filter_cond = filter_cond & (merged_df['scope3_completeness'] == "full")

# .copy() detaches the result from the unfiltered frame so that later cells
# can assign columns on merged_df without a SettingWithCopyWarning.
merged_df = merged_df.loc[filter_cond].copy()

In [32]:
# Inspect the filtered frame (final 2019 figures of SP100 companies).
merged_df


Unnamed: 0,company_id,Source,reporting_year,ghg_scope_1,ghg_loc_scope_2,ghg_mkt_scope_2,ghg_scope3_total,Sector1,SP100,scope3_completeness
6,1.0,Final,2019.0,4050000,1780000,1320000,8925800.0,Industrials,1.0,full
18,2.0,Final,2019.0,533000,518000,439000,14073000.0,Healthcare,1.0,full
42,4.0,Final,2019.0,18923,281489,214680,932653.0,Technology,1.0,full
54,5.0,Final,2019.0,11816,56113,43526,544093.0,Technology,1.0,full
78,7.0,Final,2019.0,66686,5116949,794267,3160000.0,Communication Services,1.0,full
90,8.0,Final,2019.0,154507,151259,147909,5227733.0,Consumer Staples,1.0,full
150,13.0,Final,2019.0,135954,173922,160360,2829355.0,Healthcare,1.0,full
162,14.0,Final,2019.0,50549,862127,0,25065200.0,Technology,1.0,full
186,16.0,Final,2019.0,62639,728771,17523,4221256.0,Financial Services,1.0,full
210,18.0,Final,2019.0,67031,36802,106,424537.0,Healthcare,1.0,full


In [33]:
# Coerce every emission column to numeric; entries that cannot be parsed
# become NaN instead of breaking the arithmetic below.
scope1 = pd.to_numeric(merged_df['ghg_scope_1'], errors='coerce')
scope2_loc = pd.to_numeric(merged_df['ghg_loc_scope_2'], errors='coerce')
scope2_mkt = pd.to_numeric(merged_df['ghg_mkt_scope_2'], errors='coerce')
scope3 = pd.to_numeric(merged_df['ghg_scope3_total'], errors='coerce')

# Scope 2 is taken as the smaller of the location-based and market-based
# figures; min() skips NaN, so a single reported value is used as-is.
scope2 = pd.concat([scope2_loc, scope2_mkt], axis=1).min(axis=1)

total_scope = scope1 + scope2 + scope3

# Share of each scope in the company's total emissions (NaN if any input is NaN).
scope1_dist = scope1 / total_scope
scope2_dist = scope2 / total_scope
scope3_dist = scope3 / total_scope

# Build a new frame in one step rather than assigning columns in place:
# the numeric scope 2 columns are written back and the three shares added.
merged_df = merged_df.assign(
    ghg_loc_scope_2=scope2_loc,
    ghg_mkt_scope_2=scope2_mkt,
    scope1_dist=scope1_dist,
    scope2_dist=scope2_dist,
    scope3_dist=scope3_dist,
)



In [34]:
# Columns needed for the sector view: the three scope shares plus the sector label.
fields = ['scope1_dist','scope2_dist', 'scope3_dist', 'Sector1']

# Discard rows with any missing share or sector, then order them by sector name.
scope_by_sector_df = (
    merged_df.loc[:, fields]
    .dropna()
    .sort_values(by="Sector1", ascending=True)
)

In [35]:
# Inspect the per-company scope shares, sorted by sector.
scope_by_sector_df

Unnamed: 0,scope1_dist,scope2_dist,scope3_dist,Sector1
1338,0.008276,0.162395,0.829329,Communication Services
78,0.016585,0.197532,0.785883,Communication Services
570,0.006122,0.014333,0.979544,Consumer Discretionary
1026,0.026163,0.019334,0.954503,Consumer Discretionary
738,0.002091,0.009612,0.988297,Consumer Discretionary
846,0.002916,0.013052,0.984032,Consumer Discretionary
906,0.064074,0.025707,0.91022,Consumer Staples
1182,0.032632,0.055752,0.911615,Consumer Staples
690,0.027072,0.032234,0.940694,Consumer Staples
342,0.012368,0.015619,0.972013,Consumer Staples


In [36]:
# Unweighted mean of each scope share across all remaining companies,
# labelled "Total" so it can be added as an extra pseudo-sector row.
total = scope_by_sector_df.mean(numeric_only=True)
total.name = 'Total'
total.loc['Sector1'] = "Total"

In [37]:
# Inspect the cross-company mean shares together with their "Total" label.
total

scope1_dist    0.073679
scope2_dist    0.069535
scope3_dist    0.856785
Sector1           Total
Name: Total, dtype: object

In [38]:
# Add the "Total" row at the bottom. DataFrame.append was removed in
# pandas 2.0, so the Series is turned into a one-row frame and concatenated.
# infer_objects() restores float dtype on the share columns, which are
# object-typed after the transpose because `total` also holds a string label.
total_row = total.to_frame().T.infer_objects()
scope_by_sector_df = pd.concat([scope_by_sector_df, total_row], ignore_index=True)

In [39]:
# Inspect the frame after the "Total" row was added (index renumbered from 0).
scope_by_sector_df

Unnamed: 0,scope1_dist,scope2_dist,scope3_dist,Sector1
0,0.008276,0.162395,0.829329,Communication Services
1,0.016585,0.197532,0.785883,Communication Services
2,0.006122,0.014333,0.979544,Consumer Discretionary
3,0.026163,0.019334,0.954503,Consumer Discretionary
4,0.002091,0.009612,0.988297,Consumer Discretionary
5,0.002916,0.013052,0.984032,Consumer Discretionary
6,0.064074,0.025707,0.91022,Consumer Staples
7,0.032632,0.055752,0.911615,Consumer Staples
8,0.027072,0.032234,0.940694,Consumer Staples
9,0.012368,0.015619,0.972013,Consumer Staples


In [40]:
# Average the company-level shares within each sector. sort=False keeps the
# groups in the order they first appear, so "Total" stays in last position.
sector_groups = scope_by_sector_df.groupby('Sector1', sort=False)
scope_by_sector_df = sector_groups.mean()

In [41]:
# Inspect the per-sector mean shares (now indexed by Sector1).
scope_by_sector_df

Unnamed: 0_level_0,scope1_dist,scope2_dist,scope3_dist
Sector1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Communication Services,0.01243,0.179964,0.807606
Consumer Discretionary,0.009323,0.014083,0.976594
Consumer Staples,0.031898,0.026917,0.941185
Energy,0.090437,0.004438,0.905125
Financial Services,0.016803,0.03018,0.953018
Healthcare,0.070305,0.045736,0.883959
Industrials,0.1722,0.031855,0.795945
Materials,0.268552,0.213161,0.518287
Real Estate,0.020585,0.286361,0.693054
Technology,0.028302,0.13558,0.836118


In [42]:
# Same per-sector frame displayed a second time; nothing has changed since the previous cell.
scope_by_sector_df


Unnamed: 0_level_0,scope1_dist,scope2_dist,scope3_dist
Sector1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Communication Services,0.01243,0.179964,0.807606
Consumer Discretionary,0.009323,0.014083,0.976594
Consumer Staples,0.031898,0.026917,0.941185
Energy,0.090437,0.004438,0.905125
Financial Services,0.016803,0.03018,0.953018
Healthcare,0.070305,0.045736,0.883959
Industrials,0.1722,0.031855,0.795945
Materials,0.268552,0.213161,0.518287
Real Estate,0.020585,0.286361,0.693054
Technology,0.028302,0.13558,0.836118


In [43]:
# CHECK: the three scope shares of every sector (and of "Total") must add up to 1.
dist_sum = (
    scope_by_sector_df['scope1_dist']
    + scope_by_sector_df['scope2_dist']
    + scope_by_sector_df['scope3_dist']
)
# Fail loudly instead of relying on someone reading the printed column.
assert np.allclose(dist_sum, 1.0), (
    "scope shares do not sum to 1 for: {}".format(
        list(dist_sum.index[~np.isclose(dist_sum, 1.0)])
    )
)
dist_sum

Sector1
Communication Services    1.0
Consumer Discretionary    1.0
Consumer Staples          1.0
Energy                    1.0
Financial Services        1.0
Healthcare                1.0
Industrials               1.0
Materials                 1.0
Real Estate               1.0
Technology                1.0
Utilities                 1.0
Total                     1.0
dtype: float64

In [44]:
# Heatmap inputs: sectors along x, scopes along y, and one row of shares
# per scope (in the same order as y0) for z.
x0 = scope_by_sector_df.index
y0 = ['Scope 1', 'Scope 2', 'Scope 3']
z0 = [
    scope_by_sector_df[dist_col]
    for dist_col in ('scope1_dist', 'scope2_dist', 'scope3_dist')
]

In [82]:
# Heatmap trace of scope shares by sector; the colour bar ticks use the "%" format.
colorbar_opts = {'tickformat': "%"}
trace = go.Heatmap(x=x0, y=y0, z=z0, colorscale='amp', colorbar=colorbar_opts)

In [83]:
# Sector labels in the order they will appear on the heatmap's x axis.
scope_by_sector_df.index.tolist()

['Communication Services',
 'Consumer Discretionary',
 'Consumer Staples',
 'Energy',
 'Financial Services',
 'Healthcare',
 'Industrials',
 'Materials',
 'Real Estate',
 'Technology',
 'Utilities',
 'Total']

In [84]:
# The figure is currently rendered without a title. Previously a trailing
# `title = ""` silently overrode the descriptive titles, leaving them as
# dead code; the explicit flag keeps the blank title by default while
# making the descriptive variant reachable again.
show_title = False

if not show_title:
    title = ""
elif scope3_completeness:
    title = "Real Picture Based on Transparent Companies"
else:
    title = "What is Currently Reported"

In [85]:
# Figure layout. The title font is given through the nested `title.font`
# attribute; the flat `titlefont` shortcut is deprecated in plotly.
layout = go.Layout(
    title=dict(
        text=title,
        font=dict(family='Arial', size=25),
    ),
    plot_bgcolor='antiquewhite',
)

In [86]:
# Assemble the figure from the prepared layout and heatmap trace, then render it inline.
fig = go.Figure(layout=layout)
fig.add_trace(trace)
offline.iplot(fig)

In [87]:
# Display options handed to plot() for the exported HTML page.
config = dict(displaylogo=False, responsive=True)

# File stem depends on whether the scope 3 completeness filter was applied.
name_fig = 'real_scope_heatmap' if scope3_completeness else 'reported_scope_heatmap'

# Both exports go into the 'general' chart group of the Django project.
chart_group = 'general'
image_path = "../../django_project/static/django_project/images/charts/{}/{}.jpeg".format(
    chart_group, name_fig)
html_path = '../../django_project/templates/django_project/home/charts/html_exports/{}/{}.html'.format(
    chart_group, name_fig)

# Static JPEG for the site's static assets.
fig.write_image(image_path, scale=1)

# Stand-alone interactive HTML for the templates; not opened in a browser.
plot(fig, config=config, filename=html_path, auto_open=False)

'../../django_project/templates/django_project/home/charts/html_exports/general/real_scope_heatmap.html'