# NewCo PNG Creator
Feb 8, 2020

**Goal:** This notebook auto-generates PNG files for use in slides for NewCo pitch meetings. The images we plan to include here:
* Selector for Urban Area
* PNG showing breakdown of sector emissions (annual)
* Data Table showing peak emissions by sector (January vs July for example)
* Similar city bar chart
* Similar city scatter plots
* Weekly emission charts
* Hourly emission charts

**Style Notes:**
* We'll be using grayscale for a lot of these PNGs, so a lot of these outputs will look a little strange

**Notes:** To run this notebook you need the Plotly and Kaleido packages installed. See the following page for the dependencies required to write static images:
https://plotly.com/python/static-image-export/

In [1]:
import sqlalchemy
import pandas as pd
import datetime
import geopandas as gpd
import plotly.express as px
import plotly.io as pio
import numpy as np
import gc
from keplergl import KeplerGl
import math
import seaborn as sns
import plotly.graph_objects as go
from ipywidgets import widgets, interact

pd.options.mode.chained_assignment = None  # default='warn'

from IPython.display import Markdown, display
def printmd(string):
    """Render ``string`` as Markdown in the cell output."""
    rendered = Markdown(string)
    display(rendered)

from IPython.core.interactiveshell import InteractiveShell
# Show the value of every top-level expression in a cell, not only the last one.
# Later cells depend on this: they list several figures (e.g. `fig1` then `fig2`)
# as bare expressions and expect each of them to be displayed.
InteractiveShell.ast_node_interactivity = "all"
# Make Plotly's "none" template the default for newly created figures.
pio.templates.default = "none"
# Ask the inline backend for high-resolution ("retina") figure output.
%config InlineBackend.figure_format ='retina'

In [2]:
# Read the year / month / week emissions pickles
year_df = pd.read_pickle('output_data/year_geo.pkl')
month_df = pd.read_pickle('output_data/month_geo.pkl')
week_df = pd.read_pickle('output_data/week_geo.pkl')

# Population spreadsheet: expose UACE / POP under the column names used for the join below
pop_df = pd.read_excel('./raw_data_not_on_github/ua_list_ua.xls')
pop_df = pop_df.assign(GEOID10=pop_df.UACE, Population=pop_df.POP)

# Population is attached to the annual table only. The left join keeps every
# emissions row; rows with no matching GEOID10 get NaN for Population and
# therefore NaN for the per-capita figure.
population_lookup = pop_df[['GEOID10', 'Population']]
year_df = year_df.merge(population_lookup, on='GEOID10', how='left')
year_df['tC per Capita'] = year_df['tC'].div(year_df['Population'])
year_df.sample(10)

Unnamed: 0,GEOID10,Sector,Urban Area,Land Area,Water Area,geometry,Year,tC,Population,tC per Capita
25047,61462,Total,"Needville, TX",4267753,2161,"POLYGON ((-95.85100 29.39864, -95.84991 29.397...",2015,2450.516619,,
7534,18760,residential,"Collinsville, OK",7294670,14217,"MULTIPOLYGON (((-95.80990 36.35647, -95.80540 ...",2015,732.403159,,
26394,64999,elec_prod,"Oil City, PA",23804856,621677,"MULTIPOLYGON (((-79.69460 41.48174, -79.69227 ...",2015,0.0,,
16788,41320,cement,"Indiantown, FL",7005131,108577,"MULTIPOLYGON (((-80.43360 27.01701, -80.42642 ...",2015,0.0,,
19567,47854,rail,"Laredo, TX",170141217,185164,"MULTIPOLYGON (((-99.46154 27.62025, -99.45985 ...",2015,2827.302341,235730.0,0.011994
18038,44398,rail,"Kenedy, TX",10297163,47288,"MULTIPOLYGON (((-97.85770 28.80425, -97.85663 ...",2015,0.0,,
29075,71749,cement,"Pottstown, PA",204080828,1779076,"MULTIPOLYGON (((-75.66273 40.17378, -75.66270 ...",2015,0.0,107682.0,0.0
32970,81874,cmv,"Sidney, OH",29559961,342606,"MULTIPOLYGON (((-84.21425 40.27728, -84.21086 ...",2015,0.0,,
38325,95077,airport,"Wichita, KS",556098032,12308451,"MULTIPOLYGON (((-97.13065 37.72611, -97.12633 ...",2015,17721.760921,472870.0,0.037477
8662,21556,elec_prod,"Crystal Springs, MS",10302954,0,"MULTIPOLYGON (((-90.37579 31.98071, -90.37457 ...",2015,0.0,,


In [3]:
# Replace the raw sector keys with presentation-ready labels in all three tables.
# Values that are not keys of this mapping (e.g. 'Total') are left unchanged.
SECTOR_LABELS = {
    'airport': 'Airport',
    'cement': 'Cement',
    'cmv': 'CMV',
    'commercial': 'Commercial',
    'elec_prod': 'Electricity Production',
    'industrial': 'Industrial',
    'nonroad': 'Non-Road',
    'onroad': 'On-Road',
    'rail': 'Rail',
    'residential': 'Residential',
}

df_list = [year_df, month_df, week_df]
for df in df_list:
    # Column assignment updates each frame in place
    df['Sector'] = df['Sector'].replace(SECTOR_LABELS)

In [4]:
# Sorted, de-duplicated urban area names offered by the picker
city_list = year_df['Urban Area'].sort_values().unique()

selector_config = dict(
    value='San Francisco--Oakland, CA',
    options=list(city_list),
    description='City Selector:',
    disabled=False,
    ensure_option=True,
)
city_selector = widgets.Combobox(**selector_config)
city_selector

Combobox(value='San Francisco--Oakland, CA', description='City Selector:', ensure_option=True, options=('Abbev…

#### Instructions: Re-run the cells below after making a city selection above

In [21]:
# Snapshot the widget's current value; every chart cell below reads `city_select`
city_select = city_selector.value
print('Creating charts for: ', city_select, sep='')

Creating charts for: Denver--Aurora, CO


In [22]:
# Slide theme for the exported charts: Courier New text, white font and axis
# colour, fully transparent paper/plot backgrounds and no gridlines.
#
# This cell used to build the theme and then immediately overwrite it with an
# empty dict on the next line, so the theme was never applied. The switch is
# now explicit; the default (False) keeps that behaviour: `newco_template` is
# an empty template.
USE_NEWCO_THEME = False

if USE_NEWCO_THEME:
    newco_template = dict(layout=go.Layout(title_font=dict(family="Courier New", size=22),
                         font = dict(family="Courier New"),
                         font_color = '#ffffff',
                         paper_bgcolor='rgba(0,0,0,0)',  # the original carried a '#434343' note here
                         plot_bgcolor='rgba(0,0,0,0)',
                         xaxis = dict(showgrid = False,color = '#ffffff'),
                         yaxis = dict(showgrid = False,color = '#ffffff')))
else:
    newco_template = dict()

In [23]:
# Chart 1: Sector breakdown
# Two horizontal bar charts for the selected city, both written to output_images/:
#   fig1 - the single 'Total' row
#   fig2 - one bar per sector (everything except 'Total'), coloured by sector
t_df = year_df[year_df['Urban Area'] == city_select]

# Colour sequence for the 'Total' bar. This definition had been commented out,
# so on a fresh kernel the px.bar call below failed with NameError on `bar_color`.
# NOTE(review): '#f3f3f3' is a very light grey, presumably chosen for a dark slide
# background - confirm it is still the intended colour.
bar_color = ['#f3f3f3',] * 10

fig1 = px.bar(t_df[t_df.Sector == 'Total'],y='Sector',x='tC',height=200,
              title='Total Emissions (tC) | ' + city_select,color_discrete_sequence = bar_color)
fig1 = fig1.update_layout(template=newco_template)

fig2 = px.bar(t_df[t_df.Sector != 'Total'],y='Sector',x='tC',color='Sector',
              title='Sector Emissions (tC) | '+ city_select)
fig2 = fig2.update_layout(template=newco_template)
fig1.write_image('output_images/total_emissions.png')
fig2.write_image('output_images/sector_emissions.png')

# Both figures are shown because the notebook sets ast_node_interactivity = "all"
fig1
fig2

In [24]:
# Table 1: Data Table of emissions by Sector
# Annual tC per sector for the selected city, plus each sector's share of the
# city's 'Total' row, sorted from largest to smallest share.
t_df = year_df[year_df['Urban Area'] == city_select]

# rename() returns a new frame, so the column added below never writes into a view of year_df
o_df = t_df[['Sector','tC']].rename(columns={'tC': 'Annual Emissions (tC)'})

# Pull the city total out as a plain scalar. float(<Series>) is deprecated in
# recent pandas and raises an opaque TypeError unless exactly one row matches.
total_rows = o_df.loc[o_df.Sector == 'Total', 'Annual Emissions (tC)']
if len(total_rows) != 1:
    raise ValueError('Expected exactly one Total row for ' + str(city_select) +
                     ', found ' + str(len(total_rows)))
city_total = float(total_rows.iloc[0])

o_df['Percent of City Total Emissions'] = o_df['Annual Emissions (tC)'] / city_total
o_df = o_df.sort_values(by='Percent of City Total Emissions',ascending = False)
o_df.style.format(formatter={'Annual Emissions (tC)':"{:,.0f}",
                             'Percent of City Total Emissions':'{:,.1%}'.format})

Unnamed: 0,Sector,Annual Emissions (tC),Percent of City Total Emissions
9471,Total,4593660,100.0%
9479,On-Road,2708526,59.0%
9481,Residential,954695,20.8%
9475,Commercial,552412,12.0%
9476,Electricity Production,314893,6.9%
9472,Airport,54904,1.2%
9480,Rail,8147,0.2%
9478,Non-Road,83,0.0%
9473,Cement,0,0.0%
9474,CMV,0,0.0%


In [25]:
# Table 2: Data Table of Highest Monthly Emissions
# Compares each sector's emissions in the city's peak month against its lowest
# month, where peak/lowest are decided by the city's 'Total' rows.
t_df = month_df[month_df['Urban Area'] == city_select]

# Identify highest and lowest emission months
total_tc = t_df[t_df.Sector=='Total']['tC']
high_month = t_df.loc[total_tc.idxmax()].Month
high_month_string = high_month.strftime('%B')
low_month = t_df.loc[total_tc.idxmin()].Month
low_month_string = low_month.strftime('%B')

# One row per sector, one column per month name
o_df = t_df[t_df.Month.isin([high_month,low_month])][['Sector','Month','tC']].copy()
o_df['Month'] = o_df.Month.dt.strftime('%B')
o_df = o_df.pivot(index='Sector',
           columns = 'Month',
           values = 'tC')
o_df = o_df.sort_values(by=high_month_string,ascending = False)

# A sector with zero emissions in the low month has no meaningful percentage
# change (previously rendered as "nan%" or "inf%"). Mask the zero baseline so
# the result is NaN, which is displayed through na_rep below.
low_tc = o_df[low_month_string]
o_df['Percent Increase'] = o_df[high_month_string] / low_tc.where(low_tc != 0) - 1

o_df = o_df.rename({high_month_string:high_month_string + ' (Peak Emissions)',
                    low_month_string:low_month_string + ' (Lowest Emissions)'},axis = 'columns')
# NOTE(review): `cm` is not used by the styling below; kept in case another cell reads it
cm = sns.light_palette("red", as_cmap=True)
# From here on `o_df` is a Styler, not a DataFrame
o_df = o_df.style.format(formatter={high_month_string + ' (Peak Emissions)':"{:,.0f}",
                              low_month_string + ' (Lowest Emissions)':"{:,.0f}",
                             'Percent Increase':'{:,.0%}'.format},
                         na_rep='n/a')

o_df

Month,August (Lowest Emissions),December (Peak Emissions),Percent Increase
Sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Total,292478,557092,90%
On-Road,239650,227703,-5%
Residential,13778,183028,"1,228%"
Commercial,9803,112817,"1,051%"
Electricity Production,22608,28730,27%
Airport,5938,4118,-31%
Rail,692,692,0%
Non-Road,8,5,-39%
CMV,0,0,nan%
Cement,0,0,nan%


In [26]:
# Chart 2: Weekly Emissions by Sector
# Same data drawn twice - stacked area, then lines - with 'Total' rows left out
is_selected_city = week_df['Urban Area'] == city_select
is_sector_row = week_df.Sector != 'Total'
t_df = week_df[is_selected_city & is_sector_row]

weekly_chart_args = dict(
    x='Week',
    y='tC',
    color='Sector',
    title='Weekly Emissions by Sector (tC) | ' + city_select,
)

fig = px.area(t_df, **weekly_chart_args)
fig.write_image('output_images/weekly_sector_emissions_area.png')
fig

fig = px.line(t_df, **weekly_chart_args)
fig.write_image('output_images/weekly_sector_emissions_line.png')
fig

In [27]:
# Chart 3: Similar Cities by Emissions (Total + Per Capita)
def nearest_cities_chart(df, city, metric, title, out_path, n_before=3, n_after=2):
    """Bar chart of `city` next to its nearest neighbours when ranked by `metric`.

    Parameters
    ----------
    df : DataFrame with 'Sector', 'Urban Area' and `metric` columns.
    city : value of 'Urban Area' to highlight.
    metric : column to rank by (descending) and to plot on the y axis.
    title : chart title.
    out_path : path the PNG is written to.
    n_before, n_after : how many higher-ranked / lower-ranked neighbours to show.

    Returns
    -------
    The plotly figure, or None when `city` has no 'Total' row with a value for `metric`.
    """
    # Rows without a value for the metric cannot be ranked; without this they
    # pile up at the end of the sort and show up as empty "neighbours".
    ranked = (df[df.Sector == 'Total']
              .dropna(subset=[metric])
              .sort_values(by=metric, ascending=False)
              .reset_index())
    matches = ranked.index[(ranked['Urban Area'] == city).to_numpy()]
    if len(matches) == 0:
        print('No ' + metric + ' value available for ' + str(city) + '; skipping chart.')
        return None
    row_num = int(matches[0])

    # Clip the window to the table so cities at the top or bottom of the
    # ranking do not trigger a KeyError on out-of-range row labels.
    start = max(row_num - n_before, 0)
    stop = min(row_num + n_after, len(ranked) - 1)
    x_df = ranked.iloc[start:stop + 1]

    # Colour by name, not by position: with a clipped window the selected
    # city is not necessarily the 4th bar.
    color_map = {name: 'lightslategray' for name in x_df['Urban Area']}
    color_map[city] = 'crimson'

    fig = px.bar(x_df, x='Urban Area', y=metric, color='Urban Area',
                 color_discrete_map=color_map, title=title, height=400, width=800)
    fig.write_image(out_path)
    return fig


fig = nearest_cities_chart(year_df, city_select, 'tC',
                           'Nearest Cities in Total Emissions (tC) | ' + city_select,
                           'output_images/similar_cities_total_emissions.png')
fig

fig = nearest_cities_chart(year_df, city_select, 'tC per Capita',
                           'Nearest Cities in per Capita Total Emissions | ' + city_select,
                           'output_images/similar_cities_per_capita_emissions.png')
fig

# Scatter chart showing how the city stacks up against others on per capita
# emissions (available for only certain cities).
# TODO: not implemented yet - only the input frame is prepared here.
t_df = year_df[year_df.Sector == 'Total']
# TODO: a per-sector "similar cities" chart was sketched here as commented-out
# code; it could be built on the same ranking-window approach as above.

In [28]:
# Chart 4: Hourly Onroad Emissions Data (Data Intensive)
# Load the hourly table, keep only the selected city's rows before 2015-02-01,
# then drop the full table straight away to release memory.
hour_df = pd.read_pickle('output_data/hour_geo.pkl')
in_selected_city = hour_df['Urban Area'] == city_select
before_february = hour_df.Hour < '2015-02-01'
t_df = hour_df.loc[in_selected_city & before_february].copy()
del hour_df, in_selected_city, before_february
gc.collect()
t_df.head()

1929

Unnamed: 0,GEOID10,Sector,Urban Area,Land Area,Water Area,geometry,Hour,tC
3525,23527,airport,"Denver--Aurora, CO",1726058510,38089345,"MULTIPOLYGON (((-104.71571 39.52160, -104.7154...",2015-01-01,0.850665
7126,23527,elec_prod,"Denver--Aurora, CO",1726058510,38089345,"MULTIPOLYGON (((-104.71571 39.52160, -104.7154...",2015-01-01,34.149739
10727,23527,cement,"Denver--Aurora, CO",1726058510,38089345,"MULTIPOLYGON (((-104.71571 39.52160, -104.7154...",2015-01-01,
14328,23527,cmv,"Denver--Aurora, CO",1726058510,38089345,"MULTIPOLYGON (((-104.71571 39.52160, -104.7154...",2015-01-01,
17929,23527,rail,"Denver--Aurora, CO",1726058510,38089345,"MULTIPOLYGON (((-104.71571 39.52160, -104.7154...",2015-01-01,0.930013


In [29]:
# Hourly on-road profile: one facet per day of the week, one line per week.
#
# The hourly table is loaded after the Sector label clean-up cell, so it still
# carries the raw sector key 'onroad' (not 'On-Road').
# Filter first, so the calendar columns are only derived for rows that are plotted.
onroad_rows = t_df[(t_df.Sector == 'onroad') & (t_df.Hour >= '2015-01-05')]

# Vectorised .dt accessors replace the element-wise apply() used previously
# to obtain each week's start timestamp.
t_df = (onroad_rows
        .assign(**{'Week': onroad_rows.Hour.dt.to_period('W').dt.start_time,
                   'Day of Week': onroad_rows.Hour.dt.day_name(),
                   'Numeric Day of Week': onroad_rows.Hour.dt.dayofweek,
                   'Hour of Day': onroad_rows.Hour.dt.hour})
        # Row order drives the facet (day) order and the line order within each facet
        .sort_values(by=['Week','Numeric Day of Week','Hour of Day']))

# One shade per week, lightest first
color_sequence = ['#c9daf8','#a4c2f4','#6d9eeb','#3c78d8']
fig = px.line(t_df,x='Hour of Day',y='tC',color='Week',facet_col='Day of Week',
              title = 'Profile of Hourly Onroad Emissions | ' + city_select,
              color_discrete_sequence = color_sequence)
fig = fig.update_layout(template=newco_template)
# Facet titles come out as "Day of Week=Monday"; keep only the part after '='
fig = fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig = fig.update_xaxes(title = "")
fig.write_image('output_images/hourly_onroad_emissions.png')
fig