# NewCo PNG Creator
Feb 8, 2020

**Goal:** This notebook auto-generates PNG files for use in slides for NewCo pitch meetings. The images we plan to generate here are:
* Selector for Urban Area
* PNG showing breakdown of sector emissions (annual)
* Data Table showing peak emissions by sector (January vs July for example)
* Similar city bar chart
* Similar city scatter plots
* Weekly emission charts
* Hourly emission charts

**Style Notes:**
* Most of these PNGs are grayscale (white and light-gray marks on a transparent background), so many of the outputs will look a little strange when rendered inline in the notebook

**Notes:** To run this notebook you need the Plotly and Kaleido packages installed. See the Plotly documentation for the dependencies required to write static images:
https://plotly.com/python/static-image-export/

In [108]:
import sqlalchemy
import pandas as pd
import datetime
import geopandas as gpd
import plotly.express as px
import plotly.io as pio
import numpy as np
import gc
from keplergl import KeplerGl
from itertools import cycle
import math
import seaborn as sns
import plotly.graph_objects as go
from ipywidgets import widgets, interact

# Turn off pandas' SettingWithCopyWarning for the whole notebook
pd.options.mode.chained_assignment = None  # default='warn'

from IPython.display import Markdown, display
def printmd(string):
    """Render ``string`` as Markdown in the cell output."""
    rendered = Markdown(string)
    display(rendered)

from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell, not only the last one
# (cells below rely on this to show more than one figure/table per cell)
InteractiveShell.ast_node_interactivity = "all"
# Use Plotly's "none" template as the default for any figure that does not set its own
pio.templates.default = "none"
# Ask the inline backend for high-resolution ('retina') figure output
%config InlineBackend.figure_format ='retina'

In [2]:
# Load the pre-built emissions tables: one frame each for annual, monthly and weekly data
year_df, month_df, week_df = [
    pd.read_pickle(pickle_path)
    for pickle_path in ('output_data/year_geo.pkl',
                        'output_data/month_geo.pkl',
                        'output_data/week_geo.pkl')
]

# Population list; expose its UACE / POP columns under the names used by the emissions tables
pop_df = pd.read_excel('./raw_data_not_on_github/ua_list_ua.xls')
pop_df = pop_df.assign(GEOID10=pop_df['UACE'], Population=pop_df['POP'])

# Population is attached to the annual table only. The left join keeps every emissions row,
# so urban areas without a match in the population list end up with NaN.
year_df = year_df.merge(pop_df[['GEOID10', 'Population']], on='GEOID10', how='left')
year_df['tC per Capita'] = year_df['tC'].div(year_df['Population'])
year_df.sample(10)

Unnamed: 0,GEOID10,Sector,Urban Area,Land Area,Water Area,geometry,Year,tC,Population,tC per Capita
32537,80740,residential,"Seville, OH",17841231,3989,"MULTIPOLYGON (((-81.87974 41.01971, -81.87716 ...",2015,1365.107375,,
19426,47476,Total,"Lancaster, KY",5871801,5750,"MULTIPOLYGON (((-84.55469 37.59820, -84.55344 ...",2015,3541.924438,,
22212,54631,cmv,"Marion, NC",40800422,182887,"MULTIPOLYGON (((-82.05707 35.65940, -82.05150 ...",2015,0.0,,
18360,45181,airport,"Kingsland, TX",23448576,918095,"POLYGON ((-98.48773 30.65312, -98.48570 30.659...",2015,117.967177,,
3640,8866,residential,"Bolivar, TN",12804200,0,"POLYGON ((-89.03738 35.28770, -89.03390 35.287...",2015,466.873366,,
5635,13996,cmv,"Carroll, IA",11948805,0,"MULTIPOLYGON (((-94.89591 42.07839, -94.89213 ...",2015,0.0,,
725,1873,residential,"Alva, OK",6803003,0,"POLYGON ((-98.69079 36.80492, -98.68272 36.804...",2015,1181.894073,,
36394,90182,industrial,"Vandenberg AFB, CA",4649545,0,"MULTIPOLYGON (((-120.54272 34.75353, -120.5367...",2015,0.084394,,
31073,77014,rail,"Rutland, VT",31777805,327356,"MULTIPOLYGON (((-72.98678 43.63638, -72.98639 ...",2015,0.0,,
4295,10567,elec_prod,"Brookfield, MO",7607997,17861,"POLYGON ((-93.10361 39.78048, -93.09967 39.780...",2015,0.0,,


In [3]:
# Map the raw lowercase sector codes to the display labels used in the charts and tables.
# Values with no entry here (e.g. 'Total') are left unchanged.
sector_labels = {
    'airport': 'Airport',
    'cement': 'Cement',
    'cmv': 'CMV',
    'commercial': 'Commercial',
    'elec_prod': 'Electricity Production',
    'industrial': 'Industrial',
    'nonroad': 'Non-Road',
    'onroad': 'On-Road',
    'rail': 'Rail',
    'residential': 'Residential',
}

# Apply the same relabelling to the annual, monthly and weekly tables
df_list = [year_df, month_df, week_df]
for df in df_list:
    df['Sector'] = df['Sector'].replace(sector_labels)

In [4]:
# Alphabetical list of every distinct urban area in the annual table
city_list = year_df['Urban Area'].sort_values().unique()

# Type-ahead picker; ensure_option=True only accepts values that appear in the options list
city_selector = widgets.Combobox(
    description='City Selector:',
    options=list(city_list),
    value='San Francisco--Oakland, CA',
    ensure_option=True,
    disabled=False,
)
city_selector

Combobox(value='San Francisco--Oakland, CA', description='City Selector:', ensure_option=True, options=('Abbev…

#### Instructions: Re-run the cells below after making a city selection above

In [171]:
# Capture the widget's current value; every chart and table below is built for this city
city_select = city_selector.value
print(f'Creating charts for: {city_select}')

Creating charts for: Eugene, OR


In [172]:
# Shared Plotly template for the deck: Courier New text in white, transparent paper and
# plot backgrounds, and white axes without grid lines.
newco_template = {
    'layout': go.Layout(
        title_font={'family': "Courier New", 'size': 22},
        font={'family': "Courier New"},
        font_color='#ffffff',
        paper_bgcolor='rgba(0,0,0,0)',  # opaque alternative considered: #434343
        plot_bgcolor='rgba(0,0,0,0)',
        xaxis={'showgrid': False, 'color': '#ffffff'},
        yaxis={'showgrid': False, 'color': '#ffffff'},
    )
}

In [173]:
# Chart 1: annual emissions for the selected city -- city-wide total plus per-sector breakdown
t_df = year_df.loc[year_df['Urban Area'] == city_select]

# A single light-gray shade repeated, so every bar is drawn in the same color
bar_color = ['#f3f3f3'] * 10

# Short figure holding only the 'Total' row
fig1 = (
    px.bar(t_df.loc[t_df['Sector'] == 'Total'], x='tC', y='Sector',
           height=200, color_discrete_sequence=bar_color)
    .update_layout(template=newco_template)
    .update_xaxes(title="")
    .update_yaxes(title="")
)

# One bar per individual sector (every row except 'Total')
fig2 = (
    px.bar(t_df.loc[t_df['Sector'] != 'Total'], x='tC', y='Sector',
           color_discrete_sequence=bar_color)
    .update_layout(template=newco_template)
    .update_xaxes(title="")
    .update_yaxes(title="")
)

# Export both figures, then show them inline
fig1.write_image('output_images/total_emissions.png')
fig2.write_image('output_images/sector_emissions.png')

fig1
fig2

In [174]:
# Table 1: annual emissions by sector for the selected city, with each sector's share of the city total
t_df = year_df[year_df['Urban Area'] == city_select]

# Work on an explicit copy so the share column is added to this table only,
# never to a slice of year_df.
o_df = t_df[['Sector', 'tC']].copy()
o_df.columns = ['Sector', 'Annual Emissions (tC)']

# City-wide total as a plain scalar. Calling float() directly on a Series is deprecated
# in recent pandas, so the element is selected explicitly first.
city_total = float(o_df.loc[o_df.Sector == 'Total', 'Annual Emissions (tC)'].iloc[0])
o_df['Percent of City Total Emissions'] = o_df['Annual Emissions (tC)'] / city_total
o_df = o_df.sort_values(by='Percent of City Total Emissions', ascending=False)

# Thousands separators for the tC column, one decimal place for the share
o_df.style.format(formatter={'Annual Emissions (tC)': "{:,.0f}",
                             'Percent of City Total Emissions': "{:,.1%}"})

Unnamed: 0,Sector,Annual Emissions (tC),Percent of City Total Emissions
11385,Total,465834,100.0%
11393,On-Road,215336,46.2%
11391,Industrial,113342,24.3%
11389,Commercial,64522,13.9%
11392,Non-Road,40419,8.7%
11395,Residential,16459,3.5%
11386,Airport,11395,2.4%
11394,Rail,4362,0.9%
11387,Cement,0,0.0%
11388,CMV,0,0.0%


In [175]:
# Table 2: sector emissions in the city's peak month versus its lowest month
t_df = month_df[month_df['Urban Area'] == city_select]

# The peak and lowest months are picked from the city-wide 'Total' rows
total_tc = t_df.loc[t_df.Sector == 'Total', 'tC']
high_month = t_df.loc[total_tc.idxmax()].Month
high_month_string = high_month.strftime('%B')
low_month = t_df.loc[total_tc.idxmin()].Month
low_month_string = low_month.strftime('%B')

# Keep every sector's rows for just those two months.
# Copy first: the Month column is rewritten as a month name on the next line.
o_df = t_df.loc[t_df.Month.isin([high_month, low_month]), ['Sector', 'Month', 'tC']].copy()
o_df['Month'] = o_df.Month.dt.strftime('%B')

# One row per sector, one column per month, ordered by peak-month emissions
o_df = o_df.pivot(index='Sector', columns='Month', values='tC')
o_df = o_df.sort_values(by=high_month_string, ascending=False)

# Relative change from the lowest month to the peak month. A sector with zero emissions in the
# lowest month has no meaningful ratio, so the zero denominator is masked to NaN; the cell is
# then rendered via na_rep below rather than as "nan%" or "inf%".
low_nonzero = o_df[low_month_string].where(o_df[low_month_string] != 0)
o_df['Percent Increase'] = o_df[high_month_string] / low_nonzero - 1

peak_col = high_month_string + ' (Peak Emissions)'
low_col = low_month_string + ' (Lowest Emissions)'
o_df = o_df.rename({high_month_string: peak_col,
                    low_month_string: low_col}, axis='columns')

# From here on o_df is the styled table that gets displayed
o_df = o_df.style.format(formatter={peak_col: "{:,.0f}",
                                    low_col: "{:,.0f}",
                                    'Percent Increase': "{:,.0%}"},
                         na_rep='n/a')

o_df

Month,December (Peak Emissions),May (Lowest Emissions),Percent Increase
Sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Total,45087,34847,29%
On-Road,16780,18675,-10%
Commercial,11906,2278,423%
Industrial,9914,8363,19%
Residential,2861,639,348%
Non-Road,2530,3523,-28%
Airport,724,1000,-28%
Rail,370,370,0%
CMV,0,0,nan%
Cement,0,0,nan%


In [176]:
# Chart 2: weekly emissions per sector for the selected city (the 'Total' rows are left out)
t_df = week_df.loc[(week_df['Urban Area'] == city_select) &
                   (week_df['Sector'] != 'Total')]

# Stacked-area version, grayscale palette plus the shared deck template
fig = (
    px.area(t_df, x='Week', y='tC', color='Sector',
            color_discrete_sequence=px.colors.sequential.Greys)
    .update_layout(template=newco_template)
    .update_xaxes(title="")
    .update_yaxes(title="")
)
fig.write_image('output_images/weekly_sector_emissions_area.png')
fig

# Line version: default template and colors, with a title naming the city
fig = px.line(t_df, x='Week', y='tC', color='Sector',
              title='Weekly Emissions by Sector (tC) | ' + city_select)
fig.write_image('output_images/weekly_sector_emissions_line.png')
fig

In [181]:
# Chart 3: Similar Cities by Emissions -- the selected city next to its nearest neighbours in the ranking

def _neighbor_window(ranked, city, n_above=3, n_below=2):
    """Slice the rows surrounding ``city`` out of a ranked frame.

    Parameters
    ----------
    ranked : DataFrame
        Rows already sorted by the ranking metric, carrying a default 0..n-1 index
        and an 'Urban Area' column.
    city : str
        Value to look up in the 'Urban Area' column.
    n_above, n_below : int
        Number of higher-ranked / lower-ranked rows to keep around the city.

    Returns
    -------
    (DataFrame, int)
        The window of rows, and the position of ``city`` inside that window.

    Raises
    ------
    IndexError
        If ``city`` has no row in ``ranked``.
    """
    position = ranked.index[ranked['Urban Area'] == city][0]
    # Clamp to the frame: a city ranked near the top or bottom gets a shorter window
    # instead of a lookup on row labels that do not exist.
    first = max(position - n_above, 0)
    last = min(position + n_below, len(ranked) - 1)
    return ranked.iloc[first:last + 1], position - first


def _similar_cities_bar(sector, image_path, value_col='tC'):
    """Draw, save and return a bar chart of the selected city and its nearest-ranked peers.

    Reads the notebook globals ``year_df``, ``city_select`` and ``newco_template``.

    Parameters
    ----------
    sector : str
        Value of the 'Sector' column to rank cities on (e.g. 'Total').
    image_path : str
        Destination passed to ``fig.write_image``.
    value_col : str
        Column used both for the ranking and for the bar heights.

    Returns
    -------
    The Plotly figure, after it has been written to ``image_path``.
    """
    ranked = (year_df[year_df.Sector == sector]
              .sort_values(by=value_col, ascending=False)
              .reset_index())
    x_df, highlight = _neighbor_window(ranked, city_select)

    # Mid-gray for the peer cities, white for the selected city
    colors = ['#999999'] * len(x_df)
    colors[highlight] = '#ffffff'

    fig = px.bar(x_df, x='Urban Area', y=value_col, color='Urban Area',
                 color_discrete_sequence=colors, height=400, width=800)
    fig = fig.update_layout(template=newco_template)
    fig = fig.update_xaxes(title="")
    fig = fig.update_yaxes(title="")
    fig.write_image(image_path)
    return fig


# Each sector is written to its own file so that one chart never overwrites the other
fig = _similar_cities_bar('Total', 'output_images/similar_cities_total_emissions.png')
fig

fig = _similar_cities_bar('Residential', 'output_images/similar_cities_residential_emissions.png')
fig

# TODO: per-capita variants, e.g.
#   _similar_cities_bar('Total', 'output_images/similar_cities_per_capita_emissions.png',
#                       value_col='tC per Capita')
# 'tC per Capita' is only available for certain cities (Population is missing for the rest),
# so rows without a value would need to be dropped before ranking.

In [178]:
# Static scatter plot: total annual emissions against population, using the 'Total' rows only
t_df = year_df.loc[year_df['Sector'] == 'Total']

fig = (
    px.scatter(t_df, x='Population', y='tC', width=700, height=400,
               title='Emissions vs Population (Top 400 Cities)')
    # Large light-gray markers with a white outline
    .update_traces(marker={'size': 12,
                           'color': '#d9d9d9',
                           'line': {'width': 2, 'color': '#ffffff'}})
    .update_layout(template=newco_template)
)
fig

In [179]:
# Chart 4: Hourly Onroad Emissions Data (Data Intensive)
# Load the full hourly table, keep only the selected city's rows before 2015-02-01,
# then drop the reference to the full table.
hour_df = pd.read_pickle('output_data/hour_geo.pkl')
hour_df_lite = hour_df.loc[
    (hour_df['Urban Area'] == city_select) & (hour_df['Hour'] < '2015-02-01')
].copy()
del hour_df
hour_df_lite.head()

Unnamed: 0,GEOID10,Sector,Urban Area,Land Area,Water Area,geometry,Hour,tC
24,28117,airport,"Eugene, OR",225265401,118851,"MULTIPOLYGON (((-122.88399 44.04685, -122.8840...",2015-01-01,0.252467
3625,28117,elec_prod,"Eugene, OR",225265401,118851,"MULTIPOLYGON (((-122.88399 44.04685, -122.8840...",2015-01-01,
7226,28117,cement,"Eugene, OR",225265401,118851,"MULTIPOLYGON (((-122.88399 44.04685, -122.8840...",2015-01-01,
10827,28117,cmv,"Eugene, OR",225265401,118851,"MULTIPOLYGON (((-122.88399 44.04685, -122.8840...",2015-01-01,
14428,28117,rail,"Eugene, OR",225265401,118851,"MULTIPOLYGON (((-122.88399 44.04685, -122.8840...",2015-01-01,0.497924


In [180]:
# Chart 4 (continued): hourly on-road emissions -- one panel per weekday, one line per week.
# Styling comes from the newco_template defined earlier in the notebook.

# hour_df is loaded after the Sector relabelling cell, so its labels are still the raw
# lowercase ones ('onroad', not 'On-Road').
# Filter first and copy, so the helper columns below are added to this cell's own frame
# and hour_df_lite stays exactly as it was loaded.
# NOTE(review): the 2015-01-05 cut-off presumably drops the partial first week
# (2015-01-05 is a Monday) -- confirm.
t_df = hour_df_lite[(hour_df_lite.Sector == 'onroad') &
                    (hour_df_lite.Hour >= '2015-01-05')].copy()

# Calendar helper columns derived from the Hour timestamp
t_df['Week'] = t_df.Hour.dt.to_period('W').dt.start_time  # start timestamp of the containing week
t_df['Day of Week'] = t_df.Hour.dt.day_name()
t_df['Numeric Day of Week'] = t_df.Hour.dt.dayofweek
t_df['Hour of Day'] = t_df.Hour.dt.hour

# Order rows by week, then weekday number, then hour, so each line is drawn in time order
t_df = t_df.sort_values(by=['Week', 'Numeric Day of Week', 'Hour of Day'])

# One gray shade per week
color_sequence = ['#ffffff', '#efefef', '#cccccc', '#999999']
fig = px.line(t_df, x='Hour of Day', y='tC', color='Week', facet_col='Day of Week',
              color_discrete_sequence=color_sequence)
fig = fig.update_layout(template=newco_template)
# Keep only the text after "=" in each facet annotation (i.e. the bare day name)
fig = fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig = fig.update_xaxes(title="")
fig = fig.update_yaxes(title="")
fig.write_image('output_images/hourly_onroad_emissions.png')
fig