## Load Data

In [None]:
# --- standard library ---
import os
import sys
import time

# --- third-party ---
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Higher figure dpi to fix display scaling on high-dpi screens
# (only useful if you have a 4k monitor).
plt.rcParams['figure.dpi'] = 240

# Make the current working directory importable before loading the
# project-local modules below.
sys.path.append(os.getcwd())

# --- project-local helpers ---
from zip_codes import ZC        # zip code database
import load_file as lf          # file i/o
import myplots as mp            # plotting functions
import file_reshaper as fr      # file reshaper

# Zip code class instance, initialised with an empty `fdir`.
zc = ZC(fdir='')

In [None]:
data_dir = r'C:\PythonBC\RootData'

# Read the three input tables from `data_dir`, in the order
# clicks -> state -> installs.
loaded = {}
for fname in ('clicks.gzip', 'state.gzip', 'installs.gzip'):
    loaded[fname] = lf.temp_load(os.path.join(data_dir, fname))

df_clicks = loaded['clicks.gzip']
df_state = loaded['state.gzip']
df_installs = loaded['installs.gzip']

# Place the three tables side by side (column-wise) in one frame.
frames = [df_state, df_clicks, df_installs]
df = pd.concat(frames, axis=1)

In [None]:
# Per-state click / bid / install totals and the derived rates.
#
# The column is selected *before* aggregating so that only the column of
# interest is summed/counted (summing every column of `df` first is slower
# and fails when `df` holds non-numeric columns).
by_state = df.groupby('state')

# number of clicks per state
dfstates = by_state['clicks'].sum().reset_index()

# number of bids per state: count of non-null `clicks` entries in each state
dfstates2 = (
    by_state['clicks']
    .count()
    .reset_index()
    .rename(columns={"clicks": "bids"})  # rename returns a new frame; keep it
)

# number of installs per state
dfstates3 = by_state['installs'].sum().reset_index()

# build new dataframe
# All three aggregates come from the same groupby, so their rows are in the
# same state order and the arrays below line up element by element.
state = dfstates.state.values
bids = dfstates2.bids.values
clicks = dfstates.clicks.values
installs = dfstates3.installs.values
clickrate = 100*np.divide(clicks, bids)      # clicks per bid, in percent
installrate = 100*np.divide(installs, bids)  # installs per bid, in percent
frames = {"state": state, "bids": bids, "clicks": clicks, "installs": installs, "clickrate":clickrate, "installrate": installrate}
df_rate = pd.DataFrame(data=frames)

# Keep only the states with a strictly positive click rate.
df_rate_nonzero = df_rate[df_rate.clickrate > 0]

## Make a folium map

In [None]:
import folium

# GeoJSON with the US state outlines for the state-level choropleth maps.
# A local copy is used; the upstream file lives at
#   https://raw.githubusercontent.com/python-visualization/folium/master/examples/data/us-states.json
fname = 'us-states.json' # local copy of json file

# The Choropleth calls read `state_geo`; bind it to the local file so they
# do not fail with a NameError.
state_geo = fname

In [None]:
# State-level choropleth of the click rate.
m = folium.Map(location=[39.50, -98.35], zoom_start=4) # lower 48

# Choropleth settings: states in the GeoJSON are matched to rows of
# `df_rate_nonzero` through `feature.id` <-> the 'state' column.
clickrate_layer_opts = dict(
    geo_data=state_geo,
    name='choropleth',
    data=df_rate_nonzero,
    columns=['state', 'clickrate'],
    key_on='feature.id',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='click rate (%)',
)
folium.Choropleth(**clickrate_layer_opts).add_to(m)
folium.LayerControl().add_to(m)

# Last expression of the cell, so the notebook renders the map.
m

In [None]:
# Write the click-rate map to an HTML file in the data folder.
save_path = r'C:\PythonBC\RootData'
clickrate_html = os.path.join(save_path, 'clickrate.html')
m.save(clickrate_html)

In [None]:
# State-level choropleth of the install rate.
m = folium.Map(location=[39.50, -98.35], zoom_start=4) # lower 48
folium.Choropleth(
    geo_data=state_geo,
    name='choropleth',
    data=df_rate_nonzero,
    columns=['state', 'installrate'],  # key column, value column
    key_on='feature.id',               # GeoJSON field matched against 'state'
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='install rate (%)'     # legend must name the plotted column
).add_to(m)
folium.LayerControl().add_to(m)

# Last expression of the cell, so the notebook renders the map.
m

In [None]:
# Write the install-rate map to an HTML file in the data folder.
save_path = r'C:\PythonBC\RootData'
installrate_html = os.path.join(save_path, 'installrate.html')
m.save(installrate_html)

## Zip code level map of an individual state

In [None]:
# Number of rows per state, most frequent first.
df['state'].value_counts()

In [None]:
# Rows belonging to the single state being mapped.
# .copy() gives an independent frame, so the cast below does not write into a
# slice of `df` (which triggers pandas' SettingWithCopyWarning).
df_state = df[df.state == 'ND'].copy()
df_state['geo_zip'] = df_state['geo_zip'].apply(int)
print(df_state.shape)

# Clicks per zip code, aggregated over the selected state only (not the whole
# of `df`), selecting the column before summing.
# NOTE(review): after the cast above the zip keys are ints; they need to have
# the same type as the GeoJSON property they are matched against — confirm.
dfzips = df_state.groupby('geo_zip')['clicks'].sum().reset_index()

In [None]:
import folium

# Zip-code-level GeoJSON for the state being mapped.
json_dir = r'C:\PythonBC\State-zip-code-GeoJSON'
fname_json = 'map_ND.geojson'
zip_geo = os.path.join(json_dir, fname_json)
print(zip_geo)

m = folium.Map(location=[47.5515, -101.0020], zoom_start=6) # north dakota
folium.Choropleth(
    geo_data=zip_geo,
    name='choropleth',
    data=dfzips,
    columns=['geo_zip', 'clicks'],  # key column, value column
    # NOTE(review): the 'geo_zip' values must compare equal to this GeoJSON
    # property for a region to be coloured — confirm both have the same type.
    key_on="feature.properties.ZCTA5CE10",
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='clicks'
).add_to(m)
folium.LayerControl().add_to(m)

# Save first and leave `m` as the final expression: a notebook only renders
# the value of a cell's last expression, so a bare `m` followed by another
# statement is never displayed.
m.save('index.html')
m


In [None]:
# Inspect the first zip code of the state subset: its value, then its type.
first_zip = df_state.geo_zip.iloc[0]
print(first_zip)
print(type(first_zip))

In [None]:
# Debug check: show the type of the per-zip aggregate passed to the map.
type(dfzips)