In [None]:
import pandas as pd
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
import pandas as pd
import numpy as np
import json
import math
import plotly.express as px
# import plotly.graph_objs as go
import datetime as dt
import seaborn as sns

# Data Cleaning

In [None]:
# Load the film-permit records from the CSV export into a pandas DataFrame.
permit = pd.read_csv(filepath_or_buffer="Film_Permits.csv")

In [None]:
# Discard every row that has a missing value in any column.
permit = permit.dropna()
# Parse the timestamp columns so that later cells can do date analysis,
# e.g. how permit activity changes over time.
permit['StartDateTime'] = pd.to_datetime(permit['StartDateTime'])
permit['EndDateTime'] = pd.to_datetime(permit['EndDateTime'])
# Length of each permit as a timedelta. Subtracting the columns directly is
# vectorised (no Python-level call per row) and, unlike a row-wise apply,
# also yields a proper empty column when the frame has no rows.
permit['duration'] = permit['EndDateTime'] - permit['StartDateTime']

In [None]:
# Preview the first 5 rows of the dataframe.
permit.head()
# The zip code is the key that will be used to plot the permits on the map.
# Some permits list more than one zip code in 'ZipCode(s)'; these need to be
# separated so that each zip code ends up in its own row.

In [None]:
# Every column name except 'ZipCode(s)'; these are used as the index while
# the zip-code column is split into one row per zip code.
col_names_wo_zipcode = permit.columns.tolist()
zip_position = col_names_wo_zipcode.index('ZipCode(s)')
del col_names_wo_zipcode[zip_position]
print(col_names_wo_zipcode)

In [None]:
# Split the comma-separated 'ZipCode(s)' field so that every row carries
# exactly one zip code. All other columns are moved into the index so they are
# repeated for each zip code, then restored as columns by reset_index().
# The pieces are stripped because splitting on ',' keeps any whitespace around
# the separator (e.g. "10001, 10002" -> " 10002"); an unstripped value would be
# grouped as a different zip code and would not match the GeoJSON postal codes.
permit = (
    permit.set_index(col_names_wo_zipcode)['ZipCode(s)']
    .str.split(',')
    .explode()
    .str.strip()
    .reset_index()
)

In [None]:
# Preview the result: each row should now hold a single zip code.
permit.head()

# Data Analysis

In [None]:
import matplotlib.pyplot as plt

# Relationship between event type and category: how many permit rows fall
# into each (EventType, Category) pair.
typecategory = (
    permit.groupby(['EventType', 'Category'])
    .size()
    .reset_index(name='count')
)

# Sorted distinct event types; one pie chart is drawn per event type.
unique_event_types = np.unique(typecategory['EventType'])

# Size the subplot grid from the data instead of assuming exactly four event
# types (a fixed 2x2 reshape fails for any other number). Four event types
# still give a 2x2 grid on a 20x20 figure.
n_types = len(unique_event_types)
ncols = min(2, max(n_types, 1))
nrows = max(1, int(np.ceil(n_types / ncols)))

# squeeze=False keeps `ax` two-dimensional even for a single row or column.
fig, ax = plt.subplots(nrows=nrows, ncols=ncols,
                       figsize=(10 * ncols, 10 * nrows), squeeze=False)
panels = ax.ravel()
for panel, event_type in zip(panels, unique_event_types):
    eventtypedf = typecategory[typecategory['EventType'] == event_type]
    # Slices are plotted counter-clockwise, starting at 90 degrees.
    panel.pie(eventtypedf['count'], labels=eventtypedf['Category'],
              autopct='%1.1f%%', shadow=True, startangle=90)
    panel.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
    panel.set_title(event_type)
    panel.legend(loc="lower right")

# Hide grid cells that have no event type to show (odd number of types).
for spare in panels[n_types:]:
    spare.set_visible(False)
plt.show()

In [None]:
# Sorted array of the distinct category names present in the counts table.
unique_category = np.unique(typecategory['Category'].to_numpy())

In [None]:
# Wide table of permit counts: one row per Category, one column per EventType.
# Each (Category, EventType) pair occurs at most once in `typecategory`, so
# summing simply carries the count over; missing pairs become NaN.
pivot = pd.pivot_table(typecategory, values='count', index=['Category'],
                       columns=['EventType'], aggfunc='sum')
# Missing pairs count as zero permits. No cumulative sum is applied here:
# the bar plot is drawn with stacked=True, which already stacks the event-type
# columns, and a cumsum down the rows would add the totals of the preceding
# categories into every bar.
stacked_bar_df = pivot.fillna(0).reset_index()

In [None]:
import matplotlib.pyplot as plt

# One bar per category, with the event-type columns stacked on top of each other.
stacked_bar_df.plot.bar(
    x='Category',
    stacked=True,
    title='Event type distribution for each category',
)
plt.show()

In [None]:
# Create the 'hour' column with a placeholder value of 0; it is overwritten
# further down in this cell with the hours computed by transfer_to_hour.
permit['hour'] = 0

def transfer_to_hour(x):
    """Return the hours of the day (0-23) during which a permit is in effect.

    Parameters
    ----------
    x : mapping or pandas.Series
        One permit row providing 'StartDateTime' and 'duration'
        (EndDateTime - StartDateTime, a timedelta).

    Returns
    -------
    numpy.ndarray
        Integer hours of the day touched by the permit, each listed once.
        * duration longer than one day -> all hours, ``0..23``;
        * otherwise the hours are walked forward from the start hour and wrap
          past midnight only when the permit really crosses it, e.g.
          07:00-19:00 -> ``7..18`` and 20:00-06:00 -> ``20..23, 0..5``;
        * an hour that is only partly covered (07:15-07:45) is included;
        * a zero or negative duration yields an empty array.
    """
    duration = x['duration']
    if duration > dt.timedelta(1):
        return np.arange(0, 24)
    if duration <= dt.timedelta(0):
        return np.arange(0)

    start = x['StartDateTime']
    one_hour = dt.timedelta(hours=1)
    # Time already elapsed within the starting clock hour; adding it measures
    # the span from the top of that hour to the end of the permit.
    into_start_hour = dt.timedelta(minutes=start.minute,
                                   seconds=start.second,
                                   microseconds=start.microsecond)
    span = duration + into_start_hour
    # Ceiling division: a partially covered final hour still counts. Capped at
    # 24 because there are only 24 distinct hours in a day.
    n_hours = min(24, -(-span // one_hour))
    return (start.hour + np.arange(n_hours)) % 24

# For every permit row, compute the array of hours of the day it is in effect
# and store that array in the 'hour' column.
permit['hour'] = permit.apply(transfer_to_hour, axis='columns')

In [None]:
# Preview the result: the 'hour' column of each permit now holds its
# effective hours.
permit.head()

In [None]:
# As with the zip codes, give every effective hour of a permit its own row.
hourly_permit = permit.explode('hour')

# Number of permit rows (non-null EventID) per (hour, category) and per
# (hour, event type).
hourly_category_count = hourly_permit.groupby(['hour', 'Category'])[['EventID']].count()
hourly_eventype_count = hourly_permit.groupby(['hour', 'EventType'])[['EventID']].count()

# Reshape each count table into an hour-by-label grid for the heatmaps.
heatmap_category_pt = hourly_category_count.pivot_table(
    values='EventID',
    index=['hour'],
    columns='Category',
)
heatmap_eventtype_pt = hourly_eventype_count.pivot_table(
    values='EventID',
    index=['hour'],
    columns='EventType',
)

In [None]:
# Heatmap of permit counts: hour of day (rows) by event type (columns).
sns.heatmap(data=heatmap_eventtype_pt, cmap='YlGnBu')

In [None]:
# Heatmap of permit counts: hour of day (rows) by category (columns).
sns.heatmap(data=heatmap_category_pt, cmap='YlGnBu')

## Static map (total number of permits by area)

In [None]:
# Number of permit rows per zip code, as a two-column frame (zipcode, count).
zipcodecount = permit.groupby(["ZipCode(s)"]).count()[['EventType']]
zipcodecount = zipcodecount.reset_index()
zipcodecount.columns = ['zipcode', 'count']

# Load the zip-code boundaries. The context manager closes the file once it
# has been parsed instead of leaving the handle open.
with open("ZIP map2.geojson") as geojson_file:
    zipmap = json.load(geojson_file)

# Choropleth: each GeoJSON feature is matched to a row through its
# `properties.postalCode` value and coloured by the permit count.
fig = px.choropleth_mapbox(zipcodecount,
                           geojson=zipmap,
                           locations="zipcode",
                           featureidkey="properties.postalCode",
                           color="count",
                           color_continuous_scale="blackbody",
                           mapbox_style="carto-positron",
                           zoom=9, center={"lat": 40.7, "lon": -73.9},
                           opacity=0.7,
                           hover_name="count"
                           )


fig.show()

## Dynamic map (total number of permits by area and hour)

In [None]:
# Number of permit rows per (zip code, hour of day): each effective hour of a
# permit is expanded into its own row before counting.
zipcodehourcount = permit.explode('hour').groupby(["ZipCode(s)", 'hour']).count()[['EventType']]
zipcodehourcount = zipcodehourcount.reset_index()
zipcodehourcount.columns = ['zipcode', 'hour', 'count']

# Plotly Express creates the animation frames in the order the frame values
# first appear in the data. The grouped rows are ordered by zip code first, so
# without this sort the hour slider can come out non-chronological whenever
# the first zip codes do not cover every hour. `hour` is object-typed after
# explode(), hence the cast to int before sorting.
zipcodehourcount['hour'] = zipcodehourcount['hour'].astype(int)
zipcodehourcount = zipcodehourcount.sort_values(['hour', 'zipcode']).reset_index(drop=True)

# Animated choropleth: one frame per hour, zip codes coloured by permit count.
# `zipmap` is the GeoJSON loaded in the static-map cell.
fig = px.choropleth_mapbox(zipcodehourcount,
                           geojson=zipmap,
                           locations="zipcode",
                           featureidkey="properties.postalCode",
                           color="count",
                           color_continuous_scale="blackbody",
                           mapbox_style="carto-positron",
                           zoom=10, center={"lat": 40.7, "lon": -73.9},
                           opacity=0.7,
                           hover_name="count",
                           animation_frame='hour'
                           )


fig.show()