In [33]:
import pandas as pd

#Query the endpoint for a single aggregate: the count of EventID values
numrowsurl = 'https://data.cityofnewyork.us/resource/tg4x-b46p.json?$select=count(EventID)'
numrowsjson = pd.read_json(numrowsurl, orient='columns')
#Show which column name(s) the count query came back with
print(numrowsjson.columns.tolist())

['count_EventID']


In [35]:
#Pull the scalar out of the count column explicitly: calling int() on a whole
#Series is deprecated in recent pandas and fails if more than one row comes back
rowcount = int(numrowsjson['count_EventID'].iloc[0])
print(rowcount)

40172


In [37]:
#Download the full dataset one page at a time and combine the pages once at the end
baseurl = 'https://data.cityofnewyork.us/resource/tg4x-b46p.json'
limit = 1000        #rows requested per page
max_attempts = 3    #tries per page before giving up

pages = []
#max(rowcount, 1) keeps the first page request even if the reported count is 0
for offset in range(0, max(rowcount, 1), limit):
    #An explicit $order keeps page contents stable from one request to the next
    tempurl = baseurl + '?$limit=' + str(limit) + '&$offset=' + str(offset) + '&$order=:id'
    for attempt in range(1, max_attempts + 1):
        try:
            tempfilmdata = pd.read_json(tempurl, orient='columns')
            break
        except Exception as err:
            print("Error Out (offset " + str(offset) + ", attempt " + str(attempt) + "): " + str(err))
            #Re-raise after the last attempt instead of retrying the same page forever
            if attempt == max_attempts:
                raise
    pages.append(tempfilmdata)

#Single concat instead of repeated DataFrame.append (removed in pandas 2.x);
#ignore_index gives every row a unique label rather than repeating 0..999 per page
filmdata = pd.concat(pages, ignore_index=True)

In [38]:
#Column names first, then (rows, columns) of the combined frame
print(filmdata.columns.tolist())
print(filmdata.shape)

['borough', 'category', 'communityboard_s', 'country', 'enddatetime', 'enteredon', 'eventagency', 'eventid', 'eventtype', 'parkingheld', 'policeprecinct_s', 'startdatetime', 'subcategoryname', 'zipcode_s']
(40172, 14)


In [80]:
#Raw zipcode_s column as a plain Python list; preview the first five entries
zipcodes = filmdata['zipcode_s'].tolist()
print(zipcodes[:5])

['11222', '11222', '11101', '11222', '11222']


In [79]:
#Flatten entries that hold several comma-separated ZIP codes into one flat list
zipcodes_clean = []
for sublist in zipcodes:
    #Skip missing entries so they are not tallied as the literal string 'nan'
    if sublist is None or (isinstance(sublist, float) and sublist != sublist):
        continue
    for val in str(sublist).split(","):
        #Strip padding so ' 11222' and '11222' count as the same ZIP code
        val = val.strip()
        if val:
            zipcodes_clean.append(val)
print(zipcodes_clean[0:5])

['11222', '11222', '11101', '11222', '11222']


In [42]:
#https://en.wikipedia.org/wiki/Boroughs_of_New_York_City -- Area of Each Borough
#Manhattan: 22.83 mi^2
#Bronx: 42.1 mi^2
#Brooklyn: 70.82 mi^2
#Queens: 108.53 mi^2
#Staten Island: 58.37 mi^2

In [68]:
#Top Most Common Zip Codes Where Filming Occurs
from collections import Counter
#Tally how many times each cleaned ZIP code appears
zipinstances = Counter()
zipinstances.update(zipcodes_clean)

#Most common zip codes are in Greenpoint, Brooklyn
#https://untappedcities.com/2014/04/04/nycs-micro-neighborhoods-little-poland-in-greenpoint-brooklyn/
#Long Island City/Hunters Point, Queens; Hell's Kitchen, Manhattan;
#and Hudson Yards, Manhattan

In [71]:
#(zip, count) pairs, most frequent first, turned into a two-column table
rankedzips = zipinstances.most_common()
plotzip = pd.DataFrame(rankedzips)
plotzip.columns = ['Zip Code', 'Movie Shoot Frequency']
print(plotzip.head(5))

  Zip Code  Movie Shoot Frequency
0    11222                   3738
1    11101                   3491
2    10019                   2353
3    10001                   1949
4    10036                   1918


In [72]:
#Import NYC Zip Code GeoJSON file
import requests
geourl = 'http://data.beta.nyc//dataset/3bf5fb73-edb5-4b05-bb29-7c95f4a727fc/resource/6df127b1-6d04-4bb7-b983-07402a2c3f90/download/f4129d9aa6dd4281bc98d0f701629b76nyczipcodetabulationareas.geojson'
#Timeout so a stalled download cannot hang the notebook indefinitely
r = requests.get(url=geourl, timeout=60)
#Fail loudly on an HTTP error instead of trying to parse an error page as JSON
r.raise_for_status()
tmp = r.json()

#Remove ZIP codes not in our dataset
#Build the lookup once as a set rather than rebuilding a list for every feature
filmedzips = set(plotzip['Zip Code'])
geozips = [feature for feature in tmp['features']
           if feature['properties']['postalCode'] in filmedzips]
print(len(tmp['features']))
print(len(geozips))

262
192


In [73]:
#Create new JSON object
import json

#FeatureCollection wrapper around only the features kept in geozips
newjson = {'type': 'FeatureCollection', 'features': geozips}

#Save JSON object as file; the with-block closes the handle so the data is flushed to disk
with open("updated-nycgeo.json", "w") as geofile:
    charswritten = geofile.write(json.dumps(newjson, sort_keys = True, indent = 4, separators = (',',': ')))
#Last expression of the cell: number of characters written
charswritten

1757223

In [74]:
import folium

def create_map(table, zips, mapped_feature, add_text = ''):
    """Draw a ZIP-code choropleth and save it as '<mapped_feature>_map.html'.

    Parameters
    ----------
    table : data passed to the choropleth layer as ``data``.
    zips : name of the column in ``table`` that is matched against
        ``feature.properties.postalCode`` in the GeoJSON file.
    mapped_feature : name of the column in ``table`` that drives the fill
        colour; also used for the legend title and the output file name.
    add_text : optional extra words placed in the legend between the
        feature name and 'Across NY'.

    Returns
    -------
    None. The map is written to disk in the current working directory.
    """
    #Read in Updated GeoJSON file
    ny_geo = r'updated-nycgeo.json'
    #Initiate Folium map with NY's longitude and latitude
    #NOTE(review): this built-in tile set may not exist in newer folium releases -- confirm
    m = folium.Map(location = [40.7128, -74.0060], zoom_start = 12, tiles = "Mapbox Control Room")

    #Join only the non-empty parts so an empty add_text does not leave a double space
    feature_label = (' ').join(mapped_feature.split('_')).title()
    legend_name = ' '.join(part for part in (feature_label, add_text, 'Across NY') if part)

    choropleth_options = dict(
        geo_data = ny_geo,
        fill_opacity = 0.7,
        line_opacity = 0.2,
        data = table,
        key_on = 'feature.properties.postalCode',
        columns = [zips, mapped_feature],
        fill_color = 'YlGn',
        legend_name = legend_name)

    #Create Choropleth Map
    #Prefer the folium.Choropleth class; fall back to the older Map.choropleth
    #method on folium versions that do not provide the class
    if hasattr(folium, 'Choropleth'):
        folium.Choropleth(**choropleth_options).add_to(m)
    else:
        m.choropleth(**choropleth_options)
    folium.LayerControl().add_to(m)
    m.save(outfile = mapped_feature + '_map.html')

In [75]:
#Render the shoot-frequency choropleth keyed by ZIP code
create_map(
    table = plotzip,
    zips = 'Zip Code',
    mapped_feature = 'Movie Shoot Frequency',
    add_text = '',
)

In [48]:
#Most Commonly Filmed Content in NYC
#Pull both label columns out as plain lists
category, subcategory = (filmdata[col].tolist() for col in ('category', 'subcategoryname'))

#Tally each label, then show the five most frequent of each
catinstances = Counter(category)
subcatinstances = Counter(subcategory)
for tally in (catinstances, subcatinstances):
    print(tally.most_common(5))

[('Television', 21175), ('Film', 7282), ('Theater', 3660), ('Commercial', 3434), ('Still Photography', 2609)]
[('Episodic series', 11539), ('Feature', 5775), ('Not Applicable', 5756), ('Cable-episodic', 4277), ('Theater', 3660)]


In [78]:
#Most Common Content by Borough
#Non-null eventid count per (borough, category) pair, largest first
boroughgroup = (
    filmdata
    .groupby(['borough','category'])['eventid']
    .count()
    .sort_values(ascending=False)
)
print(boroughgroup.head(5))

#Should we normalize this?

borough    category  
Manhattan  Television    8842
Brooklyn   Television    6593
Queens     Television    4815
Manhattan  Film          3430
           Theater       3179
Name: eventid, dtype: int64


In [81]:
#What parking areas are being taken the most advantage of?
#Worst places to park?
#Non-null eventid count per distinct parkingheld value, largest first
parkinggroup = (
    filmdata
    .groupby(['parkingheld'])['eventid']
    .count()
    .sort_values(ascending=False)
)
print(parkinggroup.head(5))

#Top Area = Times Square, UWS, Hell's Kitchen by the Pier, Greenpoint, BK
#Is it that parking is not needed in top zip codes? 
#Maybe aggregate existence of ParkingHeld by ZipCode and see which Zip has the most

parkingheld
WEST   48 STREET between 6 AVENUE and 7 AVENUE                                                                                                                                      814
AMSTERDAM AVENUE between WEST   73 STREET and WEST   75 STREET,  BROADWAY between WEST   74 STREET and WEST   75 STREET,  WEST   75 STREET between AMSTERDAM AVENUE and BROADWAY    404
WEST   55 STREET between 11 AVENUE and 12 AVENUE                                                                                                                                    382
NORTH HENRY STREET between GREENPOINT AVENUE and MESEROLE AVENUE                                                                                                                    259
WEST   44 STREET between BROADWAY and 6 AVENUE                                                                                                                                      220
Name: eventid, dtype: int64


In [52]:
#Parking Held by ZipCode
#zipcodes_clean = []
#parkingheld_clean = []
#for i in range(0,len(filmdata['zipcode_s'])):
#    tempsublist = str(filmdata['zipcode_s'][i]).split(",")
#    for val in tempsublist:
#        zipcodes_clean.append(val)
#        parkingheld_clean.append(filmdata['parkingheld'][i])
#
#zippark_df = pd.DataFrame(
#    {'zipcode': zipcodes_clean,
#     'parkingheld': parkingheld_clean
#    })

#parkingbyzip = zippark_df.groupby(['zipcode']).count()['parkingheld'].sort_values(ascending=False)
#print(parkingbyzip[0:5])

#Parking is held for every single permit requested because these numbers match up with the ZipCode counts overall
#What is number of residential vs commercial buildings in each of these areas?
#What intersections in GreenPoint, Long Island City, Hell's Kitchen?

In [53]:
#Next Steps: 
#Where are the longest shoots?
#Where have most minutes of parking time been held?
#Different types of EventAgencies?
#Which zip code gets the most hours of content shoot?
#Distribution of Event Types
#https://patch.com/new-york/parkslope/when-filming-brooklyn-too-much-good-thing