In [1]:
import pandas as pd
import numpy as np

import geopandas as gpd
from shapely.geometry import Point
import rtree
import pickle

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

In [2]:
event_list_df = pd.read_csv("data/NYC_Parks_Events_Listing___Event_Listing.csv", parse_dates=True)

In [3]:
loc_df = pd.read_csv('data/NYC_Parks_Events_Listing___Event_Locations.csv')

In [4]:
cat_df = pd.read_csv('data/NYC_Parks_Events_Listing___Event_Categories.csv')

In [5]:
org_df = pd.read_csv('data/NYC_Parks_Events_Listing___Event_Organizers.csv')

In [6]:
event_list_df['start_time'] = pd.to_datetime(event_list_df['start_time'], format='%H:%M')

In [7]:
event_list_df[event_list_df.end_time == '24:00']

Unnamed: 0,event_id,title,date,start_time,end_time,location_description,description,snippet,phone,email,cost_free,cost_description,must_see,url,notice
11088,79666,New Year's Eve Fireworks in Prospect Park,12/31/2013,1900-01-01 23:00:00,24:00,,<p>\n\tStart the celebrations off around 11 p....,Start the celebrations off around 11 p.m. with...,,,1,,1,new-years-eve-fireworks1,0
23359,102431,New Year's Eve Fireworks,12/31/2014,1900-01-01 23:00:00,24:00,,<p>Kick off the New Year&#39;s with this belov...,Kick off the New Year's with this beloved Broo...,,,1,,1,new-years-eve-fireworks,0


In [8]:
event_list_df = event_list_df.drop(event_list_df.index[23359])
event_list_df = event_list_df.drop(event_list_df.index[11088])

In [9]:
event_list_df[event_list_df.end_time == '24:00']

Unnamed: 0,event_id,title,date,start_time,end_time,location_description,description,snippet,phone,email,cost_free,cost_description,must_see,url,notice


In [10]:
event_list_df['end_time'] = pd.to_datetime(event_list_df['end_time'], format='%H:%M')

In [11]:
event_list_df['date'] = pd.to_datetime(event_list_df.date)

In [12]:
event_list_df['is_event'] = 1

In [13]:
event_list_df['event_duration'] = ((event_list_df.end_time-event_list_df.start_time).dt.total_seconds())/60

In [14]:
event_list_df['DOW'] = event_list_df.date.dt.weekday_name

In [15]:
event_list_df['TOD'] = event_list_df.start_time.dt.hour

In [None]:
event_list_df.date.dt.

In [65]:
event_list_df['is_weekend'] = ((event_list_df.date.dt.dayofweek) // 5 == 1).astype(float)

In [66]:
cat_df.name.unique().shape

(122,)

In [67]:
org_df.event_organizer.unique().shape

(2237,)

In [68]:
event_list_df = event_list_df[event_list_df.date.dt.year == 2017]

In [69]:
merged_df = pd.merge(event_list_df, loc_df, on=['event_id'])

In [70]:
merged_df = pd.merge(merged_df, cat_df, on=['event_id'])

In [71]:
merged_df = pd.merge(merged_df, org_df, on=['event_id'])

In [72]:
merged_df.head()

Unnamed: 0,event_id,title,date,start_time,end_time,location_description,description,snippet,phone,email,cost_free,cost_description,must_see,url,notice,is_event,event_duration,DOW,TOD,is_weekend,name_x,park_id,lat,long,address,zip,borough,accessible,Location 1,name_y,event_organizer
0,107941,Conference House Art Expo 2017,2017-05-20,1900-01-01 11:00:00,1900-01-01 17:00:00,,<p>Come view original artworks and tree instal...,Come view original artworks and tree installat...,(718) 984-6046,admin@conferencehouse.org,1,,0,resiliency-and-elements,0,1,360.0,Saturday,11,1.0,Conference House Park Visitor Center,R006,40.502602,-74.251801,298 Satterlee Street,,R,1.0,"(40.50260162353500000, -74.25180053710900000)",Arts & Crafts,Conference House Park
1,107941,Conference House Art Expo 2017,2017-05-20,1900-01-01 11:00:00,1900-01-01 17:00:00,,<p>Come view original artworks and tree instal...,Come view original artworks and tree installat...,(718) 984-6046,admin@conferencehouse.org,1,,0,resiliency-and-elements,0,1,360.0,Saturday,11,1.0,Conference House Park Visitor Center,R006,40.502602,-74.251801,298 Satterlee Street,,R,1.0,"(40.50260162353500000, -74.25180053710900000)",Art,Conference House Park
2,107941,Conference House Art Expo 2017,2017-05-20,1900-01-01 11:00:00,1900-01-01 17:00:00,,<p>Come view original artworks and tree instal...,Come view original artworks and tree installat...,(718) 984-6046,admin@conferencehouse.org,1,,0,resiliency-and-elements,0,1,360.0,Saturday,11,1.0,Conference House Park Visitor Center,R006,40.502602,-74.251801,298 Satterlee Street,,R,1.0,"(40.50260162353500000, -74.25180053710900000)",Markets,Conference House Park
3,107941,Conference House Art Expo 2017,2017-05-20,1900-01-01 11:00:00,1900-01-01 17:00:00,,<p>Come view original artworks and tree instal...,Come view original artworks and tree installat...,(718) 984-6046,admin@conferencehouse.org,1,,0,resiliency-and-elements,0,1,360.0,Saturday,11,1.0,Conference House Park Visitor Center,R006,40.502602,-74.251801,298 Satterlee Street,,R,1.0,"(40.50260162353500000, -74.25180053710900000)",Seniors,Conference House Park
4,108867,Thursday Evening Hours at the Merchant’s House...,2017-01-05,1900-01-01 12:00:00,1900-01-01 20:00:00,,"<p>Beginning May 7, 2015, the Merchant’s House...","Beginning May 7, 2015, the Merchant’s House Mu...",(212) 777-1089,emily@merchantshouse.org,0,"Admission is $10, $5 seniors & students, Free ...",0,thursday-evening-hours-at-the-merchants-house-...,0,1,480.0,Thursday,12,0.0,Merchant's House Museum,M318,40.727699,-73.992302,29 East 4th Street,10003.0,M,0.0,"(40.72769927978515600, -73.99230194091797000)",Historic House Trust Sites,Merchant's House Museum


In [73]:
merged_df.isnull().sum()

event_id                    0
title                       0
date                        0
start_time                  0
end_time                    0
location_description    32373
description                 0
snippet                    12
phone                    6858
email                    9380
cost_free                   0
cost_description        33826
must_see                    0
url                         4
notice                      0
is_event                    0
event_duration              0
DOW                         0
TOD                         0
is_weekend                  0
name_x                      0
park_id                  3977
lat                         0
long                        0
address                 19123
zip                     17440
borough                   696
accessible              18799
Location 1                  0
name_y                      0
event_organizer             0
dtype: int64

In [84]:
test = merged_df.groupby('name_y')[['event_id','is_weekend']].count().reset_index()

In [87]:
test[test.is_weekend == 0.0]

Unnamed: 0,name_y,event_id,is_weekend


In [88]:
test.sort_values('event_id', ascending=False)

Unnamed: 0,name_y,event_id,is_weekend
8,Best for Kids,3352,3352
75,Nature,2778,2778
2,Art,2273,2273
29,Education,2156,2156
38,Fitness,2023,2023
0,Accessible,1949,1949
101,Tours,1640,1640
56,History,1485,1485
91,Seniors,1432,1432
80,Outdoor Fitness,1316,1316
