# Exploring Disney Land Wait Time Data Set

In [1]:
import pandas as pd ## call pandas functions using "pd"
import numpy as np  ## call numpy functions using "np"

# Load the calendar/park metadata table (its DATE column is the join key used later).
meta = pd.read_csv("disney.csv")

# Load the wait-time logs for three Magic Kingdom rides, one CSV per ride.
splash, seven_dwarfs, pirates = (
    pd.read_csv(ride_csv)
    for ride_csv in (
        "splash_mountain.csv",
        "7_dwarfs_train.csv",
        "pirates_of_caribbean.csv",
    )
)


In [2]:
# Preview of meta: first five rows and first ten columns (the data starts on 01/01/2012)
meta.iloc[0:5, 0:10]

Unnamed: 0,DATE,WDW_TICKET_SEASON,DAYOFWEEK,DAYOFYEAR,WEEKOFYEAR,MONTHOFYEAR,YEAR,SEASON,HOLIDAYPX,HOLIDAYM
0,01/01/2012,,1,0,1,1,2012,CHRISTMAS PEAK,0,5
1,01/02/2012,,2,1,1,1,2012,CHRISTMAS,2,5
2,01/03/2012,,3,2,1,1,2012,CHRISTMAS,3,0
3,01/04/2012,,4,3,1,1,2012,CHRISTMAS,4,0
4,01/05/2012,,5,4,1,1,2012,CHRISTMAS,5,0


In [3]:
# Keep only the observations that have a positive posted wait time (SPOSTMIN).
# Rows where SPOSTMIN is the -999 "not available" marker, zero, or NaN all fail
# the `> 0` comparison and are therefore removed. SACTMIN is not touched here.
pirates = pirates.loc[pirates["SPOSTMIN"].gt(0)]
seven_dwarfs = seven_dwarfs.loc[seven_dwarfs["SPOSTMIN"].gt(0)]
splash = splash.loc[splash["SPOSTMIN"].gt(0)]

# Preview the filtered Splash Mountain log (posted minutes per observation)
splash.head()

Unnamed: 0,date,datetime,SPOSTMIN,SACTMIN
0,01/01/2012,2012-01-01 10:31:00,30.0,
1,01/01/2012,2012-01-01 10:40:00,30.0,
3,01/01/2012,2012-01-01 12:02:00,30.0,
5,01/01/2012,2012-01-01 12:38:00,35.0,
6,01/01/2012,2012-01-01 12:44:00,25.0,


In [4]:
# Attach the calendar metadata to every Splash Mountain observation.
# The ride's `date` is matched against the metadata's `DATE` (default inner join),
# and the now-duplicate key column `DATE` is dropped afterwards.
meta_splash = splash.merge(meta, left_on="date", right_on="DATE").drop(columns="DATE")

In [None]:
#skipped for now: running these two merges was crashing
#meta_dwarfs = pd.merge(seven_dwarfs, meta, left_on="date", right_on="DATE").drop('DATE', axis = 1)
#meta_pirates = pd.merge(pirates, meta, left_on="date", right_on="DATE").drop('DATE', axis = 1)

In [6]:
# Replace every missing value in the merged frame with 0.
# NOTE(review): this is a blanket fill -- missing SACTMIN readings and missing
# text/time metadata fields become 0 as well, so a 0 in those columns means
# "missing", not a measured value.
meta_splash = meta_splash.fillna(0)

# Parse the date strings into datetime64 so the column can be grouped and
# manipulated as real dates. The raw values are month/day/year
# (e.g. "01/02/2012" is 2 January 2012); stating the format makes the parsing
# explicit instead of relying on inference.
meta_splash['date'] = pd.to_datetime(meta_splash['date'], format="%m/%d/%Y")
meta_splash.head()

Unnamed: 0,date,datetime,SPOSTMIN,SACTMIN,WDW_TICKET_SEASON,DAYOFWEEK,DAYOFYEAR,WEEKOFYEAR,MONTHOFYEAR,YEAR,...,HSFIREWKS,AKPRDDAY,AKPRDDT1,AKPRDDT2,AKPRDDN,AKFIREN,AKSHWNGT,AKSHWNT1,AKSHWNT2,AKSHWNN
0,2012-01-01,2012-01-01 10:31:00,30.0,0.0,0,1,0,1,1,2012,...,1,1,15:45,0.0,Mickey's Jingle Jungle Parade,0.0,0,0,0,0
1,2012-01-01,2012-01-01 10:40:00,30.0,0.0,0,1,0,1,1,2012,...,1,1,15:45,0.0,Mickey's Jingle Jungle Parade,0.0,0,0,0,0
2,2012-01-01,2012-01-01 12:02:00,30.0,0.0,0,1,0,1,1,2012,...,1,1,15:45,0.0,Mickey's Jingle Jungle Parade,0.0,0,0,0,0
3,2012-01-01,2012-01-01 12:38:00,35.0,0.0,0,1,0,1,1,2012,...,1,1,15:45,0.0,Mickey's Jingle Jungle Parade,0.0,0,0,0,0
4,2012-01-01,2012-01-01 12:44:00,25.0,0.0,0,1,0,1,1,2012,...,1,1,15:45,0.0,Mickey's Jingle Jungle Parade,0.0,0,0,0,0


In [8]:
# Mean posted wait time (SPOSTMIN) for each date.
# This is the plain mean over all observations recorded on that date; every
# observation counts equally (no time-of-day weighting is applied).
wt = meta_splash.groupby("date")["SPOSTMIN"].agg("mean")
wt

date
2012-01-01    37.727273
2012-01-02    43.750000
2012-01-03     5.000000
2012-01-04    17.142857
2012-01-05    41.153846
2012-01-06    37.666667
2012-01-07    25.625000
2012-01-08    18.043478
2012-01-09    53.500000
2012-01-10    32.307692
2012-01-11     7.857143
2012-01-12    56.818182
2012-01-13     5.000000
2012-01-14     8.000000
2012-01-15    14.444444
2012-01-16    33.000000
2012-01-17    51.250000
2012-01-18    10.000000
2012-01-19     6.250000
2012-01-20    36.250000
2012-01-21    28.333333
2012-01-22    20.000000
2012-01-23    38.750000
2012-01-24    42.500000
2012-01-25    10.000000
2012-01-26    36.875000
2012-01-27    12.500000
2012-01-28    30.000000
2012-01-29    19.600000
2012-01-30    22.500000
                ...    
2019-04-01    42.500000
2019-04-02    52.833333
2019-04-03    75.280000
2019-04-04    54.797297
2019-04-05    53.798450
2019-04-06    56.694215
2019-04-07    44.007937
2019-04-08    44.026549
2019-04-09    28.907563
2019-04-10    45.750000
2019-04-11 

In [None]:
# Total of all posted wait times (SPOSTMIN) recorded on each date.
# as_index=False keeps `date` as a regular column, so the result is already a
# two-column frame (date, SPOSTMIN) with a fresh integer index.
sum_by_day = meta_splash.groupby("date", as_index=False)["SPOSTMIN"].sum()
sum_by_day = sum_by_day[["date", "SPOSTMIN"]]
sum_by_day.head()

In [None]:
# Make sure the date column holds datetime values so it can be manipulated as dates.
# If the column is already datetime64 this leaves the values unchanged.
sum_by_day = sum_by_day.assign(date=pd.to_datetime(sum_by_day["date"]))
sum_by_day.head()

In [None]:
# Average posted wait time (SPOSTMIN) per day.
# The mean is taken over the individual observations in meta_splash. Averaging
# sum_by_day per date would simply return the daily totals again, because that
# frame already holds exactly one row per date.
# Every observation counts equally; no time-of-day weighting is applied.
avg = meta_splash.groupby('date', as_index=False)['SPOSTMIN'].mean()
# keep an explicit two-column layout: date, SPOSTMIN
avg = avg[['date', 'SPOSTMIN']]
avg.head()

In [None]:
# Show the first five rows of the merged Splash Mountain + metadata frame
meta_splash.head(n=5)