# Exploring Disney Land Wait Time Data Set

In [1]:
import pandas as pd ## call pandas functions using "pd"
import numpy as np  ## call numpy functions using "np"

#import meta data
meta = pd.read_csv("disney.csv")

#import magic kingdom rides (wait times)
splash = pd.read_csv("splash_mountain.csv")
seven_dwarfs = pd.read_csv("7_dwarfs_train.csv")
pirates = pd.read_csv('pirates_of_caribbean.csv')


In [2]:
# First five rows of meta, five columns, # start date 2012
meta.iloc[0:5, 0:10]

Unnamed: 0,DATE,WDW_TICKET_SEASON,DAYOFWEEK,DAYOFYEAR,WEEKOFYEAR,MONTHOFYEAR,YEAR,SEASON,HOLIDAYPX,HOLIDAYM
0,01/01/2012,,1,0,1,1,2012,CHRISTMAS PEAK,0,5
1,01/02/2012,,2,1,1,1,2012,CHRISTMAS,2,5
2,01/03/2012,,3,2,1,1,2012,CHRISTMAS,3,0
3,01/04/2012,,4,3,1,1,2012,CHRISTMAS,4,0
4,01/05/2012,,5,4,1,1,2012,CHRISTMAS,5,0


In [4]:
# Dropping sactmin from magic world rides, then dropping NaN from spostmin

# Drop rows where wait time is not available (-999)
pirates = pirates[pirates.SPOSTMIN > 0]
seven_dwarfs = seven_dwarfs[seven_dwarfs.SPOSTMIN > 0]
splash = splash[splash.SPOSTMIN > 0]

splash.head() #has posted minutes for each ride

Unnamed: 0,date,datetime,SPOSTMIN,SACTMIN
0,01/01/2012,2012-01-01 10:31:00,30.0,
1,01/01/2012,2012-01-01 10:40:00,30.0,
3,01/01/2012,2012-01-01 12:02:00,30.0,
5,01/01/2012,2012-01-01 12:38:00,35.0,
6,01/01/2012,2012-01-01 12:44:00,25.0,


In [5]:
#Merging each ride with meta_data, using date as key
meta_splash = pd.merge(splash, meta, left_on="date", right_on="DATE").drop('DATE', axis = 1)

In [None]:
#skip run due to crashing for now
#meta_dwarfs = pd.merge(seven_dwarfs, meta, left_on="date", right_on="DATE").drop('DATE', axis = 1)
#meta_pirates = pd.merge(pirates, meta, left_on="date", right_on="DATE").drop('DATE', axis = 1)

In [6]:
#change NAs to 0 values
meta_splash = meta_splash.fillna(0)
#format date in a way that can be worked with
meta_splash['date'] = pd.to_datetime(meta_splash['date'])
meta_splash.head()

Unnamed: 0,date,datetime,SPOSTMIN,SACTMIN,WDW_TICKET_SEASON,DAYOFWEEK,DAYOFYEAR,WEEKOFYEAR,MONTHOFYEAR,YEAR,...,HSFIREWKS,AKPRDDAY,AKPRDDT1,AKPRDDT2,AKPRDDN,AKFIREN,AKSHWNGT,AKSHWNT1,AKSHWNT2,AKSHWNN
0,2012-01-01,2012-01-01 10:31:00,30.0,0.0,0,1,0,1,1,2012,...,1,1,15:45,0.0,Mickey's Jingle Jungle Parade,0.0,0,0,0,0
1,2012-01-01,2012-01-01 10:40:00,30.0,0.0,0,1,0,1,1,2012,...,1,1,15:45,0.0,Mickey's Jingle Jungle Parade,0.0,0,0,0,0
2,2012-01-01,2012-01-01 12:02:00,30.0,0.0,0,1,0,1,1,2012,...,1,1,15:45,0.0,Mickey's Jingle Jungle Parade,0.0,0,0,0,0
3,2012-01-01,2012-01-01 12:38:00,35.0,0.0,0,1,0,1,1,2012,...,1,1,15:45,0.0,Mickey's Jingle Jungle Parade,0.0,0,0,0,0
4,2012-01-01,2012-01-01 12:44:00,25.0,0.0,0,1,0,1,1,2012,...,1,1,15:45,0.0,Mickey's Jingle Jungle Parade,0.0,0,0,0,0


In [7]:
#sum all posted wait times by day in meta_splash
sum_by_day = meta_splash.groupby('date')['SPOSTMIN'].sum()
#reset index to create dataframe columns
sum_by_day = sum_by_day.reset_index()
sum_by_day = pd.DataFrame(sum_by_day,columns=['date','SPOSTMIN'])
sum_by_day.head()

Unnamed: 0,date,SPOSTMIN
0,2012-01-01,415.0
1,2012-01-02,350.0
2,2012-01-03,10.0
3,2012-01-04,120.0
4,2012-01-05,535.0


In [15]:
#compute weighted average of posted wait times by day
avg = sum_by_day.groupby('date')['SPOSTMIN'].mean()
#reset index to create dataframe columns
avg = avg.reset_index()
#name columns of df
avg = pd.DataFrame(avg,columns=['date','SPOSTMIN'])
avg.head()

date        datetime64[ns]
SPOSTMIN           float64
dtype: object

In [23]:
meta['DATE'] = pd.to_datetime(meta['DATE'])
#merge back with original meta set for daily weighted averages
meta_splash2 = pd.merge(avg, meta, left_on="date", right_on="DATE").drop('DATE', axis = 1)
meta_splash2.head()

Unnamed: 0,date,SPOSTMIN,WDW_TICKET_SEASON,DAYOFWEEK,DAYOFYEAR,WEEKOFYEAR,MONTHOFYEAR,YEAR,SEASON,HOLIDAYPX,...,HSFIREWKS,AKPRDDAY,AKPRDDT1,AKPRDDT2,AKPRDDN,AKFIREN,AKSHWNGT,AKSHWNT1,AKSHWNT2,AKSHWNN
0,2012-01-01,415.0,,1,0,1,1,2012,CHRISTMAS PEAK,0,...,1,1,15:45,,Mickey's Jingle Jungle Parade,,0,,,
1,2012-01-02,350.0,,2,1,1,1,2012,CHRISTMAS,2,...,1,1,15:45,,Mickey's Jingle Jungle Parade,,0,,,
2,2012-01-03,10.0,,3,2,1,1,2012,CHRISTMAS,3,...,1,1,15:45,,Mickey's Jammin' Jungle Parade,,0,,,
3,2012-01-04,120.0,,4,3,1,1,2012,CHRISTMAS,4,...,1,1,15:45,,Mickey's Jammin' Jungle Parade,,0,,,
4,2012-01-05,535.0,,5,4,1,1,2012,CHRISTMAS,5,...,1,1,15:45,,Mickey's Jammin' Jungle Parade,,0,,,


In [31]:
#fill na for new meta splash data
meta_splash2 = meta_splash2.fillna(0)

In [32]:
xmas = meta_splash2[meta_splash2.SEASON == 'CHRISTMAS']
xmas

Unnamed: 0,date,SPOSTMIN,WDW_TICKET_SEASON,DAYOFWEEK,DAYOFYEAR,WEEKOFYEAR,MONTHOFYEAR,YEAR,SEASON,HOLIDAYPX,...,HSFIREWKS,AKPRDDAY,AKPRDDT1,AKPRDDT2,AKPRDDN,AKFIREN,AKSHWNGT,AKSHWNT1,AKSHWNT2,AKSHWNN
1,2012-01-02,350.0,0,2,1,1,1,2012,CHRISTMAS,2,...,1,1,15:45,0.0,Mickey's Jingle Jungle Parade,0.0,0,0,0,0
2,2012-01-03,10.0,0,3,2,1,1,2012,CHRISTMAS,3,...,1,1,15:45,0.0,Mickey's Jammin' Jungle Parade,0.0,0,0,0,0
3,2012-01-04,120.0,0,4,3,1,1,2012,CHRISTMAS,4,...,1,1,15:45,0.0,Mickey's Jammin' Jungle Parade,0.0,0,0,0,0
4,2012-01-05,535.0,0,5,4,1,1,2012,CHRISTMAS,5,...,1,1,15:45,0.0,Mickey's Jammin' Jungle Parade,0.0,0,0,0,0
5,2012-01-06,565.0,0,6,5,1,1,2012,CHRISTMAS,6,...,1,1,15:45,0.0,Mickey's Jammin' Jungle Parade,0.0,0,0,0,0
6,2012-01-07,205.0,0,7,6,1,1,2012,CHRISTMAS,7,...,1,1,15:45,0.0,Mickey's Jammin' Jungle Parade,0.0,0,0,0,0
7,2012-01-08,415.0,0,1,7,2,1,2012,CHRISTMAS,8,...,1,1,15:45,0.0,Mickey's Jammin' Jungle Parade,0.0,0,0,0,0
8,2012-01-09,535.0,0,2,8,2,1,2012,CHRISTMAS,8,...,1,1,15:45,0.0,Mickey's Jammin' Jungle Parade,0.0,0,0,0,0
348,2012-12-18,610.0,0,3,352,51,12,2012,CHRISTMAS,7,...,1,1,15:45,0.0,Mickey's Jingle Jungle Parade,0.0,0,0,0,0
349,2012-12-19,1615.0,0,4,353,51,12,2012,CHRISTMAS,6,...,1,1,15:45,0.0,Mickey's Jingle Jungle Parade,0.0,0,0,0,0
