In [165]:
import pandas as pd
import numpy as np

import datetime as dt

import holidays
from calendra.asia import Qatar
from calendra.asia import HongKong

from sklearn.pipeline import Pipeline

In [163]:
import pandas as pd
import numpy as np
import datetime as dt

class DataRetrieval():
    
    #def __init__(self):

    
    def get_calls_data(self, filepath='../data/Calls_Table_data.csv', delimiter='\t'):
        """Retrieves call data from filepath"""
        df = pd.read_csv(filepath, delimiter='\t', encoding='utf-16')
        df.drop_duplicates(inplace=True)
        df.reset_index(drop=True, inplace=True)        
        return df
    
    def get_weather_data(self, filepath='../data/historical_weather.csv'):
        """Retrieves weather data from filepath"""
        df = pd.read_csv(filepath)
        df['date'] = pd.to_datetime(df['DATE']).dt.date
        weather_hist = (df[['date', 'TMAX', 'PRCP', 'SNOW']]
                        .rename(columns={'PRCP':'precip', 'TMAX':'temp_max',
                                         'PRCP':'precip', 'SNOW':'snow'}))
        weather_hist['precip^2'] = weather_hist['precip']**2
        weather_hist['snow^2'] = weather_hist['snow']**2      
        return weather_hist
    
    def get_seahawks_schedule(self, filepath='../data/seahawks_schedule.csv'):
        """
        Retrieves Seahawks game schedule from filepath
        
        Will need to rewrite scraper used to retrieve this data
        """
        df_in = pd.read_csv(filepath)
        df_dropna = df_in.copy()[df_in['Opp'].notna()].reset_index(drop=True)
        df = df_dropna.copy()[df_dropna['Opp'] != 'Bye Week'].reset_index(drop=True)
        
        
        df['date'] = ''
        for i in range(len(df)):
            df['date'][i] = (f"{df.iloc[i]['Date']}, {df.iloc[i]['year']}")
            
        df['date'] = pd.to_datetime(df['date']).dt.date
        
        df['seahawks_game'] = ''
        for i in range(len(df)):
            if type(df.iloc[i]['Unnamed: 10']) == str:
                if df.iloc[i]['Week'] == 'SuperBowl':
                    df['seahawks_game'][i] = 'SuperBowl'
                elif df.iloc[i]['Week'] in ['Wild Card', 'Division','Conf. Champ.']:
                    df['seahawks_game'][i] = 'away_Playoffs'
                else:
                    df['seahawks_game'][i] = 'away_Regular'
            else:     
                if df.iloc[i]['Week'] == 'SuperBowl':
                    df['seahawks_game'][i] = 'SuperBowl'
                elif df.iloc[i]['Week'] in ['Wild Card', 'Division','Conf. Champ.']:
                    df['seahawks_game'][i] = 'home_Playoffs'
                else:
                    df['seahawks_game'][i] = 'home_Regular'
        return df[['date', 'seahawks_game']].copy()
    
    def get_huskies_schedule(self, filepath='../data/huskies_schedule.csv'):
        """
        Retrieves Huskies game schedule from filepath

        Will need to rewrite scraper used to retrieve this data
        """
        df_in = pd.read_csv(filepath)
        df = df_in.copy()
        df['date'] = pd.to_datetime(df['Date']).dt.date

        df['huskies_game'] = ''
        for i in range(len(df)):
            if type(df.iloc[i]['Unnamed: 6']) == str:
                df['huskies_game'][i] = 'away'
            else:
                df['huskies_game'][i] = 'home'
        return df[['date','huskies_game']].copy()

    def get_sounders_schedule(self, filepath='../data/sounders_schedule.csv'):
        """
        Retrieves Sounders FC game schedule from filepath

        Will write instructions for getting game data
        """
        df_in = pd.read_csv(filepath)
        df = df_in.copy()
        df['date'] = pd.to_datetime(df_in['date_dd_mm_yy']).dt.date
        
        df['sounders_game'] = ''
        for i in range(len(df)):
            if df['home_team'][i].split()[0] == 'Seattle':
                df['sounders_game'][i] = 'home'
            else:
                df['sounders_game'][i] = 'away'
        return df[['date','sounders_game']].drop_duplicates()



In [934]:
import pandas as pd
import numpy as np
import datetime as dt


class CountCalls():
    """Counts calls by date either by city or neighborhood"""
    
    def __init__(self, how='city'):
        self.how = how
        self.X = None
        self.y = None
        
    def fit(self, X, y=None):
        self.X = X
        self.y = y       
        return self
    
    def transform(self):
        
        if self.how == 'city':
            df = self.X[['ORIG_TIME_QUEUED', 'EVENT']].copy()
            df['date'] = pd.to_datetime(df['ORIG_TIME_QUEUED']).dt.date
            df.drop('ORIG_TIME_QUEUED', axis=1, inplace=True)
            return df.groupby('date').count().rename(columns={'EVENT':'num_calls'}).reset_index()
        
        else:
            df = self.X[['NEIGHBORHOOD', 'ORIG_TIME_QUEUED', 'EVENT']].copy()
            df['date'] = pd.to_datetime(df['ORIG_TIME_QUEUED']).dt.date
            df.drop('ORIG_TIME_QUEUED', axis=1, inplace=True)
            counts = df.groupby(['NEIGHBORHOOD', 'date']).count().rename(columns=
                                                                         {'NEIGHBORHOOD':'neighborhood',
                                                                          'EVENT':'num_calls'}).reset_index()
            
            neighborhoods = list(counts['NEIGHBORHOOD'].unique())
            num_days = int(np.timedelta64((max(counts['date']) - min(counts['date'])), 'D')/np.timedelta64(1,'D'))+1
            start = pd.to_datetime(min(counts['date']))
            neighboor_arr = np.array([(neighborhoods*num_days)])
            neighboor_arr = neighboor_arr.flatten()
            dates = [(start + np.timedelta64(i,'D')) for i in range(num_days)]*len(neighborhoods)
            
            df2 = pd.DataFrame({"dt_time": dates})
            df2['date'] = df2["dt_time"].dt.date
            df2['neighborhood'] = neighboor_arr
            df3 = pd.merge(df2, counts, how='outer', left_on=['date','neighborhood'],
                           right_on=['date','NEIGHBORHOOD']).fillna(0)
            return df3[['date', 'neighborhood', 'num_calls']]
        
        

class FeaturizeCalls():
    """Clean incoming df to fit into model"""
    
    def __init__(self):
        self.X = None
        self.y = None
    
    def fit(self, X, y=None):
        self.X = X
        self.y = y
        return self

    
    def transform(self):
        """tranform and clean incoming training or test"""
    
        df = self.X.copy()
        num_days = int(np.timedelta64((max(df['date']) - min(df['date'])), 'D')/np.timedelta64(1,'D'))+1
        start = pd.to_datetime(min(df['date']))
        dates = [(start + np.timedelta64(i,'D')) for i in range(num_days)]

        seq = pd.DataFrame({'dt_time': dates, 'day_seq':np.arange(num_days)})
        seq['date'] = seq['dt_time'].dt.date

        df1 = df.join(seq.set_index('date'), on='date')

        df1['year'] = df1['dt_time'].dt.year
        df1['month'] = df1['dt_time'].dt.month
        df1['day'] = df1['dt_time'].dt.day
        df1['day_of_week'] = df1['dt_time'].dt.weekday
        df1['month_day'] = df1['dt_time'].dt.strftime('%m/%d')
        df1['month_weekday'] = df1['dt_time'].dt.strftime('%b_%a')
        df1['month'] = df1['dt_time'].dt.strftime('%m/%d')     
        return df1


class DateDummies():

    def __init__(self):
        self.X = None
        self.y = None
    
    def fit(self, X, y=None):
        # X is the featurized calls dataframe
        self.X = X 
        self.y = y
        return self

    def transform(self):
        return pd.get_dummies(self.X[['date', 'day', 'month_weekday']].set_index('date'),
                              columns=['day', 'month_weekday']).reset_index()
    

class HolidayDummies():
    
    def __init__(self):
        self.X = None
        self.y = None
    
    def fit(self, X, y=None):
        # X is a dictionary of Holidays
        self.X = X 
        self.y = y
        return self

    def transform(self):
        _holidays = []
        for date in sorted(self.X.keys()):
            _holidays.append([date, self.X[date]])
        _holidays = pd.DataFrame(_holidays, columns=['date', 'holiday'])
        return pd.get_dummies(_holidays.set_index('date')).reset_index()
    
    
class EventDummies():
    
    def __init__(self, event_dict=None):
        self.X = None
        self.y = None
        self.event_dict = event_dict

        defualt_events = ({'Pride Parade' : ['6/30/2019', '6/24/2018', '6/25/2017', '6/26/2016', '6/28/2015',
                                          '6/29/2014', '6/30/2013', '6/24/2012', '6/26/2011', '6/27/2010'],
                            'Seafair' : ['8/2/2019', '8/3/2019', '8/4/2019', '8/3/2018', '8/4/2018', '8/5/2018',
                                         '8/4/2017', '8/5/2017', '8/6/2017', '8/5/2016', '8/6/2016', '8/7/2016',
                                         '7/31/2015', '8/1/2015', '8/2/2015', '8/1/2014', '8/2/2014', '8/3/2014',
                                         '8/2/2013', '8/3/2013', '8/4/2013', '8/3/2012', '8/4/2012', '8/5/2012',
                                         '8/5/2011', '8/6/2011', '8/7/2011', '8/6/2010', '8/7/2010', '8/8/2010' ],
                            'Soltice Parade': ['6/30/2019', '6/16/2018', '6/17/2017', '6/18/2016', '6/20/2015',
                                               '6/21/2014', '6/22/2013', '6/16/2012', '6/18/2011', '6/19/2010'],
                            'Womens March' : ['1/19/2019', '1/20/2018', '1/21/2017'],})
        
        if self.event_dict == None:
            self.event_dict = defualt_events
        
    
    def fit(self, X=None, y=None):
        self.X = X 
        self.y = y
        return self
    
    def transform(self):
        _events = []
        for event in self.event_dict.keys():
            for day in self.event_dict[event]:
                _events.append([dt.datetime.strptime(day, '%m/%d/%Y'), event])
                
        _events = pd.DataFrame(_events, columns=['date','local_event'])
        _events['date'] = _events['date'].dt.date
        return pd.get_dummies(_events.set_index('date')).reset_index()
    
    
class MakeDummies():
    
    def __init__(self):
        self.X = None
        self.y = None
    
    def fit(self, X, y=None):
        # X is a dataframe of sporting events
        self.X = X 
        self.y = y
        return self

    def transform(self):
        return pd.get_dummies(self.X.set_index('date')).reset_index()    
        

class JoinDataFrames():
    
    def __init__(self, weather, us_holidays, islamic_holidays, jewish_holidays,
                 events, seahawks, huskies, sounders):
        self.weather = weather
        self.us_holidays = us_holidays
        self.islamic_holidays = islamic_holidays
        self.jewish_holidays = jewish_holidays
        self.events = events
        self.seahawks = seahawks
        self.huskies = huskies
        self.sounders = sounders
        self.X = None
        self.y = None

    def fit(self, X, y=None):
        self.X = X
        self.y = y
        return self
    
    def transform(self):
        df1 = self.join_dfs(self.X, self.weather)
        df2 = self.join_dfs(df1, self.us_holidays)
        df3 = self.join_dfs(df2, self.islamic_holidays)
        df4 = self.join_dfs(df3, self.jewish_holidays)
        df5 = self.join_dfs(df4, self.events)
        df6 = self.join_dfs(df5, self.seahawks)
        df7 = self.join_dfs(df6, self.huskies)
        df8 = self.join_dfs(df7, self.sounders)
        return df8
            
    def join_dfs(self, df1, df2):
        return df1.join(df2.set_index('date'), on='date')



In [476]:
len(df2), len(neighboor_arr), len(dates)

(188505, 188505, 188505)

In [792]:
import pandas as pd
import numpy as np
import datetime as dt

import holidays
from calendra.asia import Qatar
from calendra.asia import HongKong

class SeattleHolidays:
    
    
    class CustomHolidays(holidays.US):
        def _populate(self, year=2019, start_year=2009, end_year=2030):
            # Populate the holiday list with the default US holidays
            holidays.US._populate(self, year)
            # Example: Add Ninja Turtle Day
            #self[dt.date(year, 7, 13)] = "Ninja Turtle Day"
            for year in range(start_year, end_year):
                # Add Valentine's day
                self[dt.date(year, 2, 14)] = "Valentines Day"
                # Add St Patricks Day
                self[dt.date(year, 3, 17)] = "St Patricks Day"
                # Add Easter
                self[holidays.easter(year=year)] = "Easter"
                # Add Good Friday
                self[holidays.easter(year=year)  -  dt.timedelta(days=2)] = "Good Friday"
                # Add Christmas Eve
                self[dt.date(year, 12, 24)] = "Christmas Eve"
                # Add New Years Eve
                self[dt.date(year, 12, 31)] = "New Years Eve"
                # Add Chinese New Year
                chinese = HongKong()
                for date, label in chinese.get_chinese_new_year(year):
                    self[date] = label


    class IslamicHolidays(holidays.HolidayBase):
        def _populate(self, year=2019, start_year=2009, end_year=2030):
            qatar_holidays = Qatar()
            # Populate the holiday list with blank base holidays
            holidays.HolidayBase._populate(self, year)
            for year in range(start_year, end_year):
                days = qatar_holidays.get_calendar_holidays(year)
                # Add Ramadan
                for i in range(1, len(days)):
                    if qatar_holidays.get_calendar_holidays(year)[i][1] == 'Start of ramadan':
                        for day in range(30):
                            self[qatar_holidays.get_calendar_holidays(year)[i][0] 
                                 + dt.timedelta(days=day-1)] = "Ramadan"
                    else:
                        self[qatar_holidays.get_calendar_holidays(2018)[1][0] 
                             - dt.timedelta(days=2)] = qatar_holidays.get_calendar_holidays(year)[i][1]


    class JewishHolidays(holidays.HolidayBase):
        def retrieve_data(self, filepath):
            df = pd.read_csv(filepath)
            return df

        def get_holidays(self, paths_list):
            df = self.retrieve_data(paths_list[0])
            for filepath in paths_list[1 : ]:
                cal = self.retrieve_data(filepath)
                df = pd.concat([df, cal])
            return df

        def _populate(self, year=2019, paths_list=['../data/hebcal_2010_usa.csv',
                                                   '../data/hebcal_2015_usa.csv', 
                                                   '../data/hebcal_2020_usa.csv',
                                                   '../data/hebcal_2025_usa.csv'],
                      start_year=2009, end_year=2030):
            hebcal = self.get_holidays(paths_list)
            hebcal.reset_index(drop=True, inplace=True)
            hebcal['date'] = pd.to_datetime(hebcal["Start Date"]).dt.date
            # Populate the holiday list with blank base holidays
            holidays.HolidayBase._populate(self, year)
            for year in range(start_year, end_year):
                for i in range(len(hebcal)):
                    self[hebcal['date'][i]] = hebcal['Subject'][i]


In [847]:
import pandas as pd
import numpy as np

from sklearn.pipeline import Pipeline

pipe = Pipeline(steps=[
    ('counter', CountCalls()),
    ('feturizer', FeaturizeCalls()),
    ('us_holidays', HolidayDummies()),
    ('islamic_holidays', HolidayDummies()),
    ('jewish_holidays', HolidayDummies()),
    ('event_dummifier', EventDummies()),
    ('seahawks_dummifier', MakeDummies()),
    ('huskies_dummifier', MakeDummies()),
    ('ssounders_dummifier', MakeDummies()),
    ('data_joiner', JoinDataFrames())
])


TypeError: __init__() missing 8 required positional arguments: 'weather', 'us_holidays', 'islamic_holidays', 'jewish_holidays', 'events', 'seahawks', 'huskies', and 'sounders'

In [895]:
retriever = DataRetrieval()

In [896]:
calls = retriever.get_calls_data()

In [897]:
counter = CountCalls('neighborhood')

In [898]:
counter.fit(calls)

<__main__.CountCalls at 0xb26bfca20>

In [899]:
calls_xfrmd = counter.transform()

In [900]:
len(calls_xfrmd)

188505

In [918]:
featurizer = FeaturizeCalls()

In [919]:
featurizer.fit(calls_xfrmd)

<__main__.FeaturizeCalls at 0xb89fb0898>

In [920]:
calls_w_features = featurizer.transform()

In [935]:
dt_dummifier = DateDummies()

In [936]:
dt_dummifier.fit(calls_w_features)

<__main__.DateDummies at 0xb3cdeaa20>

In [937]:
dt_dummies = dt_dummifier.transform()

In [938]:
len(dt_dummies)

188505

In [873]:
len(month_dow)

188505

In [868]:
len(calls_w_features)

188505

In [None]:
month_day_dummies = pd.get_dummies(df1[['date', 'month_weekday']].set_index('date'))
day_dummies = pd.get_dummies(df1[['date', 'day']].set_index('date'))
df2 = df1.join(month_day_dummies, on='date')
df3 = df2.join(day_dummies, on='date')  

In [740]:
weather = retriever.get_weather_data()

In [643]:
seahawks_schedule = retriever.get_seahawks_schedule()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a

In [644]:
huskies_schedule = retriever.get_huskies_schedule()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [645]:
sounders_schedule = retriever.get_sounders_schedule()

In [812]:
sports = SportsDummies()

In [813]:
sports.fit(seahawks_schedule)

<__main__.SportsDummies at 0xa98c1ba58>

In [815]:
seahawks = sports.transform()

In [816]:
sports.fit(huskies_schedule)

<__main__.SportsDummies at 0xa98c1ba58>

In [817]:
huskies = sports.transform()

In [819]:
sports.fit(sounders_schedule)

<__main__.SportsDummies at 0xa98c1ba58>

In [820]:
sounders = sports.transform()

In [723]:
us_holiday_dict = SeattleHolidays.CustomHolidays()

In [724]:
us_holiday_dict._populate()

In [725]:
holidayier = HolidayDummies()

In [726]:
holidayier.fit(us_holiday_dict)

<__main__.HolidayDummies at 0x14fdb7898>

In [727]:
us_holidays = holidayier.transform()

In [729]:
jewish_holiday_dict = SeattleHolidays.JewishHolidays()

In [730]:
jewish_holiday_dict._populate()

In [731]:
holidayier.fit(jewish_holiday_dict)

<__main__.HolidayDummies at 0x14fdb7898>

In [732]:
jewish_holidays = holidayier.transform()

In [787]:
islamic_holiday_dict = SeattleHolidays.IslamicHolidays()

In [788]:
islamic_holiday_dict._populate()



In [789]:
holidayier.fit(islamic_holiday_dict)

<__main__.HolidayDummies at 0x14fdb7898>

In [790]:
islamic_holidays = holidayier.transform()

In [791]:
islamic_holidays

Unnamed: 0,date,"holiday_Eid al-Adha, Eid al-Fitr",holiday_Ramadan
0,2009-08-21,0,1
1,2009-08-22,0,1
2,2009-08-23,0,1
3,2009-08-24,0,1
4,2009-08-25,0,1
5,2009-08-26,0,1
6,2009-08-27,0,1
7,2009-08-28,0,1
8,2009-08-29,0,1
9,2009-08-30,0,1


In [679]:
event_dummies = EventDummies()

In [680]:
event_dummies.fit()

<__main__.EventDummies at 0xad3e71b00>

In [682]:
events = event_dummies.transform()

In [None]:
def join_dfs(df1, df2):
    return df1.join(df2.set_index('date'), on='date')


In [744]:
def join_dfs(df1, df2):
    return df1.join(df2.set_index('date'), on='date')

def xform(calls, weather, us_holidays, islamic_holidays, jewish_holidays, events, seahawks, huskies, sounders):
    df1 = join_dfs(calls, weather)
    df2 = join_dfs(df1, us_holidays)
    df3 = join_dfs(df2, islamic_holidays)
    df4 = join_dfs(df3, jewish_holidays)
    df5 = join_dfs(df4, events)
    df6 = join_dfs(df5, seahawks)
    df7 = join_dfs(df6, huskies)
    df8 = join_dfs(df7, sounders)
    return df8

In [759]:
len(calls_w_features)

188505

In [794]:
def xform(calls, weather, us_holidays, islamic_holidays, jewish_holidays, events, seahawks, huskies, sounders):
    df1 = join_dfs(calls, weather)
    df2 = join_dfs(df1, us_holidays)
    df3 = join_dfs(df2, islamic_holidays)
    df4 = join_dfs(df3, jewish_holidays)
    df5 = join_dfs(df4, events)
    df6 = join_dfs(df5, seahawks)
    df7 = join_dfs(df6, huskies)
    df8 = join_dfs(df7, sounders)
    return df8
    

In [829]:
joiner = JoinDataFrames(weather, us_holidays, islamic_holidays, jewish_holidays,
              events, seahawks, huskies, sounders)

In [830]:
joiner.fit(calls_w_features)

<__main__.JoinDataFrames at 0xa98c1bcc0>

In [831]:
wow = joiner.transform()

In [832]:
len(wow)

188505

In [846]:
wow

Unnamed: 0,date,neighborhood,num_calls,dt_time,day_seq,year,month,day,day_of_week,month_day,...,local_event_Womens March,seahawks_game_SuperBowl,seahawks_game_away_Playoffs,seahawks_game_away_Regular,seahawks_game_home_Playoffs,seahawks_game_home_Regular,huskies_game_away,huskies_game_home,sounders_game_away,sounders_game_home
0,2010-01-01,ALASKA JUNCTION,1.0,2010-01-01,0,2010,01/01,1,4,01/01,...,,,,,,,,,,
1,2010-01-02,ALKI,0.0,2010-01-02,1,2010,01/02,2,5,01/02,...,,0.0,0.0,0.0,0.0,1.0,,,,
2,2010-01-03,BALLARD NORTH,1.0,2010-01-03,2,2010,01/03,3,6,01/03,...,,,,,,,,,,
3,2010-01-04,BALLARD SOUTH,0.0,2010-01-04,3,2010,01/04,4,0,01/04,...,,,,,,,,,,
4,2010-01-05,BELLTOWN,0.0,2010-01-05,4,2010,01/05,5,1,01/05,...,,,,,,,,,0.0,1.0
5,2010-01-06,BITTERLAKE,1.0,2010-01-06,5,2010,01/06,6,2,01/06,...,,,,,,,,,,
6,2010-01-07,BRIGHTON/DUNLAP,1.0,2010-01-07,6,2010,01/07,7,3,01/07,...,,,,,,,,,,
7,2010-01-08,CAPITOL HILL,1.0,2010-01-08,7,2010,01/08,8,4,01/08,...,,0.0,0.0,0.0,1.0,0.0,,,,
8,2010-01-09,CENTRAL AREA/SQUIRE PARK,0.0,2010-01-09,8,2010,01/09,9,5,01/09,...,,,,,,,,,0.0,1.0
9,2010-01-10,CHINATOWN/INTERNATIONAL DISTRICT,0.0,2010-01-10,9,2010,01/10,10,6,01/10,...,,,,,,,,,,


In [826]:
final = xform(calls_w_features, weather, us_holidays, islamic_holidays, jewish_holidays,
              events, seahawks_schedule, huskies_schedule, sounders_schedule)

In [827]:
len(final)

188505

In [765]:
df1 = join_dfs(calls_w_features, weather)

In [766]:
df2 = join_dfs(df1, us_holidays)

In [770]:
df3 = join_dfs(df2, islamic_holidays)

In [777]:
islamic_holidays

Unnamed: 0,date,holiday_Chanukah: 1 Candle,holiday_Chanukah: 2 Candles,holiday_Chanukah: 3 Candles,holiday_Chanukah: 4 Candles,holiday_Chanukah: 5 Candles,holiday_Chanukah: 6 Candles,holiday_Chanukah: 7 Candles,holiday_Chanukah: 8 Candles,holiday_Chanukah: 8th Day,...,holiday_Simchat Torah,holiday_Sukkot I,holiday_Sukkot II,holiday_Sukkot III (CH''M),holiday_Sukkot IV (CH''M),holiday_Sukkot V (CH''M),holiday_Sukkot VI (CH''M),holiday_Sukkot VII (Hoshana Raba),holiday_Tish'a B'Av,holiday_Yom Kippur
0,2010-02-27,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2010-02-28,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2010-03-29,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2010-03-30,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2010-03-31,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,2010-04-01,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,2010-04-02,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,2010-04-03,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,2010-04-04,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,2010-04-05,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [776]:
jewish_holidays

Unnamed: 0,date,holiday_Chanukah: 1 Candle,holiday_Chanukah: 2 Candles,holiday_Chanukah: 3 Candles,holiday_Chanukah: 4 Candles,holiday_Chanukah: 5 Candles,holiday_Chanukah: 6 Candles,holiday_Chanukah: 7 Candles,holiday_Chanukah: 8 Candles,holiday_Chanukah: 8th Day,...,holiday_Simchat Torah,holiday_Sukkot I,holiday_Sukkot II,holiday_Sukkot III (CH''M),holiday_Sukkot IV (CH''M),holiday_Sukkot V (CH''M),holiday_Sukkot VI (CH''M),holiday_Sukkot VII (Hoshana Raba),holiday_Tish'a B'Av,holiday_Yom Kippur
0,2010-02-27,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2010-02-28,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2010-03-29,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2010-03-30,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2010-03-31,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,2010-04-01,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,2010-04-02,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,2010-04-03,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,2010-04-04,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,2010-04-05,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [772]:
df4 = join_dfs(df3, jewish_holidays)

ValueError: columns overlap but no suffix specified: Index(['holiday_Chanukah: 1 Candle', 'holiday_Chanukah: 2 Candles',
       'holiday_Chanukah: 3 Candles', 'holiday_Chanukah: 4 Candles',
       'holiday_Chanukah: 5 Candles', 'holiday_Chanukah: 6 Candles',
       'holiday_Chanukah: 7 Candles', 'holiday_Chanukah: 8 Candles',
       'holiday_Chanukah: 8th Day', 'holiday_Erev Pesach',
       'holiday_Erev Purim', 'holiday_Erev Rosh Hashana',
       'holiday_Erev Shavuot', 'holiday_Erev Sukkot',
       'holiday_Erev Tish'a B'Av', 'holiday_Erev Yom Kippur',
       'holiday_Pesach I', 'holiday_Pesach II', 'holiday_Pesach III (CH''M)',
       'holiday_Pesach IV (CH''M)', 'holiday_Pesach V (CH''M)',
       'holiday_Pesach VI (CH''M)', 'holiday_Pesach VII',
       'holiday_Pesach VIII', 'holiday_Purim', 'holiday_Rosh Hashana 5771',
       'holiday_Rosh Hashana 5772', 'holiday_Rosh Hashana 5773',
       'holiday_Rosh Hashana 5774', 'holiday_Rosh Hashana 5775',
       'holiday_Rosh Hashana 5776', 'holiday_Rosh Hashana 5777',
       'holiday_Rosh Hashana 5778', 'holiday_Rosh Hashana 5779',
       'holiday_Rosh Hashana 5780', 'holiday_Rosh Hashana 5781',
       'holiday_Rosh Hashana 5782', 'holiday_Rosh Hashana 5783',
       'holiday_Rosh Hashana 5784', 'holiday_Rosh Hashana 5785',
       'holiday_Rosh Hashana 5786', 'holiday_Rosh Hashana 5787',
       'holiday_Rosh Hashana 5788', 'holiday_Rosh Hashana 5789',
       'holiday_Rosh Hashana 5790', 'holiday_Rosh Hashana II',
       'holiday_Shavuot I', 'holiday_Shavuot II', 'holiday_Shmini Atzeret',
       'holiday_Simchat Torah', 'holiday_Sukkot I', 'holiday_Sukkot II',
       'holiday_Sukkot III (CH''M)', 'holiday_Sukkot IV (CH''M)',
       'holiday_Sukkot V (CH''M)', 'holiday_Sukkot VI (CH''M)',
       'holiday_Sukkot VII (Hoshana Raba)', 'holiday_Tish'a B'Av',
       'holiday_Yom Kippur'],
      dtype='object')