In [1]:
import pandas as pd
import numpy as np

import datetime as dt

import holidays
from calendra.asia import Qatar
from calendra.asia import HongKong

from pyorbital.moon_phase import moon_phase

In [213]:
calls_import = pd.read_csv('../data/Calls_Table_data.csv', delimiter='\t', encoding='utf-16')

In [214]:
calls_full_df = calls_import

In [216]:
calls_full_df.drop_duplicates(inplace=True)

In [217]:
calls_full_df.reset_index(inplace=True)

In [218]:
calls_full_df.columns

Index(['index', 'BEAT', 'CALL_CODE', 'CALL_DESC', 'CALL GROUP', 'YEAR',
       'MAP COLOR', 'CALL VOLUME', 'Current Year Mark', 'Select View',
       'Boundary Selection', 'CALL DESCRIPTION', 'CALL CODE', 'Map Selection',
       'DAY_ONLY', 'EVENT', 'Geometry (MCPP MAP.shp)', 'Geometry',
       'Geometry (BEAT MAP.shp)', 'HOW_RECVD', 'NEIGHBORHOOD',
       'Number of Records', 'ORIG_TIME_QUEUED', 'PCT', 'PCT Full', 'PRECINCT',
       'Beat', 'first prec (BEAT MAP.shp)', 'Neighborho',
       'precinct (MCPP MAP.shp)', 'Precinct', 'sector (BEAT MAP.shp)',
       'st area sh (MCPP MAP.shp)', 'St Area Sh', 'st area sh (BEAT MAP.shp)',
       'st length  (MCPP MAP.shp)', 'St Length', 'st length  (BEAT MAP.shp)'],
      dtype='object')

In [219]:
calls_full_df['dt_time'] = pd.to_datetime(calls_full_df['ORIG_TIME_QUEUED'])

In [220]:
calls_full_df['date'] = calls_full_df['dt_time'].dt.date

In [221]:
calls_full_df['year'] = calls_full_df['dt_time'].dt.year

In [222]:
calls_full_df['month'] = calls_full_df['dt_time'].dt.month

In [223]:
calls_full_df['day'] = calls_full_df['dt_time'].dt.day

In [224]:
calls_full_df['day_of_week'] = calls_full_df['dt_time'].dt.weekday

In [225]:
calls_full_df['month_day'] = calls_full_df['dt_time'].dt.strftime('%m/%d')
    

In [15]:
class CustomHolidays(holidays.US):
    def _populate(self, year):
        # Populate the holiday list with the default US holidays
        holidays.US._populate(self, year)
        # Example: Add Ninja Turtle Day
        #self[dt.date(year, 7, 13)] = "Ninja Turtle Day"
        for year in range(2009, 2030):
            # Add Valentine's day
            self[dt.date(year, 2, 14)] = "Valentines Day"
            # Add St Patricks Day
            self[dt.date(year, 3, 17)] = "St Patricks Day"
            # Add Easter
            self[holidays.easter(year=year)] = "Easter"
            # Add Good Friday
            self[holidays.easter(year=year)  -  dt.timedelta(days=2)] = "Good Friday"
            # Add Christmas Eve
            self[dt.date(year, 12, 24)] = "Christmas Eve"
            # Add New Years Eve
            self[dt.date(year, 12, 31)] = "New Years Eve"
            # Add Chinese New Year
            chinese = HongKong()
            for date, label in chinese.get_chinese_new_year(year):
                self[date] = label

In [61]:
qatar_holidays = Qatar()
class IslamicHolidays(holidays.HolidayBase):
    def _populate(self, year):
        # Populate the holiday list with blank base holidays
        holidays.HolidayBase._populate(self, year)
        for year in range(2009, 2030):
            days = qatar_holidays.get_calendar_holidays(year)
            # Add Ramadan
            for i in range(1, len(days)):
                if qatar_holidays.get_calendar_holidays(year)[i][1] == 'Start of ramadan':
                    for day in range(30):
                        self[qatar_holidays.get_calendar_holidays(year)[i][0] + dt.timedelta(days=day-1)] = "Ramadan"
                else:
                    self[qatar_holidays.get_calendar_holidays(2018)[1][0] - dt.timedelta(days=2)] = qatar_holidays.get_calendar_holidays(year)[i][1]
                       

In [17]:
hebcal_2010 = pd.read_csv('../data/hebcal_2010_usa.csv')
hebcal_2015 = pd.read_csv('../data/hebcal_2015_usa.csv')
hebcal_2020 = pd.read_csv('../data/hebcal_2020_usa.csv')
hebcal_2025 = pd.read_csv('../data/hebcal_2025_usa.csv')

hebcal = pd.concat([hebcal_2010, hebcal_2015, hebcal_2020, hebcal_2025])
hebcal.reset_index(inplace=True)
hebcal['date'] = pd.to_datetime(hebcal["Start Date"]).dt.date

In [18]:
class JewishHolidays(holidays.HolidayBase):
    def _populate(self, year):
        # Populate the holiday list with blank base holidays
        holidays.HolidayBase._populate(self, year)
        for year in range(2009, 2030):
            for i in range(len(hebcal)):
                self[hebcal['date'][i]] = hebcal['Subject'][i]           

In [115]:
custom_holidays = []
for date, name in sorted(CustomHolidays(years=2016).items()):
    custom_holidays.append([date, name])

custom_holidays = pd.DataFrame(custom_holidays, columns=['date', 'holiday'])

In [118]:
calls_full_df = calls_full_df.join(custom_holidays.set_index('date'), on='date')

In [124]:
islamic_holidays = []
for date, name in sorted(IslamicHolidays(years=2016).items()):
    islamic_holidays.append([date, name])

islamic_holidays = pd.DataFrame(islamic_holidays, columns=['date', 'islamic_holiday'])



In [226]:
calls_full_df = calls_full_df.join(islamic_holidays.set_index('date'), on='date')

In [126]:
jewish_holidays = []
for date, name in sorted(JewishHolidays(years=2016).items()):
    jewish_holidays.append([date, name])

jewish_holidays = pd.DataFrame(jewish_holidays, columns=['date', 'jewish_holiday'])

In [227]:
calls_full_df = calls_full_df.join(jewish_holidays.set_index('date'), on='date')

In [19]:
events = ({
    'Pride Parade' : ['6/30/2019', '6/24/2018', '6/25/2017', '6/26/2016', '6/28/2015',
                      '6/29/2014', '6/30/2013', '6/24/2012', '6/26/2011', '6/27/2010'],
    'Seafair' : ['8/2/2019', '8/3/2019', '8/4/2019', '8/3/2018', '8/4/2018', '8/5/2018',
                '8/4/2017', '8/5/2017', '8/6/2017', '8/5/2016', '8/6/2016', '8/7/2016',
                '7/31/2015', '8/1/2015', '8/2/2015', '8/1/2014', '8/2/2014', '8/3/2014',
                '8/2/2013', '8/3/2013', '8/4/2013', '8/3/2012', '8/4/2012', '8/5/2012',
                '8/5/2011', '8/6/2011', '8/7/2011', '8/6/2010', '8/7/2010', '8/8/2010' ],
    'Soltice Parade': ['6/30/2019', '6/16/2018', '6/17/2017', '6/18/2016', '6/20/2015',
                      '6/21/2014', '6/22/2013', '6/16/2012', '6/18/2011', '6/19/2010'],
    'Womens March' : ['1/19/2019', '1/20/2018', '1/21/2017'],
})

In [254]:
seattle_events = []
for event in events.keys():
    for day in events[event]:
        seattle_events.append([dt.datetime.strptime(day, '%m/%d/%Y'), event])
seattle_events = pd.DataFrame(seattle_events, columns=['date','local_event'])
seattle_events['date'] = seattle_events['date'].dt.date

In [257]:
calls_full_df = calls_full_df.join(seattle_events.set_index('date'), on='date')

In [229]:
calls_full_df['full_moon'] = moon_phase(calls_full_df['date'].values)>.99
calls_full_df['new_moon'] = moon_phase(calls_full_df['date'].values)<.01

In [230]:
weather_import = pd.read_csv('../data/historical_weather.csv')
weather_import['date'] = pd.to_datetime(weather_import['DATE']).dt.date

In [231]:
weather_hist = (weather_import[['date', 'TMIN', 'TMAX', 'PRCP', 'SNOW']]
                .rename(columns={'PRCP':'precip', 'TMIN':'temp_min', 'TMAX':'temp_max',
                                 'PRCP':'precip', 'SNOW':'snow'}))

In [232]:
weather_hist['precip^2'] = weather_hist['precip']**2
weather_hist['snow^2'] = weather_hist['snow']**2

In [233]:
calls_full_df = calls_full_df.join(weather_hist.set_index('date'), on='date')

In [258]:
calls_full_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 96208 entries, 0 to 96207
Data columns (total 56 columns):
index                        96208 non-null int64
BEAT                         96208 non-null object
CALL_CODE                    96208 non-null object
CALL_DESC                    96208 non-null object
CALL GROUP                   96208 non-null object
YEAR                         96208 non-null int64
MAP COLOR                    96036 non-null object
CALL VOLUME                  96208 non-null int64
Current Year Mark            96208 non-null object
Select View                  96208 non-null object
Boundary Selection           96208 non-null object
CALL DESCRIPTION             96208 non-null object
CALL CODE                    96208 non-null object
Map Selection                96036 non-null object
DAY_ONLY                     96208 non-null object
EVENT                        96208 non-null int64
Geometry (MCPP MAP.shp)      96036 non-null object
Geometry                    

In [315]:
seahawks_schedule = pd.read_csv('../data/seahawks_schedule.csv')
huskies_schedule = pd.read_csv('../data/huskies_schedule.csv')
sounders_schedule = pd.read_csv('../data/sounders_schedule.csv')

In [317]:
seahawks_schedule.columns

Index(['Unnamed: 0', 'year', 'Week', 'Day', 'Date', 'Unnamed: 5', 'Unnamed: 6',
       'Unnamed: 7', 'OT', 'Rec', 'Unnamed: 10', 'Opp'],
      dtype='object')

In [370]:
seahawks_schedule = seahawks_schedule[seahawks_schedule['Opp'].notna()]
seahawks_schedule.reset_index(drop=True)
seahawks_schedule = seahawks_schedule[seahawks_schedule['Opp'] != 'Bye Week']
seahawks_schedule = seahawks_schedule.reset_index(drop=True)

In [376]:
seahawks_schedule['date'] = ''

for i in range(len(seahawks_schedule)):
    seahawks_schedule['date'][i] = (f"{seahawks_schedule.iloc[i]['Date']}, {seahawks_schedule.iloc[i]['year']}")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [382]:
seahawks_schedule['date'] = pd.to_datetime(seahawks_schedule['date']).dt.date

Unnamed: 0.1,Unnamed: 0,year,Week,Day,Date,Unnamed: 5,Unnamed: 6,Unnamed: 7,OT,Rec,Unnamed: 10,Opp,date
0,0,2010,1,Sun,September 12,4:16PM ET,boxscore,W,,1-0,,San Francisco 49ers,2010-09-12
1,1,2010,2,Sun,September 19,4:06PM ET,boxscore,L,,1-1,@,Denver Broncos,2010-09-19
2,2,2010,3,Sun,September 26,4:16PM ET,boxscore,W,,2-1,,San Diego Chargers,2010-09-26
3,3,2010,4,Sun,October 3,1:00PM ET,boxscore,L,,2-2,@,St. Louis Rams,2010-10-03
4,5,2010,6,Sun,October 17,1:02PM ET,boxscore,W,,3-2,@,Chicago Bears,2010-10-17
5,6,2010,7,Sun,October 24,4:06PM ET,boxscore,W,,4-2,,Arizona Cardinals,2010-10-24
6,7,2010,8,Sun,October 31,4:15PM ET,boxscore,L,,4-3,@,Oakland Raiders,2010-10-31
7,8,2010,9,Sun,November 7,4:05PM ET,boxscore,L,,4-4,,New York Giants,2010-11-07
8,9,2010,10,Sun,November 14,4:15PM ET,boxscore,W,,5-4,@,Arizona Cardinals,2010-11-14
9,10,2010,11,Sun,November 21,4:05PM ET,boxscore,L,,5-5,@,New Orleans Saints,2010-11-21


In [388]:
seahawks_schedule['Week'].unique()

array(['1', '2', '3', '4', '6', '7', '8', '9', '10', '11', '12', '13',
       '14', '15', '16', '17', 'Wild Card', 'Division', '5',
       'Conf. Champ.', 'SuperBowl'], dtype=object)

In [419]:
seahawks_schedule['seahawks_game'] = ''
for i in range(len(seahawks_schedule)):
    if type(seahawks_schedule.iloc[i]['Unnamed: 10']) == str:
        if seahawks_schedule.iloc[i]['Week'] == 'SuperBowl':
            seahawks_schedule['seahawks_game'][i] = 'SuperBowl'
        elif seahawks_schedule.iloc[i]['Week'] in ['Wild Card', 'Division','Conf. Champ.']:
            seahawks_schedule['seahawks_game'][i] = 'away_Playoffs'
        else:
            seahawks_schedule['seahawks_game'][i] = 'away_Regular'
    else:     
        if seahawks_schedule.iloc[i]['Week'] == 'SuperBowl':
            seahawks_schedule['seahawks_game'][i] = 'SuperBowl'
        elif seahawks_schedule.iloc[i]['Week'] in ['Wild Card', 'Division','Conf. Champ.']:
            seahawks_schedule['seahawks_game'][i] = 'home_Playoffs'
        else:
            seahawks_schedule['seahawks_game'][i] = 'home_Regular'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing

In [422]:
seahawks_schedule

Unnamed: 0.1,Unnamed: 0,year,Week,Day,Date,Unnamed: 5,Unnamed: 6,Unnamed: 7,OT,Rec,Unnamed: 10,Opp,date,type,seahawks_game
0,0,2010,1,Sun,September 12,4:16PM ET,boxscore,W,,1-0,,San Francisco 49ers,2010-09-12,Regular,home_Regular
1,1,2010,2,Sun,September 19,4:06PM ET,boxscore,L,,1-1,@,Denver Broncos,2010-09-19,Regular,away_Regular
2,2,2010,3,Sun,September 26,4:16PM ET,boxscore,W,,2-1,,San Diego Chargers,2010-09-26,Regular,home_Regular
3,3,2010,4,Sun,October 3,1:00PM ET,boxscore,L,,2-2,@,St. Louis Rams,2010-10-03,Regular,away_Regular
4,5,2010,6,Sun,October 17,1:02PM ET,boxscore,W,,3-2,@,Chicago Bears,2010-10-17,Regular,away_Regular
5,6,2010,7,Sun,October 24,4:06PM ET,boxscore,W,,4-2,,Arizona Cardinals,2010-10-24,Regular,home_Regular
6,7,2010,8,Sun,October 31,4:15PM ET,boxscore,L,,4-3,@,Oakland Raiders,2010-10-31,Regular,away_Regular
7,8,2010,9,Sun,November 7,4:05PM ET,boxscore,L,,4-4,,New York Giants,2010-11-07,Regular,home_Regular
8,9,2010,10,Sun,November 14,4:15PM ET,boxscore,W,,5-4,@,Arizona Cardinals,2010-11-14,Regular,away_Regular
9,10,2010,11,Sun,November 21,4:05PM ET,boxscore,L,,5-5,@,New Orleans Saints,2010-11-21,Regular,away_Regular


In [423]:
calls_full_df = calls_full_df.join(seahawks_schedule[['date','seahawks_game']].set_index('date'), on='date')

In [424]:
calls_full_df

Unnamed: 0,index,BEAT,CALL_CODE,CALL_DESC,CALL GROUP,YEAR,MAP COLOR,CALL VOLUME,Current Year Mark,Select View,...,new_moon,temp_min,temp_max,precip,snow,precip^2,snow^2,local_event,type,seahawks_game
0,0,R3,082,DV - DOMESTIC VIOLENCE (ARREST DISCRETIONARY),DOMESTIC DISTURBANCE/VIOLENCE,2010,S,1,Previous Year,Monthly View,...,False,39,46,0.81,0.0,0.6561,0.0,,Playoffs,home_Playoffs
1,1,N2,081,"DV - ARGUMENTS, DISTURBANCE (NO ARREST)",DOMESTIC DISTURBANCE/VIOLENCE,2010,N,1,Previous Year,Monthly View,...,False,39,46,0.81,0.0,0.6561,0.0,,Playoffs,home_Playoffs
2,4,N3,081,"DV - ARGUMENTS, DISTURBANCE (NO ARREST)",DOMESTIC DISTURBANCE/VIOLENCE,2010,N,1,Previous Year,Monthly View,...,False,39,46,0.81,0.0,0.6561,0.0,,Playoffs,home_Playoffs
3,6,F2,081,"DV - ARGUMENTS, DISTURBANCE (NO ARREST)",DOMESTIC DISTURBANCE/VIOLENCE,2010,SW,1,Previous Year,Monthly View,...,False,39,46,0.81,0.0,0.6561,0.0,,Playoffs,home_Playoffs
4,7,Q3,080,DV - DOMESTIC THREATS BY PHONE OR WRITING,DOMESTIC DISTURBANCE/VIOLENCE,2010,W,1,Previous Year,Monthly View,...,False,42,57,0.00,0.0,0.0000,0.0,,,
5,13,W1,087,DV - ENFORCE COURT ORDER (ARREST MANDATED),DOMESTIC DISTURBANCE/VIOLENCE,2010,SW,1,Previous Year,Monthly View,...,False,44,49,0.98,0.0,0.9604,0.0,,,
6,15,B3,085,DV - SERVICE OF COURT ORDER,DOMESTIC DISTURBANCE/VIOLENCE,2010,N,1,Previous Year,Monthly View,...,False,44,49,0.98,0.0,0.9604,0.0,,,
7,16,R2,081,"DV - ARGUMENTS, DISTURBANCE (NO ARREST)",DOMESTIC DISTURBANCE/VIOLENCE,2010,S,1,Previous Year,Monthly View,...,False,44,49,0.98,0.0,0.9604,0.0,,,
8,18,S3,081,"DV - ARGUMENTS, DISTURBANCE (NO ARREST)",DOMESTIC DISTURBANCE/VIOLENCE,2010,S,1,Previous Year,Monthly View,...,False,42,57,0.00,0.0,0.0000,0.0,,,
9,23,S1,081,"DV - ARGUMENTS, DISTURBANCE (NO ARREST)",DOMESTIC DISTURBANCE/VIOLENCE,2010,S,1,Previous Year,Monthly View,...,False,42,57,0.00,0.0,0.0000,0.0,,,
