### Events API

#### Source Directory

In [1]:
%pwd

'C:\\Users\\t0272m1\\Projects\\HR Analytics\\notebooks'

In [2]:
source_path = 'E:\HR-Analytics\source'
source_path

'E:\\HR-Analytics\\source'

In [3]:
import os
os.chdir(source_path)
%pwd

'E:\\HR-Analytics\\source'

In [4]:
ls

 Volume in drive E is DATA
 Volume Serial Number is AEFF-63BE

 Directory of E:\HR-Analytics\source

09/26/2019  01:44 PM    <DIR>          .
09/26/2019  01:44 PM    <DIR>          ..
05/02/2019  11:01 AM                 2 __init__.py
10/07/2019  12:26 PM    <DIR>          __pycache__
10/14/2019  02:58 AM           432,513 abs_weather_tbl.csv
07/15/2019  02:57 PM            16,924 base_table.py
10/11/2019  01:51 PM            26,719 calendrical.py
10/14/2019  02:05 PM             3,693 config_bvp.yml
09/20/2019  09:25 AM             4,201 config_jnap.yml
09/20/2019  09:25 AM            13,481 config_shap.yml
09/20/2019  09:25 AM             6,587 config_tac.yml
10/14/2019  02:19 PM             2,480 config_wap.yml
09/20/2019  09:25 AM             6,360 config_wtap.yml
10/14/2019  11:54 AM         2,986,057 darksky_weather.log
09/18/2019  11:19 AM             6,314 database.py
09/26/2019  01:10 PM            15,163 main.py
10/10/2019  09:09 AM            49,159 model.py
10/02/2019  10:4

#### Imports

In [5]:
import calendar
import datetime
import itertools
import jaydebeapi as jdb
import json
import logging
import math
import matplotlib
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas.io.sql as psql
import predicthq
import psycopg2
import random
import requests
import seaborn as sns
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sqlalchemy import create_engine
import statsmodels as sm
from statsmodels.tsa.statespace.sarimax import SARIMAX
import urllib.request

In [6]:
# Internal Python Packages
import calendrical
from calendrical import get_nth_kday_of_month
from database import connect_greenplum
from database import create_frame_from_pg
from database import create_sqlalchemy_engine
from main import get_pipeline_config
from main import get_plant_config
from model import calendrical_features
from model import get_holidays
from model import holiday_mean

In [7]:
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logging.debug("test")

DEBUG:root:test


In [8]:
pd.set_option('display.max_rows', 1000)

#### Read in Pipeline and Plant Specs

INFO:main:Pipeline Configuration
INFO:main:PIPELINE CONFIGURATION:
INFO:main:datalake:schema          = lab_datasci
INFO:main:datalake:host            = shbdmdwp001.servers.chrysler.com
INFO:main:datalake:port            = 5432
INFO:main:datalake:user            = datasci
INFO:main:datalake:password        = datasci_01
INFO:main:datalake:database        = odshawq
INFO:main:jdbc:driver              = com.ibm.db2.jcc.DB2Driver
INFO:main:jdbc:server              = jdbc:db2://SRVR1874.dbms.chrysler.com:18740/AUCERPTP
INFO:main:jdbc:user                = datasci
INFO:main:jdbc:password            = datasci_01
INFO:main:jdbc:jar_file            = c:/installed/sqllib/java/db2jcc4.jar
INFO:main:holidays:calendar_us     = ['2016-01-01', '2016-01-18', '2016-03-25', '2016-03-28', '2016-05-30', '2016-07-04', '2016-09-05', '2016-11-08', '2016-11-11', '2016-11-24', '2016-11-25', '2016-12-26', '2016-12-27', '2016-12-28', '2016-12-29', '2016-12-30', '2017-01-02', '2017-01-22', '2017-04-14', '2017-04-1

In [10]:
plant_id = 'jnap'
plant_id

'jnap'

In [11]:
pipeline_specs['plant_id'] = plant_id
pipeline_specs['project_directory'] = source_path
pipeline_specs

{'datalake': {'schema': 'lab_datasci',
  'host': 'shbdmdwp001.servers.chrysler.com',
  'port': 5432,
  'user': 'datasci',
  'password': 'datasci_01',
  'database': 'odshawq'},
 'jdbc': {'driver': 'com.ibm.db2.jcc.DB2Driver',
  'server': 'jdbc:db2://SRVR1874.dbms.chrysler.com:18740/AUCERPTP',
  'user': 'datasci',
  'password': 'datasci_01',
  'jar_file': 'c:/installed/sqllib/java/db2jcc4.jar'},
 'holidays': {'calendar_us': ['2016-01-01',
   '2016-01-18',
   '2016-03-25',
   '2016-03-28',
   '2016-05-30',
   '2016-07-04',
   '2016-09-05',
   '2016-11-08',
   '2016-11-11',
   '2016-11-24',
   '2016-11-25',
   '2016-12-26',
   '2016-12-27',
   '2016-12-28',
   '2016-12-29',
   '2016-12-30',
   '2017-01-02',
   '2017-01-22',
   '2017-04-14',
   '2017-04-17',
   '2017-05-29',
   '2017-07-04',
   '2017-09-04',
   '2017-11-10',
   '2017-11-22',
   '2017-11-23',
   '2017-12-25',
   '2017-12-26',
   '2017-12-29',
   '2017-12-30',
   '2017-12-31',
   '2018-01-01',
   '2018-01-15',
   '2018-03-30'

In [12]:
plant_specs = get_plant_config(pipeline_specs)
plant_specs

INFO:main:Plant Configuration
INFO:main:PLANT CONFIGURATION:
INFO:main:plant:code                = 4012
INFO:main:plant:latitude            = 42.375292
INFO:main:plant:longitude           = -82.966222
INFO:main:plant:market_id           = 7
INFO:main:plant:shift_days          = 4
INFO:main:plant:shift_hours         = 10
INFO:main:plant:absence_codes       = ['BERC', 'BERE', 'BERU', 'BERX', 'CARE', 'CARU', 'FMLA', 'FMLD', 'FMLU', 'HOMD', 'ILFE', 'ILFU', 'IPBE', 'IPME', 'IPNU', 'IPSE', 'JURE', 'MISE', 'MISU', 'PERU', 'PPAA', 'PPAU', 'TRAG', 'WTRU']
INFO:main:plant:exclude_dates       = [['2018-08-25', '2018-08-31'], ['2019-01-02', '2019-01-05'], ['2020-08-10', '2020-08-14']]
INFO:main:base_table:start_date     = 2017-01-01
INFO:main:base_table:end_date       = None
INFO:main:base_table:create_table   = True
INFO:main:base_table:write_table    = True
INFO:main:base_table:use_table_date = 2019-09-08
INFO:main:model:models              = ['sarimax']
INFO:main:model:target              = abs

{'plant': {'code': 4012,
  'latitude': 42.375292,
  'longitude': -82.966222,
  'market_id': 7,
  'shift_days': 4,
  'shift_hours': 10,
  'absence_codes': ['BERC',
   'BERE',
   'BERU',
   'BERX',
   'CARE',
   'CARU',
   'FMLA',
   'FMLD',
   'FMLU',
   'HOMD',
   'ILFE',
   'ILFU',
   'IPBE',
   'IPME',
   'IPNU',
   'IPSE',
   'JURE',
   'MISE',
   'MISU',
   'PERU',
   'PPAA',
   'PPAU',
   'TRAG',
   'WTRU'],
  'exclude_dates': [['2018-08-25', '2018-08-31'],
   ['2019-01-02', '2019-01-05'],
   ['2020-08-10', '2020-08-14']]},
 'base_table': {'start_date': datetime.date(2017, 1, 1),
  'end_date': None,
  'create_table': True,
  'write_table': True,
  'use_table_date': datetime.date(2019, 9, 8)},
 'model': {'models': ['sarimax'],
  'target': 'absences_unplanned',
  'npreds': 4,
  'p_arima': 1,
  'd_arima': 0,
  'q_arima': 0,
  'features': ['actual_hours',
   'lost_hours',
   'absences_unplanned_rolling_median_12',
   'absences_unplanned_rolling_median_20',
   'quarter',
   'month',
  

#### Set Proxy

In [13]:
https_proxy = 'https://t0272m1:AlphaPy2019$@iproxy.appl.chrysler.com:9090'
proxyDict = {"https" : https_proxy}
proxyDict

{'https': 'https://t0272m1:AlphaPy2019$@iproxy.appl.chrysler.com:9090'}

#### Read Events Table

In [14]:
https_proxy = 'https://t0272m1:AlphaPy2019$@iproxy.appl.chrysler.com:9090'
proxyDict = {"https" : https_proxy}
proxyDict

{'https': 'https://t0272m1:AlphaPy2019$@iproxy.appl.chrysler.com:9090'}

#### TicketMaster API

In [None]:
# TicketMaster API
url_string = "https://app.ticketmaster.com/discovery/v2/events.json?apikey=YGFCvuvjiyjJmT7KbvluGhTASZsBRQxt&page=0"
marketid = 7
url_string += "&marketId={}".format(marketid)
nevents = 200
url_string += "&size={}".format(nevents)
url_string

In [None]:
# add to URL string for a given date range
start_date = "2019-10-01T00:00:00Z"
end_date = "2019-11-01T00:00:00Z"
start_date_string = "&startDateTime={}".format(start_date)
end_date_string = "&endDateTime={}".format(end_date)
url_string += start_date_string + end_date_string
url_string

In [None]:
response = requests.get(
    url=url_string,
    proxies=proxyDict
)

In [None]:
# TicketMaster API
response = requests.get(
    url="https://app.ticketmaster.com/discovery/v2/events.json?apikey=YGFCvuvjiyjJmT7KbvluGhTASZsBRQxt&marketId=7&size=200&page=0",
    proxies=proxyDict
)

In [None]:
# TicketMaster API
response = requests.get(
    url="https://app.ticketmaster.com/discovery/v2/events.json?apikey=YGFCvuvjiyjJmT7KbvluGhTASZsBRQxt&dmaId=266&page=0",
    proxies=proxyDict
)

In [None]:
response_json = response.json()

In [None]:
response

In [None]:
response.status_code

In [None]:
json_data = json.loads(response.text)

In [None]:
json_data

In [None]:
json_data['_embedded']['events']

In [None]:
len(json_data['_embedded']['events'])

In [None]:
for i, event in enumerate(json_data['_embedded']['events']):
    print("\nNext Event\n")
    print(i+1, event['name'], event['dates']['start']['localDate'])

In [None]:
event

In [None]:
event['name']

In [None]:
event['dates']['start']['localDate']

In [None]:
event['dates']['start']['localTime']

In [None]:
event['classifications'][0]['segment']['name']

In [None]:
event['classifications'][0]['genre']['name']

In [None]:
event['priceRanges'][0]['max']

In [None]:
event_list = []

In [None]:
for i, event in enumerate(json_data['_embedded']['events']):
    print("\nNext Event\n")
    print(i+1, event['name'], event['dates']['start']['localDate'])
    try:
        local_time = event['dates']['start']['localTime']
    except:
        local_time = 'Not Available'
    event_list.append((marketid,
                       event['name'],
                       event['dates']['start']['localDate'],
                       local_time,
                       event['classifications'][0]['genre']['name']))        

In [None]:
# Create event table
df_event = pd.DataFrame(event_list, columns=['market_id', 'event_name', 'event_date', 'event_time', 'genre'])

In [None]:
df_event.sort_values(by=['market_id', 'event_date', 'event_time'], inplace=True)

In [None]:
df_event.shape

In [None]:
df_event['genre'].value_counts()

In [None]:
def write_frame_to_pg(df, table_name, pipeline_specs, data_path):
    # extract specifications
    schema = pipeline_specs['datalake']['schema']
    # establish connection
    print("Establishing connection to Greenplum")
    conn_dl, curs_dl = connect_greenplum(pipeline_specs['datalake'])
    # establish SQL Alchemy connection
    engine_dl = create_sqlalchemy_engine(pipeline_specs['datalake'])
    # create table name
    table = str.lower('_'.join([table_name, 'tbl']))
    full_table_name = '.'.join([schema, table])
    print("Table Name: %s" % full_table_name)
    # drop table if already exists
    print("Dropping table")
    drop_table = 'drop table if exists ' + full_table_name
    curs_dl.execute(drop_table)
    # create empty table
    print("Creating table %s" % full_table_name)
    empty_table = pd.io.sql.get_schema(df, full_table_name, con=engine_dl)
    empty_table = empty_table.replace('"', '')
    print(empty_table)
    curs_dl.execute(empty_table)
    # save the CSV file
    file_name = table + '.csv'
    csv_file = '/'.join([data_path, file_name])
    print("Saving CSV file %s" % csv_file)
    df.fillna(0, inplace=True)
    df.to_csv(csv_file, index=False)
    # create sql for copying table
    SQL_STATEMENT = """
        COPY %s FROM STDIN WITH
            CSV
            HEADER
            DELIMITER AS ','
        """
    # copy file to the table
    print("Copying table from %s" % csv_file)
    f = open(csv_file)
    curs_dl.copy_expert(sql=SQL_STATEMENT % full_table_name, file=f)
    # execute grants
    print("Executing grants")
    grant = 'grant select on table ' + full_table_name + ' to datasci'
    curs_dl.execute(grant)
    grant = 'grant select on table ' + full_table_name + ' to hrba'
    curs_dl.execute(grant)
    # close connection
    print("Closing connection")
    conn_dl.commit()
    curs_dl.close()
    return

In [None]:
data_path = 'E:\HR-Analytics\data'
data_path

In [15]:
table_name = '_'.join(['abs', 'event_calendar'])
table_name

'abs_event_calendar'

In [None]:
write_frame_to_pg(df_event, table_name, pipeline_specs, data_path)

In [16]:
# Connect to Greenplum
conn_dl, curs_dl = connect_greenplum(pipeline_specs['datalake'])
conn_dl, curs_dl

INFO:database:Data Lake Connection Created
INFO:database:Setting role to datasci
INFO:database:Setting timeout value


(<connection object at 0x0000000018D52DB0; dsn: 'user=datasci password=xxx dbname=odshawq host=shbdmdwp001.servers.chrysler.com port=5432', closed: 0>,
 <cursor object at 0x0000000001E69908; closed: 0>)

In [17]:
schema = pipeline_specs['datalake']['schema']
query = "select * from \"" + schema + "\".\"" + table_name + "_tbl\""
query

'select * from "lab_datasci"."abs_event_calendar_tbl"'

In [18]:
df_event_in = create_frame_from_pg(conn_dl, table_name, query)

INFO:database:
Pandas Schema
INFO:database:market_id, Type: int64
INFO:database:event_name, Type: object
INFO:database:event_date, Type: object
INFO:database:event_time, Type: object
INFO:database:genre, Type: object


In [19]:
df_event_in.shape

(534, 5)

In [20]:
df_event_in.columns

Index(['market_id', 'event_name', 'event_date', 'event_time', 'genre'], dtype='object')

In [21]:
df_event_in

Unnamed: 0,market_id,event_name,event_date,event_time,genre
0,7,Nickelodeon's JoJo Siwa D.R.E.A.M. The Tour,2019-09-04,19:00:00,Pop
1,7,Cats (Touring),2019-09-04,19:30:00,Theatre
2,7,UniverSoul Circus,2019-09-05,19:00:00,Circus & Specialty Acts
3,7,Cats (Touring),2019-09-05,19:30:00,Theatre
4,7,Lenny Kravitz,2019-09-05,20:00:00,Rock
5,7,Social Distortion & Flogging Molly: Summer Tou...,2019-09-06,18:00:00,Rock
6,7,Kid Rock,2019-09-06,19:30:00,Rock
7,7,Cats (Touring),2019-09-06,19:30:00,Theatre
8,7,Cats (Touring),2019-09-07,14:00:00,Theatre
9,7,"Yo Gotti, Moneybagg Yo & 42 Dugg",2019-09-07,19:00:00,Hip-Hop/Rap


In [22]:
df_event_in.iloc[246:250]

Unnamed: 0,market_id,event_name,event_date,event_time,genre
246,7,Disney On Ice presents Dream Big,2019-12-22,17:00:00,Ice Shows
247,7,Detroit Red Wings vs. Arizona Coyotes,2019-12-22,19:00:00,Hockey
248,7,Detroit Pistons vs. Philadelphia 76ers,2019-12-23,19:00:00,Basketball
249,7,Detroit Pistons vs. Washington Wizards,2019-12-26,19:00:00,Basketball


In [23]:
df_event_in.index[246:250]

RangeIndex(start=246, stop=250, step=1)

In [60]:
df_event_in['genre'].value_counts(dropna=False)

Theatre                    139
Basketball                  89
Hockey                      89
Rock                        52
Circus & Specialty Acts     26
Ice Shows                   22
Country                     19
Comedy                      17
Football                    14
Hip-Hop/Rap                 13
R&B                         10
Motorsports/Racing           8
Family                       8
Pop                          5
Dance/Electronic             4
Metal                        4
Religious                    3
Children's Theatre           3
Fashion                      1
Jazz                         1
Classical                    1
Folk                         1
Miscellaneous Theatre        1
Other                        1
Blues                        1
World                        1
Boxing                       1
Name: genre, dtype: int64

In [54]:
lf_join = lambda x : ';'.join(x)
lf_join_str = lambda x : ';'.join(map(str, x))
agg_dict = {'market_id'  : lf_join_str,
            'event_name' : lf_join,
            'event_time' : lf_join,
            'genre'      : lf_join}
agg_dict

{'market_id': <function __main__.<lambda>(x)>,
 'event_name': <function __main__.<lambda>(x)>,
 'event_time': <function __main__.<lambda>(x)>,
 'genre': <function __main__.<lambda>(x)>}

In [55]:
# Test of multiple events on same day
df_event_merge = df_event_in.groupby(['event_date']).agg(agg_dict).reset_index()
df_event_merge

Unnamed: 0,event_date,market_id,event_name,event_time,genre
0,2019-09-04,7;7,Nickelodeon's JoJo Siwa D.R.E.A.M. The Tour;Ca...,19:00:00;19:30:00,Pop;Theatre
1,2019-09-05,7;7;7,UniverSoul Circus;Cats (Touring);Lenny Kravitz,19:00:00;19:30:00;20:00:00,Circus & Specialty Acts;Theatre;Rock
2,2019-09-06,7;7;7,Social Distortion & Flogging Molly: Summer Tou...,18:00:00;19:30:00;19:30:00,Rock;Rock;Theatre
3,2019-09-07,7;7;7;7;7;7,"Cats (Touring);Yo Gotti, Moneybagg Yo & 42 Dug...",14:00:00;19:00:00;19:30:00;19:30:00;19:30:00;1...,Theatre;Hip-Hop/Rap;Rock;Rock;Country;Theatre
4,2019-09-08,7;7;7;7,Cats (Touring);Cats (Touring);Bush & +Live+ Th...,13:00:00;18:30:00;19:00:00;19:00:00,Theatre;Theatre;Rock;Rock
5,2019-09-09,7,"The Alchemy Tour: NGHTMRE + SLANDER, Seven Lio...",18:00:00,Dance/Electronic
6,2019-09-10,7,blink-182 & Lil Wayne,19:00:00,Rock
7,2019-09-13,7;7;7,Kid Rock;Morrissey;Cats (Touring),19:30:00;19:30:00;19:30:00,Rock;Rock;Theatre
8,2019-09-14,7;7;7;7;7;7,Cats (Touring);The B-52s;Kid Rock;Cats (Tourin...,14:00:00;19:00:00;19:30:00;19:30:00;20:00:00;2...,Theatre;Rock;Rock;Theatre;Hip-Hop/Rap;Comedy
9,2019-09-15,7;7,Detroit Lions vs. Los Angeles Chargers;Cats (T...,13:00:00;13:00:00,Football;Theatre


In [61]:
df_event_merge['genre'].value_counts()

Hockey                                                                                                       19
Basketball                                                                                                   18
Hockey;Basketball                                                                                             8
Hockey;Theatre                                                                                                6
Basketball;Hockey                                                                                             6
Basketball;Basketball                                                                                         6
Basketball;Theatre                                                                                            4
Hockey;Hockey                                                                                                 3
Rock                                                                                                    

In [58]:
df_event_merge['genre'][0].split(';')

['Pop', 'Theatre']

In [None]:
df_event_out = df_event_in.drop(df_event_in.index[246:250])

In [None]:
df_event_out

In [None]:
write_frame_to_pg(df_event_out, table_name, pipeline_specs, data_path)

#### Update Events Table

In [None]:
def update_events(pipeline_specs):
    # set proxy
    https_proxy = 'https://t0272m1:AlphaPy2019$@iproxy.appl.chrysler.com:9090'
    proxyDict = {"https" : https_proxy}
    # read in event table
    conn_dl, curs_dl = connect_greenplum(pipeline_specs['datalake'])
    schema = pipeline_specs['datalake']['schema']
    table_name = 'abs_event_calendar_tbl'
    query = "select * from \"" + schema + "\".\"" + table_name + "\""
    df_event = create_frame_from_pg(conn_dl, table_name, query)
    # get latest event snapshot
    url_string = "https://app.ticketmaster.com/discovery/v2/events.json?apikey=YGFCvuvjiyjJmT7KbvluGhTASZsBRQxt&page=0"
    marketid = 7    # this will be stored in plant_specs
    url_string += "&marketId={}".format(marketid)
    nevents = 200   # this will be stored in plant_specs
    url_string += "&size={}".format(nevents)
    response = requests.get(url=url_string, proxies=proxyDict)
    if response.status_code == 200:
        json_data = json.loads(response.text)
        event_list = []
        for event in json_data['_embedded']['events']:
            try:
                local_time = event['dates']['start']['localTime']
            except:
                local_time = 'Not Available'
            event_list.append((marketid,
                               event['name'],
                               event['dates']['start']['localDate'],
                               local_time,
                               event['classifications'][0]['genre']['name']))
        event_cols = ['market_id', 'event_name', 'event_date', 'event_time', 'genre']
        df_event_new = pd.DataFrame(event_list, columns=event_cols)
        # concatenate new events with existing table
        df_event = pd.concat([df_event, df_event_new])
        # sort concatenated frame and drop duplicates
        df_event.sort_values(by=['market_id', 'event_date', 'event_time'], inplace=True)
        df_event.drop_duplicates(keep='last', inplace=True)
        # write updated table
        # write_frame_to_pg()
    else:
        print("Status Code from Events API: %d" % response.status_code)
    return df_event

In [None]:
df_event = update_events(pipeline_specs)

In [None]:
df_event.shape

In [None]:
df_event

In [None]:
df_event.drop_duplicates(subset=['event_date'], keep='first', inplace=True)

In [None]:
df_event.shape

In [None]:
df_event

#### Calendrical Analysis

In [None]:
table_date = '20190908'
table_date

In [None]:
# Read in plant table
table_name = '_'.join(['abs', plant_id, 'plant', table_date, 'tbl'])
file_name = '.'.join([table_name, 'csv'])
file_path = '/'.join([data_path, file_name])
df_plant = pd.read_csv(file_path)

In [None]:
df_plant.columns

In [None]:
# drop columns to simulate recreation of calendrical features
drop_cols = ['year',
             'month',
             'quarter',
             'week',
             'day',
             'day_of_week',
             'day_of_year',
             'nth_kday',
             'au_group_mean',
             'au_dow_mean',
             'au_week_mean',
             'au_month_mean',
             'au_nth_kday_mean',
             'holiday',
             'holiday_offset',
             'holiday_dates',
             'au_holiday_mean',
             'au_dow_pct',
             'au_week_pct',
             'au_month_pct',
             'au_nth_kday_pct',
             'au_holiday_pct',
             'day_name',
             'month_name',
             'feature1',
             'feature2',
             'feature3',
             'feature4',
             'feature5']
df_plant.drop(columns=drop_cols, inplace=True)

In [None]:
df_plant.sample(20)

In [None]:
def create_calendrical_stats(dfm, group_cols):
    logger.info("Creating Calendrical Statistics")
    # Fill in date information for all rows vis a vis the prediction frame
    dfm['workdate_dt'] = pd.to_datetime(dfm['workdate'])
    dfm['year'] = dfm['workdate_dt'].dt.year
    dfm['month'] = dfm['workdate_dt'].dt.month
    dfm['quarter'] = dfm['workdate_dt'].dt.quarter
    dfm['week'] = dfm['workdate_dt'].dt.week
    dfm['day'] = dfm['workdate_dt'].dt.day
    dfm['day_of_week'] = dfm['workdate_dt'].dt.dayofweek
    dfm['day_of_year'] = dfm['workdate_dt'].dt.dayofyear
    dfm['nth_kday'] = dfm[['day', 'month', 'year']].apply(lambda x: get_nth_kday_of_month(*x), axis=1)
    # Calculate means and merge all frames together
    dfm_mean = dfm.groupby(group_cols)[['absences_unplanned']].mean().reset_index()
    dfm1 = pd.merge(dfm, dfm_mean, left_on=group_cols, right_on=group_cols, how='left')
    dfm1.rename(index=str, columns={'absences_unplanned_x': 'absences_unplanned', 'absences_unplanned_y': 'au_group_mean'}, inplace=True)
    join_cols = group_cols + ['day_of_week']
    dfm_dow = dfm.groupby(join_cols)[['absences_unplanned']].mean().reset_index()
    dfm2 = pd.merge(dfm1, dfm_dow, left_on=join_cols, right_on=join_cols, how='left')
    dfm2.rename(index=str, columns={'absences_unplanned_x': 'absences_unplanned', 'absences_unplanned_y': 'au_dow_mean'}, inplace=True)
    join_cols = group_cols + ['week']
    dfm_week = dfm.groupby(join_cols)[['absences_unplanned']].mean().reset_index()
    dfm3 = pd.merge(dfm2, dfm_week, left_on=join_cols, right_on=join_cols, how='left')
    dfm3.rename(index=str, columns={'absences_unplanned_x': 'absences_unplanned', 'absences_unplanned_y': 'au_week_mean'}, inplace=True)
    join_cols = group_cols + ['month']
    dfm_month = dfm.groupby(join_cols)[['absences_unplanned']].mean().reset_index()
    dfm4 = pd.merge(dfm3, dfm_month, left_on=join_cols, right_on=join_cols, how='left')
    dfm4.rename(index=str, columns={'absences_unplanned_x': 'absences_unplanned', 'absences_unplanned_y': 'au_month_mean'}, inplace=True)
    join_cols = group_cols + ['nth_kday', 'day_of_week']
    dfm_nth = dfm.groupby(join_cols)[['absences_unplanned']].mean().reset_index()
    dfm5 = pd.merge(dfm4, dfm_nth, left_on=join_cols, right_on=join_cols, how='left')
    dfm5.rename(index=str, columns={'absences_unplanned_x': 'absences_unplanned', 'absences_unplanned_y': 'au_nth_kday_mean'}, inplace=True)
    # Holiday Features
    holidays = get_holidays(dfm5)
    dfh = dfm5.apply(calendrical_features, holidays=holidays, axis=1)
    dfm5 = pd.concat([dfm5, dfh], axis=1)
    dfm5['au_holiday_mean'] = dfm5.apply(holiday_mean, df=dfm5, axis=1)
    # Mean Imputation for NA values
    dfm5['au_dow_mean'].replace([np.inf, -np.inf], np.nan, inplace=True)
    dfm5['au_dow_mean'].fillna(dfm5['au_group_mean'], inplace=True)
    dfm5['au_week_mean'].replace([np.inf, -np.inf], np.nan, inplace=True)
    dfm5['au_week_mean'].fillna(dfm5['au_group_mean'], inplace=True)
    dfm5['au_month_mean'].replace([np.inf, -np.inf], np.nan, inplace=True)
    dfm5['au_month_mean'].fillna(dfm5['au_group_mean'], inplace=True)
    dfm5['au_nth_kday_mean'].replace([np.inf, -np.inf], np.nan, inplace=True)
    dfm5['au_nth_kday_mean'].fillna(dfm5['au_group_mean'], inplace=True)
    dfm5['au_holiday_mean'].replace([np.inf, -np.inf], np.nan, inplace=True)
    dfm5['au_holiday_mean'].fillna(dfm5['au_group_mean'], inplace=True)
    # Historical Percentages
    dfm5['au_dow_pct'] = round(100 * (dfm5['au_dow_mean'] / dfm5['au_group_mean'] - 1), 0)
    dfm5['au_dow_pct'].fillna(0, inplace=True)
    dfm5['au_dow_pct'] = dfm5['au_dow_pct'].astype(int)
    dfm5['au_week_pct'] = round(100 * (dfm5['au_week_mean'] / dfm5['au_group_mean'] - 1), 0)
    dfm5['au_week_pct'].fillna(0, inplace=True)
    dfm5['au_week_pct'] = dfm5['au_week_pct'].astype(int)
    dfm5['au_month_pct'] = round(100 * (dfm5['au_month_mean'] / dfm5['au_group_mean'] - 1), 0)
    dfm5['au_month_pct'].fillna(0, inplace=True)
    dfm5['au_month_pct'] = dfm5['au_month_pct'].astype(int)
    dfm5['au_nth_kday_pct'] = round(100 * (dfm5['au_nth_kday_mean'] / dfm5['au_group_mean'] - 1), 0)
    dfm5['au_nth_kday_pct'].fillna(0, inplace=True)
    dfm5['au_nth_kday_pct'] = dfm5['au_nth_kday_pct'].astype(int)
    dfm5['au_holiday_pct'] = round(100 * (dfm5['au_holiday_mean'] / dfm5['au_group_mean'] - 1), 0)
    dfm5['au_holiday_pct'].fillna(0, inplace=True)
    dfm5['au_holiday_pct'] = dfm5['au_holiday_pct'].astype(int)
    # Set day and month names   
    dfm5['day_name'] = dfm5['workdate_dt'].dt.day_name()
    dfm5['month_name'] = dfm5['workdate_dt'].dt.month_name()
    # Assemble calendar features
    dfm5['feature1'] = dfm5['day_name'] + ': ' + dfm5['au_dow_pct'].astype(str) + '%'
    dfm5['feature2'] = 'Week ' + dfm5['week'].astype(str) + ': ' + dfm5['au_week_pct'].astype(str) + '%'
    dfm5['feature3'] = dfm5['month_name'] + ': ' + dfm5['au_month_pct'].astype(str) + '%'
    dfm5['feature4'] = dfm5['nth_kday'].astype(str) + ' ' + dfm5['day_name'] +  ': ' + dfm5['au_nth_kday_pct'].astype(str) + '%'
    dfm5['feature4'] = dfm5['feature4'].str.replace('^1', '1st', regex=True)
    dfm5['feature4'] = dfm5['feature4'].str.replace('^2', '2nd', regex=True)
    dfm5['feature4'] = dfm5['feature4'].str.replace('^3', '3rd', regex=True)
    dfm5['feature4'] = dfm5['feature4'].str.replace('^4', '4th', regex=True)
    dfm5['feature4'] = dfm5['feature4'].str.replace('^5', '5th', regex=True)
    dfm5['holiday_plus_minus'] = dfm5['holiday_offset'].apply(lambda x: 'after' if x > 0 else 'before')
    dfm5.loc[dfm5['holiday_offset'] == 0, 'holiday_plus_minus'] = 'it\'s'
    dfm5['feature5'] = abs(dfm5['holiday_offset']).astype(str) + ' days ' + dfm5['holiday_plus_minus'] + ' ' \
                       + dfm5['holiday'] + ': ' + dfm5['au_holiday_pct'].astype(str) + '%'
    # Read in event table
    df_event = update_events(pipeline_specs)
    df_event.drop_duplicates(subset=['event_date'], keep='first', inplace=True)
    dfm5 = pd.merge(dfm5, df_event, left_on=['workdate'], right_on=['event_date'], how='left')
    # Read in event table
    drop_cols = ['workdate_dt',
                 'holiday_plus_minus',
                 'market_id',
                 'event_date',
                 'event_time',
                 'genre']
    dfm5.drop(columns=drop_cols, inplace=True)
    return dfm5

In [None]:
dfc = create_calendrical_stats(df_plant, plant_specs['model']['levels'])

In [None]:
dfc_event = pd.merge(dfc, df_event, left_on=['workdate'], right_on=['event_date'], how='left')

In [None]:
dfc_event.columns

In [None]:
dfc_event[['workdate', 'holiday', 'holiday_offset', 'feature1', 'feature2', 'feature3', 'feature4', 'feature5', 'event_name']].tail(50)

#### PredictHQ API

In [None]:
radius = 100
lat_long = "42.375292,-82.966222"
area_string = str(radius) + "mi@" + lat_long
area_string

In [None]:
maximum_events = 20
maximum_events

In [None]:
offset = 0
offset

In [None]:
# This is the event search.
response = requests.get(
    url="https://api.predicthq.com/v1/events/",
    headers={"Authorization": "Bearer CIve3xYUdfXFn0Wbznseqr8ngCzgfR"},
    params={"within"           : area_string,
            "limit"            : maximum_events,
            "offset"           : offset,
            "start.gte"        : "2018-01-01",
            "start.lte"        : "2019-01-01",
            "category"         : "festivals",
            "rank_level"       : "4,5"},
    proxies=proxyDict
)

In [None]:
# This is the event calendar, which is more of a summary.
response = requests.get(
    url="https://api.predicthq.com/v1/events/calendar/",
    headers={"Authorization": "Bearer CIve3xYUdfXFn0Wbznseqr8ngCzgfR"},
    params={"within"           : area_string,
            "limit"            : maximum_events,
            "offset"           : offset,
            "category"         : "concerts,festivals,sports",
            "rank_level"       : "4,5"},
    proxies=proxyDict
)

In [None]:
response_json = response.json()

In [None]:
response

In [None]:
json_data = json.loads(response.text)

In [None]:
json_data