In [2]:
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
import matplotlib.style as style

%matplotlib inline
# The bundled seaborn styles were renamed with a 'seaborn-v0_8-' prefix in
# matplotlib 3.6 and the old names were later removed, so pick whichever
# spelling the installed matplotlib actually provides.
style.use('seaborn-notebook' if 'seaborn-notebook' in style.available
          else 'seaborn-v0_8-notebook')

In [5]:
# Load Model
# NOTE(security): unpickling can execute arbitrary code, so only load model
# files that come from a trusted source.
import pickle
filename = '../models/finalized_model.sav'
# The context manager guarantees the file handle is closed even if
# unpickling raises; the bare open() used before left it open.
with open(filename, 'rb') as model_file:
    m = pickle.load(model_file)
m

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bytree=0.3, gamma=0.0, learning_rate=0.15,
              max_delta_step=0, max_depth=3, min_child_weight=7, missing=nan,
              n_estimators=100, n_jobs=1, nthread=None,
              objective='binary:logistic', random_state=0, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=2.3, seed=None, silent=True,
              subsample=1)

In [12]:
# Generate new data
# Build one row per calendar day in the window below, labelled 1 when at
# least one row of the SAR file carries exactly that timestamp, else 0.
input_filepath = '../data/raw/'
sar_header = ['DEM', 'date', 'responders', 'hours', 'miles']
# The file is read with header=None, so column names are supplied explicitly.
sar_data = pd.read_csv(f'{input_filepath}KCSARA.csv', header=None,
                       parse_dates=['date'], names=sar_header)

date_range = pd.date_range(start='1/1/2019', end='4/15/2019')

# One vectorised membership test replaces the per-day Python loop, which
# rescanned the whole `date` column for every day in the range.
has_mission = date_range.isin(sar_data['date'])
sar_clean = pd.DataFrame({'date': date_range,
                         'mission': has_mission.astype('int64')})

# `test` is bound to the same object as `sar_clean` (no copy is made), so
# in-place changes made through one name are visible through the other.
test = sar_clean
test.head()

Unnamed: 0,date,mission
0,2019-01-01,1
1,2019-01-02,0
2,2019-01-03,0
3,2019-01-04,0
4,2019-01-05,0


In [13]:
def add_datepart(df, fldname, drop=True, time=False, errors="raise"):
    """Expand a date column of ``df`` into calendar feature columns, in place.

    The column is converted with ``pd.to_datetime`` when it is not already a
    datetime dtype (timezone-aware or naive). New columns are named with the
    prefix obtained by stripping a trailing ``date``/``Date`` from ``fldname``
    (``'saledate'`` -> ``'saleYear'``; ``'date'`` -> ``'Year'``).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify; columns are added to this object directly.
    fldname : str
        Name of the column holding the dates.
    drop : bool, default True
        Remove ``fldname`` from ``df`` once the features are added.
    time : bool, default False
        Also add ``Hour``, ``Minute`` and ``Second`` columns.
    errors : str, default "raise"
        Forwarded to ``pd.to_datetime`` when a conversion is needed.

    Returns
    -------
    None
        ``df`` is mutated in place.
    """
    fld = df[fldname]
    # Public dtype check covering both naive and tz-aware datetimes (replaces
    # the lookup through the private ``pd.core.dtypes`` module path).
    if not pd.api.types.is_datetime64_any_dtype(fld):
        # ``infer_datetime_format`` is deprecated in pandas 2.x, so it is no
        # longer passed.
        df[fldname] = fld = pd.to_datetime(fld, errors=errors)
    targ_pre = re.sub('[Dd]ate$', '', fldname)
    attr = ['Year', 'Month', 'Week', 'Day', 'Dayofweek', 'Dayofyear',
            'Is_month_end', 'Is_month_start', 'Is_quarter_end', 'Is_quarter_start', 'Is_year_end', 'Is_year_start']
    if time:
        attr = attr + ['Hour', 'Minute', 'Second']
    for n in attr:
        if n == 'Week' and hasattr(fld.dt, 'isocalendar'):
            # ``Series.dt.week`` was removed in pandas 2.0; the ISO calendar
            # week is its replacement. Cast to the dtype ``.dt.day`` yields so
            # the column matches the other integer features.
            values = fld.dt.isocalendar().week.astype(fld.dt.day.dtype)
        else:
            values = getattr(fld.dt, n.lower())
        df[targ_pre + n] = values
    # Whole seconds since the Unix epoch. Subtracting the epoch gives the
    # right answer whatever the column's datetime resolution is, whereas
    # ``astype(int64) // 10**9`` is only correct for nanosecond data.
    epoch = pd.Timestamp('1970-01-01')
    if fld.dt.tz is not None:
        # Express the epoch in the column's own timezone so both operands of
        # the subtraction carry the same tz.
        epoch = epoch.tz_localize('UTC').tz_convert(fld.dt.tz)
    df[targ_pre + 'Elapsed'] = (fld - epoch) // pd.Timedelta(seconds=1)
    if drop:
        df.drop(fldname, axis=1, inplace=True)

# Add the calendar feature columns to `test` in place; drop=False keeps the
# original 'date' column next to the derived ones.
add_datepart(df=test, fldname='date', drop=False)
test.head()

Unnamed: 0,date,mission,Year,Month,Week,Day,Dayofweek,Dayofyear,Is_month_end,Is_month_start,Is_quarter_end,Is_quarter_start,Is_year_end,Is_year_start,Elapsed
0,2019-01-01,1,2019,1,1,1,1,1,False,True,False,True,False,True,1546300800
1,2019-01-02,0,2019,1,1,2,2,2,False,False,False,False,False,False,1546387200
2,2019-01-03,0,2019,1,1,3,3,3,False,False,False,False,False,False,1546473600
3,2019-01-04,0,2019,1,1,4,4,4,False,False,False,False,False,False,1546560000
4,2019-01-05,0,2019,1,1,5,5,5,False,False,False,False,False,False,1546646400


In [14]:
# `mission` holds 0/1 flags, so its mean is the share of rows flagged 1.
test['mission'].mean()

0.3523809523809524

In [16]:
# Read both CSV files, parsing the column at position 2 as dates.
boeing = pd.read_csv(f'{input_filepath}boeing.csv', parse_dates=[2])
gardner = pd.read_csv(f'{input_filepath}gardner.csv', parse_dates=[2])

# Keep only DATE values present in both files, then zero-fill missing values.
weather = boeing.merge(gardner, how='inner', on='DATE').fillna(0)

# Attach the combined weather columns to each day; the inner join drops days
# that have no matching DATE row.
test = test.merge(weather, how='inner', left_on='date', right_on='DATE')