In [355]:
import pandas as pd
import numpy as np

import datetime as dt

from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.utils import resample


import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)   
    
from src.sports_scrapers import scrape_huskies, scrape_seahawks
from src.weather_scraper import get_raw_forecast, get_raw_forecast_dataframe, get_hi_temperature, seattle_weather_fcst
from src.data_retrievers import DataRetrieval
from src.holiday_calendars import SeattleHolidays
from src.featurizers import (CountCalls, FeaturizeCalls, DateDummies, HolidayDummies, 
                             EventDummies, MakeDummies, FeaturizeDates, JoinDataFrames,
                             MakeModelInput, AddWeatherForecast)
from src.models import (calls_pipe, forecast_pipe, baseline_model, city_model, 
                        neighborhood_dist_model, model_ensemble)
from src.neighborhood_ratings import neighborhood_ratings

# Get initial calls for service data

In [40]:
retriever = DataRetrieval()

In [41]:
calls = retriever.get_calls_data()

In [42]:
targets, features = calls_pipe(calls)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  df["date"] = ""
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  df["seahawks_game"][i] = "home_Playoffs"
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  df["seahawks_game"][i] = "away_Playoffs"
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  df["seahawks_game"][i] = "SuperBowl"
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentati

#  Train-Test Split

In [43]:
X_train, X_test, y_train, y_test =  train_test_split(features, targets,
                                                     test_size=0.2, random_state=157)


# Baseline Modle

In [57]:
neighborhood_model = baseline_model(X_train, y_train)

In [58]:
neighborhood_train_predictions = neighborhood_model.predict(X_train)
neighborhood_train_mse = mean_squared_error(y_train, neighborhood_train_predictions)

neighborhood_test_predictions = neighborhood_model_b.predict(X_test)
neighborhood_test_mse = mean_squared_error(y_test, neighborhood_test_predictions)

In [59]:
neighborhood_train_mse, neighborhood_test_mse

(0.501661048512353, 0.6271119612526889)

In [60]:
neighborhood_model.score(X_train, y_train)

0.11125025868723953

# City Model - GBRT

In [63]:
city_model = city_model(X_train, y_train)

In [66]:
train_pred_city = model_city.predict(X_train)
test_pred_city = model_city.predict(X_test)

In [65]:
mean_squared_error(y_train.sum(axis=1), model_city.predict(X_train)), mean_squared_error(y_test.sum(axis=1), model_city.predict(X_test))

(29.71786482471047, 34.05285266499792)

In [64]:
model_city.score(X_train, y_train.sum(axis=1))

0.28958052946245827

# Neighborhood Distribution - Random Forest


In [69]:
rf_dist = neighborhood_dist_model(X_train, y_train)

In [68]:
neighborhood_dist_train = pd.DataFrame(np.array(y_train.T) / np.array(y_train.sum(axis=1))).T
neighborhood_dist_test = pd.DataFrame(np.array(y_test.T) / np.array(y_test.sum(axis=1))).T

In [70]:
train_pred_dist = rf_dist.predict(X_train)
test_pred_dist = rf_dist.predict(X_test)

mean_squared_error(neighborhood_dist_train, train_pred_dist), mean_squared_error(neighborhood_dist_test, test_pred_dist)

(0.0005990147695879705, 0.0006336926274971152)

In [72]:
rf_dist.score(X_train, neighborhood_dist_train)

0.04982028605339261

# Create Model Ensemble

In [74]:
train_pred_comb = model_ensemble(train_pred_city, neighborhood_dist_train)
test_pred_comb = model_ensemble(test_pred_city, neighborhood_dist_test)

In [75]:

mean_squared_error(y_train, train_pred_comb.T), mean_squared_error(y_test, test_pred_comb.T)

(0.032882601550400306, 0.03728514228671553)

# Create Forecast

In [350]:
start_date = '12/03/2018'
end_date = '12/31/2019'
model_end = ('09/30/2018', 3194)

In [351]:
forecast_features = forecast_pipe(start_date, end_date, model_end)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  elif df.iloc[i]["Week"] in ["Wild Card", "Division", "Conf. Champ."]:
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  elif df.iloc[i]["Week"] in ["Wild Card", "Division", "Conf. Champ."]:
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if df.iloc[i]["Week"] == "SuperBowl":
A value is trying to be set on a copy of a slice from a D

In [352]:
forecast_predictions = model_city.predict(forecast_features.drop(columns='date'))

In [358]:
pd.to_pickle(forecast_predictions, '../dashboard_data/city_predictions.pkl')

In [353]:
neighborhood_dist_predictions = rf_dist.predict(forecast_features.drop(columns='date'))

In [381]:
neighborhood_predictions = model_ensemble(forecast_predictions,
                                          neighborhood_dist_predictions, forecast_features, targets)

In [383]:
pd.to_pickle(neighborhood_predictions, '../dashboard_data/neighborhood_predictions.pkl')

# Make neighborhood ratings for heatmap

In [343]:
ratings = neighborhood_ratings(neighborhood_predictions, forecast_features, targets.columns)

In [360]:
pd.to_pickle(ratings, '../dashboard_data/neighborhood_ratings.pkl')