In [None]:
import pandas as pd
import numpy as np

import datetime as dt

from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.utils import resample


import os
import sys
# Put the parent directory on sys.path (once) so the `src` package
# imported below can be resolved from this notebook's location.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)
    
from src.sports_scrapers import scrape_huskies, scrape_seahawks
from src.weather_scraper import get_raw_forecast, get_raw_forecast_dataframe, get_hi_temperature, seattle_weather_fcst
from src.data_retrievers import DataRetrieval
from src.holiday_calendars import SeattleHolidays
from src.featurizers import (CountCalls, FeaturizeCalls, DateDummies, HolidayDummies, 
                             EventDummies, MakeDummies, FeaturizeDates, JoinDataFrames,
                             MakeModelInput, AddWeatherForecast)
from src.models import (calls_pipe, forecast_pipe, baseline_model, city_model, 
                        neighborhood_dist_model, model_ensemble)
from src.neighborhood_ratings import neighborhood_ratings

# Get initial calls for service data

In [None]:
# Create the project's DataRetrieval helper (from src.data_retrievers);
# the next cell uses it to fetch the calls data.
retriever = DataRetrieval()

In [None]:
# Fetch the calls-for-service data through the retriever.
# NOTE(review): the source and cost of this call are not visible here —
# presumably a remote fetch; consider caching if it is slow.
calls = retriever.get_calls_data()

In [None]:
# calls_pipe (src.models) turns the calls data into a (targets, features)
# pair; both are split into train/test sets in the next section.
targets, features = calls_pipe(calls)

#  Train-Test Split

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    targets,
    test_size=0.2,
    random_state=157,
)


# Baseline Model

In [None]:
# Build the baseline model from the training split (baseline_model is
# imported from src.models).
neighborhood_model = baseline_model(X_train, y_train)

In [None]:
# Evaluate the baseline model on both splits.

# Training-set predictions and mean squared error.
neighborhood_train_predictions = neighborhood_model.predict(X_train)
neighborhood_train_mse = mean_squared_error(y_train, neighborhood_train_predictions)

# Held-out predictions and mean squared error.
# Fixed: this line previously called `neighborhood_model_b`, a name that is
# not defined in the visible notebook, so the cell raised NameError on a
# fresh kernel. The model fitted above is `neighborhood_model`.
neighborhood_test_predictions = neighborhood_model.predict(X_test)
neighborhood_test_mse = mean_squared_error(y_test, neighborhood_test_predictions)

In [None]:
# Display (train MSE, test MSE) for the baseline model.
neighborhood_train_mse, neighborhood_test_mse

In [None]:
# Baseline model's score on the training split.
# NOTE(review): presumably R^2 if this is a scikit-learn regressor — confirm
# against baseline_model in src.models.
neighborhood_model.score(X_train, y_train)

# City Model - GBRT

In [None]:
# Fit the city-level model on the training split.
# The result is bound to `model_city`, the name every later cell uses.
# Previously it was assigned to `city_model`, which (a) left `model_city`
# undefined, so the following cells raised NameError, and (b) overwrote the
# imported `city_model` factory, so re-running this cell would try to call
# the fitted model instead of the factory.
model_city = city_model(X_train, y_train)

In [None]:
# City-model predictions for both splits; these are combined with the
# neighborhood distribution in the ensemble section below.
train_pred_city = model_city.predict(X_train)
test_pred_city = model_city.predict(X_test)

In [None]:
# Score the city model against the row-wise totals of the targets
# (y.sum(axis=1)) and display (train MSE, test MSE).
city_train_mse = mean_squared_error(y_train.sum(axis=1), model_city.predict(X_train))
city_test_mse = mean_squared_error(y_test.sum(axis=1), model_city.predict(X_test))
city_train_mse, city_test_mse

In [None]:
# City model's score on the training split, against the row-wise totals of
# the targets (the same quantity used for the MSE above).
model_city.score(X_train, y_train.sum(axis=1))

# Neighborhood Distribution - Random Forest


In [None]:
# Build the neighborhood-distribution model from the training split
# (neighborhood_dist_model is imported from src.models). Its predictions are
# compared below with each row's share of the row total.
rf_dist = neighborhood_dist_model(X_train, y_train)

In [None]:
def _row_shares(counts):
    """Divide every row of `counts` by that row's total.

    The result is a new DataFrame built from plain arrays, so it carries
    default integer row/column labels rather than those of `counts`.
    """
    values = np.asarray(counts)
    totals = np.asarray(counts.sum(axis=1))
    # Broadcast the per-row totals down the columns.
    return pd.DataFrame(values / totals[:, np.newaxis])


# Observed share of each row's total that falls in each target column.
neighborhood_dist_train = _row_shares(y_train)
neighborhood_dist_test = _row_shares(y_test)

In [None]:
# Predicted neighborhood shares from the distribution model, one array per split.
train_pred_dist, test_pred_dist = (
    rf_dist.predict(split) for split in (X_train, X_test)
)

# (train MSE, test MSE) of the predicted shares against the observed shares;
# shown as the cell's output.
(
    mean_squared_error(neighborhood_dist_train, train_pred_dist),
    mean_squared_error(neighborhood_dist_test, test_pred_dist),
)

In [None]:
# Distribution model's score on the training split, against the observed
# neighborhood shares.
rf_dist.score(X_train, neighborhood_dist_train)

# Create Model Ensemble

In [None]:
# Combine the city-level predictions with the *predicted* neighborhood
# shares to get per-neighborhood predictions for each split.
#
# Fixed: this previously passed neighborhood_dist_train / neighborhood_dist_test,
# which are the shares computed directly from y_train / y_test. That fed
# ground-truth information into the ensemble, so the MSE reported in the next
# cell did not measure the distribution model at all. The forecast section
# passes the output of rf_dist.predict to model_ensemble; the evaluation now
# does the same.
#
# NOTE(review): model_ensemble is called with two arguments here and four in
# the forecast section — presumably the extra parameters are optional; verify
# against src.models.
train_pred_comb = model_ensemble(train_pred_city, train_pred_dist)
test_pred_comb = model_ensemble(test_pred_city, test_pred_dist)

In [None]:

# Score the combined predictions; the ensemble output is transposed before
# being compared with y_train / y_test. Displays (train MSE, test MSE).
ensemble_train_mse = mean_squared_error(y_train, train_pred_comb.T)
ensemble_test_mse = mean_squared_error(y_test, test_pred_comb.T)
ensemble_train_mse, ensemble_test_mse

# Create Forecast

In [None]:
# Forecast window passed to forecast_pipe (dates written as MM/DD/YYYY strings).
start_date = '12/03/2018'
end_date = '12/31/2019'
# NOTE(review): model_end pairs a date with the integer 3194; its meaning is
# not visible in this notebook (presumably the last date covered by the
# training data and a matching day index) — confirm against forecast_pipe.
model_end = ('09/30/2018', 3194)

In [None]:
# Build the feature frame for the forecast window. The result includes a
# 'date' column, which is dropped before the frame is passed to the models.
forecast_features = forecast_pipe(start_date, end_date, model_end)

In [None]:
# City-level forecast: the 'date' column is removed before predicting.
forecast_predictions = model_city.predict(forecast_features.drop(columns='date'))

In [None]:
# Persist the city-level forecast to the dashboard_data directory.
pd.to_pickle(forecast_predictions, '../dashboard_data/city_predictions.pkl')

In [None]:
# Predicted neighborhood shares for the forecast window, using the same
# feature frame (minus 'date') as the city-level forecast.
neighborhood_dist_predictions = rf_dist.predict(forecast_features.drop(columns='date'))

In [None]:
# Combine the city-level forecast with the predicted neighborhood shares.
# NOTE(review): forecast_features and targets are also passed here, unlike
# the two-argument calls in the evaluation section — presumably used to
# label the output; confirm against model_ensemble in src.models.
neighborhood_predictions = model_ensemble(forecast_predictions,
                                          neighborhood_dist_predictions, forecast_features, targets)

In [None]:
# Persist the per-neighborhood forecast to the dashboard_data directory.
pd.to_pickle(neighborhood_predictions, '../dashboard_data/neighborhood_predictions.pkl')

# Make neighborhood ratings for heatmap

In [None]:
# Derive neighborhood ratings from the per-neighborhood forecast, the
# forecast feature frame, and the target column labels (neighborhood_ratings
# is imported from src.neighborhood_ratings).
ratings = neighborhood_ratings(neighborhood_predictions, forecast_features, targets.columns)

In [None]:
# Persist the ratings to the dashboard_data directory.
pd.to_pickle(ratings, '../dashboard_data/neighborhood_ratings.pkl')