## Forecasting congestion in October with Random Forest

In [None]:
## Importing libraries
## NOTE(review): np, RandomForestRegressor, train_test_split and GridSearchCV are not
## referenced in the cells visible here -- presumably leftovers; confirm before removing
import warnings
import numpy as np
import pandas as pd
## Project-local module; its main_rf() entry point is called in the modeling step
import rf_modeling as rf_m
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
## Show up to 50 columns when a data frame is displayed
pd.set_option('display.max_columns', 50)
from sklearn.exceptions import DataConversionWarning
## Silence scikit-learn's DataConversionWarning for the rest of the session
warnings.filterwarnings(action='ignore', category=DataConversionWarning)

In [None]:
## Reading the data sets
from pathlib import Path

## Folder holding train.csv and test.csv -- the only line to edit when the data moves
DATA_DIR = Path('/Users/EvanCallaghan/Documents/Data_Science_Group/Analytics_Data_Science/Forecasting_Traffic_Flow/Data')

## Timestamp layout used to parse the 'time' column of both files
TIME_FORMAT = '%Y-%m-%d %H:%M:%S'

train = pd.read_csv(DATA_DIR / 'train.csv')
test = pd.read_csv(DATA_DIR / 'test.csv')

## Getting the 'time' variable in the proper format
train['time'] = pd.to_datetime(train['time'], format = TIME_FORMAT)
test['time'] = pd.to_datetime(test['time'], format = TIME_FORMAT)

## Printing the first five observations (kept as the last expression so the notebook displays it)
train.head()

### Variable Engineering:

In [None]:
## Extracting day, hour and minute
def _add_time_parts(frame):
    """Add 'day' (day of week), 'hour' and 'minute' columns, derived from frame['time'], in place."""
    timestamps = frame['time'].dt
    frame['day'] = timestamps.dayofweek
    frame['hour'] = timestamps.hour
    frame['minute'] = timestamps.minute

_add_time_parts(train)
_add_time_parts(test)

## Changing direction to dummies
train_dummies = pd.get_dummies(train['direction'])

## Encoding train and test independently can give them different dummy columns when a
## direction is absent from one of them. Align test to the columns seen in train:
## directions missing from test become all-zero columns, and directions that appear
## only in test are dropped. This is a no-op when both sets share the same directions.
test_dummies = pd.get_dummies(test['direction']).reindex(columns = train_dummies.columns, fill_value = 0)

train = pd.concat([train, train_dummies], axis = 1)
test = pd.concat([test, test_dummies], axis = 1)

### Modeling:

In [None]:
## Running the random forest pipeline exposed by rf_modeling.main_rf
results = rf_m.main_rf(train, test)

## Saving both outputs as csv files without the row index:
## element 0 goes to the validation file, element 1 to the test file
validation_output = results[0]
validation_output.to_csv('results_validation.csv', index = False)

test_output = results[1]
test_output.to_csv('results_test.csv', index = False)

### Evaluation:

In [None]:
from sklearn.metrics import mean_absolute_error

## Reading the newly created validation results data set
## (the name must match the file written in the modeling step, '.csv' extension included)
validation = pd.read_csv('results_validation.csv')

## Computing the MAE of congestion predictions
validation_mae = mean_absolute_error(validation['congestion'], validation['congestion_pred'])
print('MAE of validation set predictions:', validation_mae)