# I70 Traffic Predictor
---

### The goal of this project is to predict the number of cars that will pass a traffic-counting station along I70 during a given hour of a given day

We use two data sources: NOAA (National Oceanic and Atmospheric Administration) daily snowfall totals and daily snow depth, recorded at a measuring station in Winter Park, Colorado; and CDOT (Colorado Department of Transportation) hourly counts of the cars passing a count station (station 000120 on I70, just before Idaho Springs).

In [2]:
import numpy as np
import sklearn
import math
import matplotlib.pyplot as plt
import pandas as pd
import data
from datetime import datetime
import calendar
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
from tensorflow.keras.losses import MeanSquaredLogarithmicError

In [3]:
def _snow_value(raw):
    '''
    Convert a single cell of a NOAA snow table to a float
    :param raw: The value read from the table (a number, a numeric string, or a marker)
    :return: 0.0 for the "T" and "M" markers (treated as invalid data), otherwise float(raw)
    '''
    # NOTE(review): "T"/"M" are presumably NOAA's trace/missing flags - confirm against the data docs
    if isinstance(raw, str) and raw in ("T", "M"):
        return 0.0
    return float(raw)


def transform_data_one_year(traffic_data, snow_depth_data, daily_snow_data):
    '''
    Function to return a transformed dataframe of a year's worth of data

    Every secondary-direction ('S') traffic row is expanded into 24 output rows,
    one per hour, each carrying that day's snow depth and daily snowfall.
    The snow tables are read with the day of month as the row position (day 1 is
    row 0) and the abbreviated month name ('Jan', 'Feb', ...) as the column.
    "T" and "M" entries in BOTH snow tables count as 0, and none of the input
    dataframes is modified.

    :param traffic_data: The traffic dataframe for a given year, exported from CDOT
                         (needs COUNTDIR, FormattedDate as m/d/Y, and HOUR0..HOUR23 columns)
    :param snow_depth_data: The snow depth dataframe for the same year, from NOAA
    :param daily_snow_data: The daily snow dataframe for the same year, from NOAA
    :return: A single, formatted dataframe with the columns
             MONTH, YEAR, DAY, HOUR, SNOW_DEPTH, DAILY_SNOW, COUNT (in that order)
    '''
    # Only care about secondary direction: keep rows where COUNTDIR == 'S'
    traffic_data = traffic_data[traffic_data['COUNTDIR'] == 'S']
    hour_columns = ['HOUR' + str(hour) for hour in range(24)]

    # Column order matters downstream: COUNT (the prediction target) stays last
    columns = {
        'MONTH': [],
        'YEAR': [],
        'DAY': [],
        'HOUR': [],
        'SNOW_DEPTH': [],
        'DAILY_SNOW': [],
        'COUNT': [],
    }

    # One tuple of 24 hourly counts per day, in HOUR0..HOUR23 order
    hourly_counts = traffic_data[hour_columns].itertuples(index=False, name=None)
    for date, counts in zip(traffic_data['FormattedDate'], hourly_counts):
        # Everything that depends only on the day is worked out once per day
        parsed = datetime.strptime(date, "%m/%d/%Y")
        month_column = calendar.month_abbr[parsed.month]
        snow_depth = _snow_value(snow_depth_data[month_column].iloc[parsed.day - 1])
        daily_snow = _snow_value(daily_snow_data[month_column].iloc[parsed.day - 1])

        for hour, count in enumerate(counts):
            columns['MONTH'].append(parsed.month)
            columns['YEAR'].append(parsed.year)
            columns['DAY'].append(parsed.day)
            columns['HOUR'].append(hour)
            columns['SNOW_DEPTH'].append(snow_depth)
            columns['DAILY_SNOW'].append(daily_snow)
            columns['COUNT'].append(count)

    # finally create the dataframe
    return pd.DataFrame(columns)
    
def _read_year_csvs(year):
    '''
    Read the three raw CSV exports for one year from the data/ folder
    :param year: Four digit year that appears in the file names, e.g. 2019
    :return: (traffic, snow_depth, daily_snow) dataframes, in the argument order
             that transform_data_one_year expects
    '''
    traffic = pd.read_csv("data/AnnualTrafficVolume{}.csv".format(year))
    snow_depth = pd.read_csv("data/{}WinterParkSnowDepth.csv".format(year))
    daily_snow = pd.read_csv("data/{}WinterParkDailySnow.csv".format(year))
    return traffic, snow_depth, daily_snow


# Create the dataframe for 2019
traffic19, snow_depth19, daily_snow19 = _read_year_csvs(2019)
df1 = transform_data_one_year(traffic19, snow_depth19, daily_snow19)

# Create the dataframe for 2020
traffic20, snow_depth20, daily_snow20 = _read_year_csvs(2020)
df2 = transform_data_one_year(traffic20, snow_depth20, daily_snow20)

# Create the dataframe for 2021
traffic21, snow_depth21, daily_snow21 = _read_year_csvs(2021)
df3 = transform_data_one_year(traffic21, snow_depth21, daily_snow21)

# Create the dataframe for 2022
traffic22, snow_depth22, daily_snow22 = _read_year_csvs(2022)
df4 = transform_data_one_year(traffic22, snow_depth22, daily_snow22)

# Concatenate the yearly dataframes. Each one is indexed from 0, so ignore_index=True
# is needed to give the combined frame unique row labels instead of four overlapping ranges.
frames = [df1, df2, df3, df4]
full_dataframe = pd.concat(frames, ignore_index=True)
full_dataframe.dtypes

MONTH           int64
YEAR            int64
DAY             int64
HOUR            int64
SNOW_DEPTH    float64
DAILY_SNOW    float64
COUNT           int64
dtype: object

In [19]:
# np_data is a second name for the same dataframe object (not a copy)
np_data = full_dataframe
# Features are every column except the target; COUNT is the value being predicted.
# Selecting by name keeps the split correct even if the column order ever changes.
X = full_dataframe.drop(columns=['COUNT'])
y = full_dataframe['COUNT']
# A fixed random_state makes the train/test partition identical on every
# Restart & Run All, so the reported score can be compared between runs.
x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=42)

def scale_datasets(x_train, x_test):
    """
    Z-score normalise the train and test feature frames.

    The scaler is fitted on x_train only; the statistics learned there are
    then applied to both frames. Each result is a new DataFrame that keeps
    the column names of its input and gets a fresh default index.

    Returns the pair (scaled train frame, scaled test frame).
    """
    scaler = StandardScaler().fit(x_train)

    def _as_frame(features):
        # Wrap the scaled array back into a DataFrame with the original column names
        return pd.DataFrame(scaler.transform(features), columns=features.columns)

    return _as_frame(x_train), _as_frame(x_test)

# Standardise both feature frames (the scaler is fitted on the training split)
x_train_scaled, x_test_scaled = scale_datasets(x_train=x_train, x_test=x_test)

# Widths of the three hidden Dense layers, in network order
hidden_units1, hidden_units2, hidden_units3 = 160, 480, 256
# Learning rate handed to the Adam optimizer
learning_rate = 0.01
# Creating model using the Sequential in tensorflow
def build_model_using_sequential():
    """
    Assemble the (uncompiled) feed-forward regression network.

    Three ReLU Dense layers sized by the module-level hidden_units1,
    hidden_units2 and hidden_units3, with Dropout(0.2) after the first two,
    followed by a single linear output unit.
    """
    hidden_plan = (
        (hidden_units1, True),
        (hidden_units2, True),
        (hidden_units3, False),
    )
    layer_stack = []
    for width, followed_by_dropout in hidden_plan:
        layer_stack.append(Dense(width, kernel_initializer='normal', activation='relu'))
        if followed_by_dropout:
            layer_stack.append(Dropout(0.2))
    layer_stack.append(Dense(1, kernel_initializer='normal', activation='linear'))
    return Sequential(layer_stack)
# Seed TensorFlow before any layer is created so that weight initialisation,
# dropout masks and batch shuffling start from the same state on every run
# (repeatable to the extent the backend itself is deterministic).
tf.random.set_seed(42)

# build the model
model = build_model_using_sequential()

# loss function (also reported as the metric)
msle = MeanSquaredLogarithmicError()
model.compile(
    loss=msle,
    optimizer=Adam(learning_rate=learning_rate),
    metrics=[msle]
)
# train the model
# y_train is a pandas Series that still carries the shuffled integer row labels from
# the train/test split. It is passed as a plain NumPy array so that the slicing Keras
# does for validation_split is unambiguously positional.
history = model.fit(
    x_train_scaled,
    y_train.to_numpy(),
    epochs=20,
    batch_size=64,
    validation_split=0.2
)

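# Disabled experiment, kept for reference: linear regression on degree-2 polynomial features.
# NOTE: it will not run as written - it refers to X_train (the split in this notebook is
# named x_train), it scores on the untransformed X_train instead of X_train_, and it would
# overwrite `model`, the neural network trained above.
# transformer = PolynomialFeatures(degree=2, include_bias=False)
# transformer.fit(X_train)
# X_train_ = transformer.transform(X_train)
# model = LinearRegression().fit(X_train_, y_train)
# print("score:", model.score(X_train, y_train))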

Epoch 1/20


  return t[start:end]


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [27]:
# Score = share of test rows whose predicted count lies strictly within
# +/-20% of the true count.
predictions = model.predict(x_test_scaled)

# The network has a single output unit, so predict() yields one column;
# flatten it so it lines up element-by-element with the true counts.
predicted_counts = np.asarray(predictions).reshape(-1)
actual_counts = y_test.to_numpy()
tolerance = actual_counts * 0.2

# Same two-sided strict comparison as a per-row check, evaluated for all rows at once.
# A true count of 0 gives a zero-width band and therefore can never be counted as a hit.
within_tolerance = (
    (actual_counts - tolerance < predicted_counts)
    & (predicted_counts < actual_counts + tolerance)
)
count = int(within_tolerance.sum())

print("score:", count/len(predictions))

score: 0.5126582278481012
