# Using OmniSci and LSTM to Predict Traffic Flow

Here, we:
1. Load data from an OmniSci database straight into pandas
2. Predict the speed of cars using Keras
3. Send the results from pandas back to OmniSci directly with pymapd

<img src="files/traffic_photo.jpeg" width="400">


# Import Libraries

In [3]:
import time
import sys
from configparser import ConfigParser
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import math
from sklearn.metrics import mean_squared_error

sys.path.append('../')
from src.data_processing.process_utils import apply_custom_transformations
import src.data_processing.process_utils as utils
from src.omnisci_connector.omni_connect import OmnisciConnect
from src import train_utils

# Location of the project's INI settings file, relative to this notebook.
config_path = '../config.ini'

# Parse the settings file into a ConfigParser instance.
config = ConfigParser()
config.read([config_path])

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


ModuleNotFoundError: No module named 'statsmodels.api'

# Configure and connect to OmniSci

In [None]:
import os

import pymapd

# Connection settings can be overridden through environment variables so that
# real credentials do not have to be written into the notebook. When a
# variable is unset, the notebook's original local-development value is used,
# so the cell behaves exactly as before in an unconfigured environment.
con = pymapd.connect(user=os.environ.get("OMNISCI_USER", "abraham"),
                     password=os.environ.get("OMNISCI_PASSWORD", "abraham"),
                     host=os.environ.get("OMNISCI_HOST", "localhost"),
                     dbname=os.environ.get("OMNISCI_DBNAME", "abraham"),
                     port=int(os.environ.get("OMNISCI_PORT", 6273)),
                     protocol=os.environ.get("OMNISCI_PROTOCOL", "http"))

print(con)

# Traffic-only Prediction

## Bring in DataFrame from OmniSci using pymapd

In [None]:
# Table queried for the traffic readings.
table_name = "caltrans_historic_2015_2019"

# Columns to fetch. The trailing space is kept so that the generated SQL text
# is character-for-character the same as before.
cols = ", ".join([
    "timestamp_",
    "station",
    "direction",
    "freeway",
    "occupancy",
    "speed",
]) + " "

# Restrict the rows to February 2019 on freeway 101.
condition = "WHERE " + " AND ".join([
    "timestamp_ >= '2019-02-01 00:00'",
    "timestamp_ <  '2019-03-01 00:00'",
    "freeway = 101",
])

query = "select {} from {} {}".format(cols, table_name, condition)

print(query)

In [None]:
# Run the query with select_ipc and order the returned rows by timestamp_.
df_Omnisci = con.select_ipc(query).sort_values(by='timestamp_')

# Preview the first rows.
df_Omnisci.head()

In [None]:
# Window sizes passed to train_utils.format_model_data and
# train_utils.data_index.
# n_lag: number of timesteps to look back.
# n_steps: presumably the number of timesteps to predict ahead — TODO confirm
# against train_utils.
n_lag, n_steps = 12, 6

### Prepare traffic data

In [None]:
# Keep only the key and feature columns used for the traffic-only model.
cols = ['station', 'timestamp_', 'occupancy', 'speed']
index_keys = ['station', 'timestamp_']

# Index by (station, timestamp_) and order the rows by those same keys.
df_tpredict = df_Omnisci[cols].set_index(index_keys).sort_values(by=index_keys)

# format_model_data returns four values; its exact semantics live in
# train_utils (presumably reframed data, key, scaled values and the fitted
# scaler — TODO confirm).
traffic_model_data = train_utils.format_model_data(df_tpredict, n_lag, n_steps)
treframed, tkey, tscaled, tscaler1 = traffic_model_data

# Post-process the reframed frame with the project's remove_cols helper.
treframed = train_utils.remove_cols(treframed)

treframed.head()

## Predict traffic using speed and occupancy

In [None]:
# Path to the saved model file used for the traffic-only prediction.
tmodel = '../models/traffic_190513_2300.h5'

# predict_data returns three values (presumably actual values, predictions and
# an RMSE, judging by the names — TODO confirm in train_utils).
traffic_prediction = train_utils.predict_data(treframed, tmodel, tscaler1)
tinv_y, tinv_yhat, trmse = traffic_prediction



In [None]:
# Plot the first 600 actual and predicted values of the traffic-only model.
plt.figure(num=None, figsize=(20, 6), dpi=80, edgecolor='k')
plt.title("Prediction using Traffic Data")
plt.plot(tinv_y[0:600], label="actual")
plt.plot(tinv_yhat[0:600], label="predicted")
# Without labels and a legend the two curves cannot be told apart.
plt.legend()

# Prepare Weather Data

### Read in metadata for traffic and weather

In [None]:
# Select every column of the traffic metadata table.
traffic_meta_table = "caltrans_traffic_d04_metatable_weatherID"
query_traffic_meta = "select * from {}".format(traffic_meta_table)
print(query_traffic_meta)

df_traffic_metadata = con.select_ipc(query_traffic_meta)

In [None]:
# Weather columns to fetch, including the weather station id.
cols = ", ".join([
    "timestamp_",
    "hourlyprecipitation",
    "hourlyvisibility",
    "hourlywindspeed",
    "weather_station_id",
])

query_weather = "select {} from ncdc_weather_clean_190511".format(cols)

print(query_weather)

In [None]:
# Fetch the weather rows and order them by timestamp_.
df_weather = con.select_ipc(query_weather).sort_values(by=['timestamp_'])

# Lookup from traffic station id to its weather_station_id.
station_to_weather = df_traffic_metadata[['id', 'weather_station_id']].set_index('id')

# Attach the weather station id to every traffic row (matched on 'station'),
# with timestamp_ as the index, then order the rows by timestamp_.
df_w_traffic = df_Omnisci.set_index('timestamp_').join(station_to_weather, on='station')
df_w_traffic = df_w_traffic.sort_values(by=['timestamp_'])

df_w_traffic.head()

## Prepare traffic and weather data for model

In [None]:
# Key and feature columns kept for the traffic+weather model.
data_cols = ['station', 'timestamp_', 'occupancy', 'speed', 'hourlyprecipitation', 'hourlywindspeed']

join_key = ['timestamp_']

# For each traffic row, take the weather row of the same weather station whose
# timestamp_ is nearest.
traffic_with_weather = pd.merge_asof(df_w_traffic,
                                     df_weather,
                                     on=join_key,
                                     by='weather_station_id',
                                     direction='nearest')

# Reduce to the model columns, order by station then time, and index by both.
station_time_keys = ['station', 'timestamp_']
df_wpredict = (traffic_with_weather[data_cols]
               .sort_values(station_time_keys)
               .set_index(station_time_keys))


In [None]:
# format_model_data returns four values; its exact semantics live in
# train_utils (presumably reframed data, key, scaled values and the fitted
# scaler — TODO confirm).
weather_model_data = train_utils.format_model_data(df_wpredict, n_lag, n_steps)
wreframed, wkey, wscaled, wscaler1 = weather_model_data

# Post-process the reframed frame with the project's remove_cols helper.
wreframed = train_utils.remove_cols(wreframed)

wreframed.head()

## Predict with Weather+Traffic Data

In [None]:
# Path to the saved model file used for the traffic+weather prediction.
wmodel = '../models/190516_0000_TrafficAndWeather.h5'

# predict_data returns three values (presumably actual values, predictions and
# an RMSE, judging by the names — TODO confirm in train_utils).
weather_prediction = train_utils.predict_data(wreframed, wmodel, wscaler1)
winv_y, winv_yhat, wrmse = weather_prediction

In [None]:
# Plot the first 600 actual and predicted values of the traffic+weather model.
plt.figure(num=None, figsize=(20, 6), dpi=80, edgecolor='k')
# Titled like the traffic-only plot so the two figures can be told apart.
plt.title("Prediction using Traffic and Weather Data")
plt.plot(winv_y[0:600], label="actual")
plt.plot(winv_yhat[0:600], label="predicted")
# Without labels and a legend the two curves cannot be told apart.
plt.legend()

# Send predicted data back to OmniSci

In [None]:
# Call train_utils.data_index once per station (groups of the first index
# level of df_wpredict) and wrap each result in a DataFrame.
per_station_frames = [
    pd.DataFrame(train_utils.data_index(station_df, n_lag, n_steps))
    for _, station_df in df_wpredict.groupby(level=0)
]
tmp_ = pd.concat(per_station_frames, ignore_index=True)

# Expand the entries of column 0 into separate columns, named station and
# timestamp_, and convert the row labels to strings.
index_entries = tmp_[0].tolist()
my_index = pd.DataFrame(index_entries, index=tmp_.index)
my_index = my_index.rename(index=str, columns={0: "station", 1: "timestamp_"})

my_index.head()


In [None]:
# Work on a copy so the columns added below do not also appear on my_index.
df_out = my_index.copy()

# Actual speed and the predictions of the traffic-only (t) and
# traffic+weather (w) models.
df_out['actual_speed'] = tinv_y
df_out['predicted_speed_t'] = tinv_yhat
df_out['predicted_speed_w'] = winv_yhat

# Absolute error of each model.
df_out['tdifference'] = abs(tinv_y - tinv_yhat)
df_out['wdifference'] = abs(winv_y - winv_yhat)

# Extra columns to attach from df_wpredict. df_wpredict only holds the
# columns named in data_cols, which does not include 'hourlyvisibility';
# selecting a missing label raises KeyError, so only the requested columns
# that are actually present are joined.
wanted_cols = ['occupancy', 'hourlyprecipitation', 'hourlyvisibility', 'hourlywindspeed']
cols = [c for c in wanted_cols if c in df_wpredict.columns]

# Join on (station, timestamp_), then turn the keys back into columns.
df_out = df_out.set_index(['station', 'timestamp_']).join(df_wpredict[cols]).reset_index()

df_out.head()

In [None]:
# Upload the assembled results frame to OmniSci under this table name.
output_table = "predicted_traffic_weather_190522_0000"
con.load_table(output_table, df_out)