In [1]:
import pandas as pd
import numpy as np
import csv
import math
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import TimeDistributed, Dense, Conv2D, Flatten 
from keras.layers import MaxPooling2D, Dropout, LSTM
from sklearn.preprocessing import StandardScaler

In [2]:
def processDataFrame(fileName):
    df = pd.read_csv(fileName)
    
    noOfCells = df['NoOfCells'][0]

    columnName = list(df)[2:15]
    print(columnName)
    
    df = df[columnName].astype(float)
    print(df.head(5))
        
    return df, noOfCells

In [3]:
def getTrainingSetSize(df, noOfcells):
    trainingDataPointSize = int(df.shape[0]*0.8/noOfcells)
    training_setsize = trainingDataPointSize*noOfcells
    return training_setsize

In [4]:
def getProcessedDataSet(df, noOfCells):
    input_X = []
    input_Y = []
    dataPoint_X = []
    dataPoint_Y = []
    scaler = StandardScaler()


    for index, rows in df.iterrows():
        if index == 0:
            X_row_list = [rows.PhaseStatus, rows.PhaseElapsedTime, rows.Speed, rows.DistanceToStopBar,rows.FrontCellStatus, rows.FrontCellVehicleSpeed]
            dataPoint_X.append(X_row_list)
            dataPoint_Y.append(rows.CellStatus)
        
        elif index % noOfCells != 0:
            X_row_list = [rows.PhaseStatus, rows.PhaseElapsedTime, rows.Speed, rows.DistanceToStopBar,rows.FrontCellStatus, rows.FrontCellVehicleSpeed]
            
            dataPoint_X.append(X_row_list)
            dataPoint_Y.append(rows.CellStatus)

        elif index % noOfCells == 0 and len(dataPoint_X) > 0:
            scaler = scaler.fit(dataPoint_X)
            dataPoint_X = scaler.transform(dataPoint_X)
            input_X.append(dataPoint_X)
            input_Y.append(dataPoint_Y)
            
            dataPoint_X = []
            dataPoint_Y = []
            X_row_list = [rows.PhaseStatus, rows.PhaseElapsedTime, rows.Speed, rows.DistanceToStopBar,rows.FrontCellStatus, rows.FrontCellVehicleSpeed]
            
            dataPoint_X.append(X_row_list)
            dataPoint_Y.append(rows.CellStatus)
        
        if index == len(df) - 1:
            scaler = scaler.fit(dataPoint_X)
            dataPoint_X = scaler.transform(dataPoint_X)
            input_X.append(dataPoint_X)
            input_Y.append(dataPoint_Y)  

    input_X = np.array(input_X)
    input_Y = np.array(input_Y)
    
    return input_X, input_Y

In [5]:
def createTimeSeriesData(inputX, inputY, n_future, n_past):
    timeSeriesDataX = []
    timeSeriesDataY=[]
    
    for i in range (n_past, inputX.shape[0] - n_future + 1):
        timeSeriesDataX.append(inputX[i-n_past:i,])
        
    timeSeriesDataY= inputY[n_past-1:inputY.shape[0]:]
    
    timeSeriesDataX = np.array(timeSeriesDataX)
    timeSeriesDataX = timeSeriesDataX.reshape(timeSeriesDataX.shape[0], timeSeriesDataX.shape[1], timeSeriesDataX.shape[2], timeSeriesDataX.shape[3], 1)
    
    return timeSeriesDataX, timeSeriesDataY

In [6]:
def createModel(dataShape, noOfCells):
    model = Sequential()#add model layers

    model.add(TimeDistributed(Conv2D(44, (3, 3), activation='relu'), input_shape = dataShape))
    model.add(TimeDistributed(Dropout(0.2)))
    model.add(TimeDistributed(Conv2D(22, (3, 3), activation='relu')))
    model.add(TimeDistributed(Dropout(0.2)))
    model.add(TimeDistributed(Flatten()))
    model.add(LSTM(30, return_sequences = True))
    model.add(Dropout(0.2))
    model.add(LSTM(15))
    model.add(Dropout(0.2))
    model.add(Dense(noOfCells, kernel_initializer='uniform', activation="relu"))
    model.compile(optimizer='adam', loss='mse')


    # look at the params before training
    model.summary()
    
    return model

In [7]:
def evaluateModel(model,noOfIteration):
    #train the model
    model.fit(trainX, trainY, validation_data=(testX, testY), epochs=noOfIteration)
    # evaluate the model
    score = model.evaluate(testX, testY, verbose=1)
    
    print(score)

In [8]:
df_forTraining, noOfCells = processDataFrame('combined-data.txt')
# df_forTraining, noOfCells = processDataFrame('test-data.csv')

['VehicleType', 'PhaseStatus', 'PhaseElapsedTime', 'Speed', 'DistanceToStopBar', 'FrontCellStatus', 'FrontCellVehicleSpeed', 'CellStatus']
   VehicleType  PhaseStatus  PhaseElapsedTime  Speed  DistanceToStopBar  \
0          0.0          4.0              0.12   -1.0              11.43   
1          0.0          4.0              0.12   -1.0              19.05   
2          0.0          4.0              0.12   -1.0              26.67   
3          0.0          4.0              0.12   -1.0              34.29   
4          0.0          4.0              0.12   -1.0              41.91   

   FrontCellStatus  FrontCellVehicleSpeed  CellStatus  
0              0.0                   -1.0         0.0  
1              0.0                   -1.0         0.0  
2              0.0                   -1.0         0.0  
3              0.0                   -1.0         0.0  
4              0.0                   -1.0         0.0  


In [9]:
train_inputX, train_inputY = getProcessedDataSet(df_forTraining, noOfCells)

print(train_inputX.shape)
print(train_inputY.shape)

(114992, 44, 6)
(114992, 44)


In [10]:
n_future = 0 #Number of days we want to predict into the future
n_past = 200 #Number of past days we want to use to predict the future

In [11]:
trainX,trainY = createTimeSeriesData(train_inputX, train_inputY, n_future, n_past)

In [12]:
print(trainX.shape)
print(trainY.shape)

(114793, 200, 44, 6, 1)
(114793, 44)


In [13]:
df_forTesting, noOfCells = processDataFrame('vehicle-status-data-0.20.csv')

['VehicleType', 'PhaseStatus', 'PhaseElapsedTime', 'Speed', 'DistanceToStopBar', 'FrontCellStatus', 'FrontCellVehicleSpeed', 'CellStatus']
   VehicleType  PhaseStatus  PhaseElapsedTime  Speed  DistanceToStopBar  \
0          0.0          4.0               0.0   -1.0              11.43   
1          0.0          4.0               0.0   -1.0              19.05   
2          0.0          4.0               0.0   -1.0              26.67   
3          0.0          4.0               0.0   -1.0              34.29   
4          0.0          4.0               0.0   -1.0              41.91   

   FrontCellStatus  FrontCellVehicleSpeed  CellStatus  
0              0.0                   -1.0         0.0  
1              0.0                   -1.0         0.0  
2              0.0                   -1.0         0.0  
3              0.0                   -1.0         0.0  
4              0.0                   -1.0         0.0  


In [14]:
test_inputX, test_inputY = getProcessedDataSet(df_forTesting, noOfCells)
print(test_inputX.shape)
print(test_inputY.shape)

(11495, 44, 6)
(11495, 44)


In [15]:
testX,testY = createTimeSeriesData(test_inputX, test_inputY, n_future, n_past)

In [16]:
print(testX.shape)
print(testY.shape)

(11296, 200, 44, 6, 1)
(11296, 44)


In [17]:
input_shape=(n_past, trainX.shape[2], trainX.shape[3], 1)
noOfCells = trainX.shape[2]

print(input_shape)
print(noOfCells)

(200, 44, 6, 1)
44


In [18]:
model = createModel(input_shape, noOfCells)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
time_distributed (TimeDistri (None, 200, 42, 4, 44)    440       
_________________________________________________________________
time_distributed_1 (TimeDist (None, 200, 42, 4, 44)    0         
_________________________________________________________________
time_distributed_2 (TimeDist (None, 200, 40, 2, 22)    8734      
_________________________________________________________________
time_distributed_3 (TimeDist (None, 200, 40, 2, 22)    0         
_________________________________________________________________
time_distributed_4 (TimeDist (None, 200, 1760)         0         
_________________________________________________________________
lstm (LSTM)                  (None, 200, 30)           214920    
_________________________________________________________________
dropout_2 (Dropout)          (None, 200, 30)           0

In [19]:
noOfIteration = 20
model_evaluation = evaluateModel(model,noOfIteration)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
0.06003616377711296


In [20]:
# make predictions
scaler = StandardScaler()
scaler = scaler.fit(testY)
testPredict = model.predict(testX)
testPredict = scaler.inverse_transform(testPredict)

In [21]:
testScore = math.sqrt(mean_squared_error(testY[:,0], testPredict[:,0]))

In [22]:
testScore

0.40863362596132313

In [33]:
def analyzeSampleData(sample_inputX, sample_inputY, n_future, n_past, model, testY):
    sampleX = [] 
    sampleY = []
    fields = ['SampleCellStatus', 'PredictedCellStatus','Results', 'NoOfTrueValue', 'NoOfFalseValue']
    filename = open("sample-results-0.10.csv",'w')

    csvwriter = csv.writer(filename) 

        # writing the fields 
    csvwriter.writerow(fields) 
    for i in range (n_past, sample_inputX.shape[0] - n_future + 1):
        sampleX = []
        sampleY = []
        results = []
        rows = []
        sampleX.append(sample_inputX[i-n_past:i,])

        sampleX = np.array(sampleX)
        sampleX = sampleX.reshape(sampleX.shape[0], sampleX.shape[1], sampleX.shape[2], sampleX.shape[3], 1)

        sampleY = sample_inputY[i-1,]

        
        scaler = StandardScaler()
        scaler = scaler.fit(testY)
        samplePredict = model.predict(sampleX)
        samplePredict = scaler.inverse_transform(samplePredict)
        
        for index in range(len(sampleY)):
 
            if sampleY[index] >0 and samplePredict[:,index] <= 0.3:
                output = False
                results.append(output)
                
            elif sampleY[index] <=0 and samplePredict[:,index] >=0.2:
                output = False
                results.append(output)
                
            elif sampleY[index] == samplePredict[:,index]:
                output = True
                results.append(output)
                
            else: 
                output = True
                results.append(output)
            filename = open("sample-results-0.10.csv",'a')
            csvwriter = csv.writer(filename)    
            rows = [sampleY[index], samplePredict[:,index], output, results.count(True), results.count(False)]
            csvwriter.writerow(rows)
        filename.close()
      

    

In [34]:
sample_df, noOfCells = processDataFrame('sample-vehicle-status-data-0.10.csv')

['CellNo', 'NoOfConnectedVehicle', 'NoOfNonConnectedVehicle', 'ConnectedVehicleId', 'NonConnectedVehicleId', 'VehicleType', 'PhaseStatus', 'PhaseElapsedTime', 'Speed', 'DistanceToStopBar', 'FrontCellStatus', 'FrontCellVehicleSpeed', 'CellStatus']
   CellNo  NoOfConnectedVehicle  NoOfNonConnectedVehicle  ConnectedVehicleId  \
0     1.0                   0.0                      5.0                 0.0   
1     2.0                   0.0                      5.0                 0.0   
2     3.0                   0.0                      5.0                 0.0   
3     4.0                   0.0                      5.0                 0.0   
4     5.0                   0.0                      5.0                 0.0   

   NonConnectedVehicleId  VehicleType  PhaseStatus  PhaseElapsedTime  Speed  \
0                   45.0          0.0          4.0               0.2   -1.0   
1                   46.0          0.0          4.0               0.2   -1.0   
2                    0.0          0

In [35]:
sample_inputX, sample_inputY = getProcessedDataSet(sample_df, noOfCells)

In [36]:
print(sample_inputX.shape)
print(sample_inputY.shape)

(2741, 44, 6)
(2741, 44)


In [37]:
analyzeSampleData(sample_inputX, sample_inputY, n_future, n_past, model, testY)

In [None]:
def getCurrentSignalTimingPlan