In [6]:
import pandas as pd

# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed in
# 0.23, so prefer the standalone `joblib` package and only fall back to the
# vendored copy on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# NOTE: joblib.load unpickles arbitrary Python objects; only load model files
# that come from a trusted source.
loaded_model = joblib.load('svm.joblib')

route800 = pd.read_csv('route_800.csv')
# Encode the time-of-day labels as integers. The replacement is applied to the
# whole frame, so any cell holding one of these strings is converted.
route800 = route800.replace({'Nighttime': 0, 'Morning Peak': 1, 'Midday': 2, 'Evening Peak': 3})

# Rows from index label 37695 onward are used as the prediction input
# (presumably the held-out test split -- TODO confirm against the training code).
xTest800 = route800.loc[37695:, ['route_id', 'speed', 'occupancy_status', 'dist_to_stop', 'sched_speed', 'dwell_time', 'weekday', 'time_of_day', 'prevAvg']].values
yPred800 = loaded_model.predict(xTest800)

In [7]:
# Display the SVM predictions computed for the route-800 rows above.
yPred800

array([18.75550157,  3.4487453 , 31.09728069, ..., 34.02851742,
       34.02851742, 34.35395344])

In [None]:
import torch.nn as nn
from torch.utils.data import Dataset
import torch
import numpy as np
import math
import pandas as pd
from sklearn.preprocessing import MinMaxScaler


class feature_data_util():
    """Load a bus-delay CSV and turn it into scaled features and delay classes.

    Caveats worth knowing before reusing this on a new file:

    * Categorical columns are integer-encoded in order of first appearance in
      the file being read, so the codes are only meaningful for that file.
    * The min-max scaler is re-fitted on every call, i.e. on whatever data is
      passed in.
    """

    # Feature columns, in the order they are handed to the scaler / network.
    _FEATURE_COLUMNS = ('route_id','speed','occupancy_status','stop_name','dist_to_stop','sched_speed','dwell_time','weekday','time_of_day','prevAvg')
    # Columns replaced by integer codes (0, 1, 2, ... in order of first appearance).
    _CATEGORICAL_COLUMNS = ('stop_name', 'time_of_day', 'route_id')
    # Upper edges (minutes, inclusive) of the coarse delay classes 11, 12 and 13.
    _COARSE_BIN_EDGES = (15, 20, 30)

    def getAllData(self,filepath=None):
        """Read ``filepath`` and return ``(y, X)``.

        Args:
            filepath: Path of the CSV file. When ``None`` the default sample
                file under ``/content/sample_data`` is used.

        Returns:
            A tuple ``(y, X)`` where ``y`` is an integer array of delay classes
            in ``0..14`` (see ``_bin_delay``) and ``X`` is the output of
            ``pre_process`` applied to the ten feature columns.

        Raises:
            KeyError: If a feature column or the ``delay`` column is missing.
        """
        if filepath is None:
            filepath = "/content/sample_data/vp_clean_k_prev_busses.csv"
        # Rows with a missing value in *any* column are discarded.
        df = pd.read_csv(filepath,low_memory=True).dropna()

        # factorize() numbers the distinct values in order of first appearance,
        # which is exactly what the unique() -> dict -> map() idiom produces.
        encoded = {
            column: pd.factorize(df[column])[0]
            for column in self._CATEGORICAL_COLUMNS
        }
        # assign() builds a new frame (no in-place mutation) and keeps the
        # original column order because every assigned column already exists.
        X = df[list(self._FEATURE_COLUMNS)].assign(**encoded)

        y = self._bin_delay(df['delay'].to_numpy())
        return y,self.pre_process(X)

    @classmethod
    def _bin_delay(cls, delay_seconds):
        """Convert delays in seconds into the class indices ``0..14``.

        Delays are rounded up to whole minutes. Minutes ``0..10`` map to
        themselves, ``(10, 15]`` to 11, ``(15, 20]`` to 12, ``(20, 30]`` to 13
        and anything above 30 to 14. Early arrivals (negative delay) are
        clamped to class 0 so that every label stays inside the range a
        15-output classifier can represent.
        """
        minutes = np.ceil(np.asarray(delay_seconds, dtype=np.float64) / 60).astype(np.int64)
        minutes = np.clip(minutes, 0, None)
        # With right=True, digitize yields 0 for <=15, 1 for <=20, 2 for <=30, 3 above.
        coarse = 11 + np.digitize(minutes, cls._COARSE_BIN_EDGES, right=True)
        return np.where(minutes <= 10, minutes, coarse)

    def pre_process(self,data):
        """Scale every column of ``data`` with a freshly fitted ``MinMaxScaler``.

        Args:
            data: Numeric table (DataFrame or 2-D array) to scale.

        Returns:
            The array returned by ``MinMaxScaler().fit_transform(data)``.
        """
        mm = MinMaxScaler()
        scaled_data = mm.fit_transform(data)
        return scaled_data



class FeatureDataSet(Dataset):
    """Torch ``Dataset`` over the features and labels built by ``feature_data_util``.

    Attributes are kept as two parallel float32 tensors: ``trainData`` holds
    the feature rows and ``lableData`` the matching labels.
    """

    def __init__(self ,path=None):
        """Load ``path`` through ``feature_data_util.getAllData`` and tensorise it.

        Args:
            path: CSV path forwarded unchanged to ``getAllData`` (``None``
                selects that loader's default file).
        """
        labels, features = feature_data_util().getAllData(path)

        self.lableData = self._as_float_tensor(labels)
        self.trainData = self._as_float_tensor(features)

    @staticmethod
    def _as_float_tensor(array):
        """Cast a NumPy array to float32 and wrap it in a tensor."""
        return torch.from_numpy(array.astype(np.float32))

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.trainData[index], self.lableData[index]

    def __len__(self):
        """Number of feature rows."""
        return len(self.trainData)

    def dataSet(self):
        """Return the full ``(features, labels)`` tensors."""
        return self.trainData,self.lableData

class Net(nn.Module):
    """Fully connected network mapping 10 input features to 15 output values.

    Layer widths are 10 -> 50 -> 200 -> 80 -> 30 -> 15 with a ReLU after every
    linear layer except the last. The same modules are registered twice, as
    ``fc_layers`` (a ``ModuleList``) and ``encoder`` (a ``Sequential``).
    """

    def __init__(self):
        super().__init__()
        widths = (10, 50, 200, 80, 30, 15)
        last = len(widths) - 2

        self.fc_layers = nn.ModuleList()
        for position, (n_in, n_out) in enumerate(zip(widths[:-1], widths[1:])):
            self.fc_layers.append(nn.Linear(n_in, n_out))
            # No activation after the output layer.
            if position != last:
                self.fc_layers.append(nn.ReLU())

        self.encoder = nn.Sequential(*self.fc_layers)

    def forward(self, X):
        """Run ``X`` through the stacked layers and return the result."""
        output = self.encoder(X)
        return output

In [None]:
import pandas as pd
import torch
import seaborn
import matplotlib.pyplot as plt
# Please change the data and checkpoint paths below to match your environment.
train_dataSet = FeatureDataSet("/content/sample_data/route_800.csv")
train_size = int(0.8 * len(train_dataSet))
test_size = len(train_dataSet) - train_size
# A fixed generator seed keeps the 80/20 split reproducible between runs.
train_dataSet, val_db = torch.utils.data.random_split(train_dataSet, [train_size, test_size],generator=torch.Generator().manual_seed(42))

model = Net()
# map_location='cpu' lets a checkpoint that was saved on a GPU load on a
# CPU-only machine (the predictions are converted to NumPy below anyway).
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
model.load_state_dict(torch.load('/content/mode/model2.pth', map_location='cpu'))
model.eval()

# `val_db` is a Subset: `val_db.dataset` is the *whole* dataset, so the
# held-out rows have to be selected explicitly through `val_db.indices`.
val_features = val_db.dataset.trainData[val_db.indices]
# Inference only: skip building the autograd graph and call the module itself
# (rather than .forward) so that registered hooks run.
with torch.no_grad():
    pred = model(val_features)

# Index of the highest of the 15 outputs = predicted delay class per row.
pred = pred.argmax(dim=1)


pred =pred.numpy().reshape(-1,1)
df = pd.DataFrame(pred)
df.columns = ['pred']
# Count how many rows were assigned to each predicted class.
delay_df = df.groupby(by=['pred'])['pred'].count()
daley = {'delay':delay_df.index,'numbers':delay_df.values}
daley_dis = pd.DataFrame(daley)
plt.figure(figsize=(15,10))
# x / y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
seaborn.barplot(x=daley_dis['delay'], y=daley_dis['numbers'], color = 'salmon')
plt.xlabel("delay time ")
plt.ylabel("number ")
plt.title("route 800 distrubution of Residuals")
plt.show()