In [63]:
import pandas as pd
from sklearn.model_selection import train_test_split
from torch import nn
import torch
import numpy as np

In [64]:
tr = pd.read_csv("traffic.csv", parse_dates = ["DateTime"])
tr

Unnamed: 0,DateTime,Junction,Vehicles,ID
0,2015-11-01 00:00:00,1,15,20151101001
1,2015-11-01 01:00:00,1,13,20151101011
2,2015-11-01 02:00:00,1,10,20151101021
3,2015-11-01 03:00:00,1,7,20151101031
4,2015-11-01 04:00:00,1,9,20151101041
...,...,...,...,...
48115,2017-06-30 19:00:00,4,11,20170630194
48116,2017-06-30 20:00:00,4,30,20170630204
48117,2017-06-30 21:00:00,4,16,20170630214
48118,2017-06-30 22:00:00,4,22,20170630224


In [65]:
tr["Year"] = tr.DateTime.dt.year
tr["Month"] = tr.DateTime.dt.month
tr["Day"] = tr.DateTime.dt.day
tr["Dayofweek"] = tr.DateTime.dt.dayofweek
tr["Dayofyear"] = tr.DateTime.dt.dayofyear
tr["Hour"] = tr.DateTime.dt.hour

tr.drop("DateTime", axis=1, inplace=True)
tr.drop("ID", axis=1, inplace=True)
tr

Unnamed: 0,Junction,Vehicles,Year,Month,Day,Dayofweek,Dayofyear,Hour
0,1,15,2015,11,1,6,305,0
1,1,13,2015,11,1,6,305,1
2,1,10,2015,11,1,6,305,2
3,1,7,2015,11,1,6,305,3
4,1,9,2015,11,1,6,305,4
...,...,...,...,...,...,...,...,...
48115,4,11,2017,6,30,4,181,19
48116,4,30,2017,6,30,4,181,20
48117,4,16,2017,6,30,4,181,21
48118,4,22,2017,6,30,4,181,22


In [66]:
tr["Junction"].value_counts()

1    14592
2    14592
3    14592
4     4344
Name: Junction, dtype: int64

In [67]:
tr.isna().sum()

Junction     0
Vehicles     0
Year         0
Month        0
Day          0
Dayofweek    0
Dayofyear    0
Hour         0
dtype: int64

In [68]:
tr.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48120 entries, 0 to 48119
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   Junction   48120 non-null  int64
 1   Vehicles   48120 non-null  int64
 2   Year       48120 non-null  int64
 3   Month      48120 non-null  int64
 4   Day        48120 non-null  int64
 5   Dayofweek  48120 non-null  int64
 6   Dayofyear  48120 non-null  int64
 7   Hour       48120 non-null  int64
dtypes: int64(8)
memory usage: 2.9 MB


In [69]:
tr = pd.get_dummies(tr, drop_first=True, columns=["Junction"])
tr

Unnamed: 0,Vehicles,Year,Month,Day,Dayofweek,Dayofyear,Hour,Junction_2,Junction_3,Junction_4
0,15,2015,11,1,6,305,0,0,0,0
1,13,2015,11,1,6,305,1,0,0,0
2,10,2015,11,1,6,305,2,0,0,0
3,7,2015,11,1,6,305,3,0,0,0
4,9,2015,11,1,6,305,4,0,0,0
...,...,...,...,...,...,...,...,...,...,...
48115,11,2017,6,30,4,181,19,0,0,1
48116,30,2017,6,30,4,181,20,0,0,1
48117,16,2017,6,30,4,181,21,0,0,1
48118,22,2017,6,30,4,181,22,0,0,1


In [70]:
tr.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48120 entries, 0 to 48119
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype
---  ------      --------------  -----
 0   Vehicles    48120 non-null  int64
 1   Year        48120 non-null  int64
 2   Month       48120 non-null  int64
 3   Day         48120 non-null  int64
 4   Dayofweek   48120 non-null  int64
 5   Dayofyear   48120 non-null  int64
 6   Hour        48120 non-null  int64
 7   Junction_2  48120 non-null  uint8
 8   Junction_3  48120 non-null  uint8
 9   Junction_4  48120 non-null  uint8
dtypes: int64(7), uint8(3)
memory usage: 2.7 MB


In [71]:
X = tr.drop("Vehicles", axis=1)
y = tr["Vehicles"]

In [72]:
import torch
X = torch.from_numpy(X.to_numpy()).type(torch.float)
y = torch.from_numpy(y.to_numpy()).type(torch.float).unsqueeze(dim=1)

In [73]:
X.shape, y.shape

(torch.Size([48120, 9]), torch.Size([48120, 1]))

In [74]:
X[:2]

tensor([[2.0150e+03, 1.1000e+01, 1.0000e+00, 6.0000e+00, 3.0500e+02, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00],
        [2.0150e+03, 1.1000e+01, 1.0000e+00, 6.0000e+00, 3.0500e+02, 1.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00]])

In [75]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [76]:
X_train.shape, y_train.shape

(torch.Size([38496, 9]), torch.Size([38496, 1]))

In [77]:
X_train[:2]

tensor([[2.0160e+03, 3.0000e+00, 2.1000e+01, 0.0000e+00, 8.1000e+01, 1.3000e+01,
         0.0000e+00, 1.0000e+00, 0.0000e+00],
        [2.0150e+03, 1.1000e+01, 2.1000e+01, 5.0000e+00, 3.2500e+02, 1.2000e+01,
         0.0000e+00, 0.0000e+00, 0.0000e+00]])

In [78]:
y_train[:2]

tensor([[13.],
        [18.]])

In [79]:
len(X_train[0])

9

In [80]:
len(X[0])

9

In [81]:
y[0]

tensor([15.])

In [96]:
from torch import nn
h=18
class circlemodel(nn.Module):
  def __init__(self):
    super().__init__()
    self.layer1 = nn.Linear(in_features=9, out_features=h)
    self.layer2 = nn.Linear(in_features=h, out_features=h)
    self.layer3 = nn.Linear(in_features=h, out_features=1)
    self.relu = nn.ReLU()

  def forward(self, x):
    return self.layer3(self.relu(self.layer2(self.relu(self.layer1(x)))))

model=circlemodel()
model

loss_fn = nn.L1Loss()
optimizer = torch.optim.SGD(params = model.parameters(), lr=0.001)

epochs = 1000

for epoch in range(epochs):
  model.train()
  y_pred = model(X_train)
  loss = loss_fn(y_pred, y_train)
  optimizer.zero_grad()
  loss.backward
  optimizer.step()

  model.eval()
  with torch.inference_mode():
    test_pred = model(X_test)
    test_loss = loss_fn(test_pred, y_test)

    if epoch % 100 == 0:
      print(f"Epoch: {epoch} | Train loss: {loss} | Test loss: {test_loss}")

Epoch: 0 | Train loss: 91.4617919921875 | Test loss: 91.3261489868164
Epoch: 100 | Train loss: 91.4617919921875 | Test loss: 91.3261489868164
Epoch: 200 | Train loss: 91.4617919921875 | Test loss: 91.3261489868164
Epoch: 300 | Train loss: 91.4617919921875 | Test loss: 91.3261489868164
Epoch: 400 | Train loss: 91.4617919921875 | Test loss: 91.3261489868164
Epoch: 500 | Train loss: 91.4617919921875 | Test loss: 91.3261489868164
Epoch: 600 | Train loss: 91.4617919921875 | Test loss: 91.3261489868164
Epoch: 700 | Train loss: 91.4617919921875 | Test loss: 91.3261489868164
Epoch: 800 | Train loss: 91.4617919921875 | Test loss: 91.3261489868164
Epoch: 900 | Train loss: 91.4617919921875 | Test loss: 91.3261489868164


In [93]:
y_pred = model(X_train)

In [38]:
y_train[8]

tensor([88.])

In [39]:
tr["Vehicles"].value_counts()

8      2325
9      2290
7      2220
6      2206
10     2145
       ... 
129       1
141       1
135       1
143       1
162       1
Name: Vehicles, Length: 141, dtype: int64

In [40]:
model(X_test[0]), y_test[0]

(tensor([29.9929], grad_fn=<ViewBackward0>), tensor([28.]))

In [41]:
for h in range(9, 100):

  model = nn.Sequential(
      nn.Linear(in_features=9, out_features=h),
      nn.ReLU(),
      nn.Linear(in_features=h, out_features=h),
      nn.ReLU(),
       nn.Linear(in_features=h, out_features=h),
      nn.ReLU(),
      nn.Linear(in_features=h, out_features=1)
  )

  loss_fn = nn.L1Loss()
  optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)

  epochs = 10

  for epoch in range(epochs):
    model.train()
    y_pred = model(X_train)
    loss = loss_fn(y_pred, y_train)
    optimizer.zero_grad()
    loss.backward
    optimizer.step()

    model.eval()
    with torch.inference_mode():
      test_pred = model(X_test)
      test_loss = loss_fn(test_pred, y_test)


  print(f"H: {h} | Epoch: {epoch} | Train loss: {loss} | Test loss: {test_loss}")

H: 9 | Epoch: 9 | Train loss: 22.186975479125977 | Test loss: 22.11625862121582
H: 10 | Epoch: 9 | Train loss: 30.51224136352539 | Test loss: 30.427675247192383
H: 11 | Epoch: 9 | Train loss: 19.237083435058594 | Test loss: 19.151424407958984
H: 12 | Epoch: 9 | Train loss: 67.55164337158203 | Test loss: 67.47232055664062
H: 13 | Epoch: 9 | Train loss: 47.74536895751953 | Test loss: 47.65311813354492
H: 14 | Epoch: 9 | Train loss: 14.054585456848145 | Test loss: 13.968667030334473
H: 15 | Epoch: 9 | Train loss: 46.3726692199707 | Test loss: 46.28767776489258
H: 16 | Epoch: 9 | Train loss: 14.269466400146484 | Test loss: 14.168828964233398
H: 17 | Epoch: 9 | Train loss: 28.2728214263916 | Test loss: 28.1898193359375
H: 18 | Epoch: 9 | Train loss: 33.40559005737305 | Test loss: 33.32427215576172
H: 19 | Epoch: 9 | Train loss: 14.854588508605957 | Test loss: 14.7599458694458
H: 20 | Epoch: 9 | Train loss: 34.3399658203125 | Test loss: 34.263092041015625
H: 21 | Epoch: 9 | Train loss: 20.36

In [54]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_log_error, mean_absolute_error

def show_scores(model):
    train_preds = model.predict(X_train)
    test_preds = model.predict(X_test)

    scores = {"msle" : mean_squared_log_error(y_test, test_preds),
             "rmsle" : np.sqrt(mean_squared_log_error(y_test, test_preds)),
            "mae" : mean_absolute_error(y_test, test_preds),
            "score" : model.score(X_test, y_test)}
    return scores

model1 = RandomForestRegressor(n_jobs = -1)
model1.fit(X_train, y_train)
show_scores(model1)

{'msle': 0.044932228248590715,
 'rmsle': 0.2119722346171562,
 'mae': 2.4534393183707395,
 'score': 0.9597066997097429}

In [58]:
model1.predict(X_train[:5])

array([ 8.02, 15.41, 15.12, 21.8 , 61.77])

In [61]:
y_train[:5]

32555     8
36983    15
751      16
27224    23
8005     60
Name: Vehicles, dtype: int64