In [493]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from torch.utils.data import Dataset, DataLoader
import numpy as np
import plotly.graph_objects as go

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [494]:
class BikeDatasetCount(Dataset):
    def __init__(self, data):
        self.X = torch.tensor(data.iloc[:, :-3].values, dtype=torch.float32)
        self.y = torch.tensor(data.iloc[:, -1].values, dtype=torch.float32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [495]:
class BikeDatasetCasual(Dataset):
    def __init__(self, data):
        self.X = torch.tensor(data.iloc[:, :-3].values, dtype=torch.float32)
        self.y = torch.tensor(data.iloc[:, -3].values, dtype=torch.float32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [496]:
class BikeDatasetRegistered(Dataset):
    def __init__(self, data):
        self.X = torch.tensor(data.iloc[:, :-3].values, dtype=torch.float32)
        self.y = torch.tensor(data.iloc[:, -2].values, dtype=torch.float32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [497]:
raw_data = pd.read_csv("./data.csv", delimiter=",")

In [498]:
clean_data = raw_data.drop(['instant', 'dteday'], axis=1)

In [499]:
scaler = MinMaxScaler()

clean_data.iloc[:, :-3] = scaler.fit_transform(clean_data.iloc[:, :-3])

train_data, test_data = train_test_split(clean_data, test_size=0.2, random_state=42)


In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)`



In [500]:
# Define the dataset and dataloader objects
train_dataset = BikeDatasetCount(train_data)
test_dataset = BikeDatasetCount(test_data)
train_dataloader = DataLoader(train_dataset, batch_size=124, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=124, shuffle=False)

In [501]:
next(iter(train_dataloader))

[tensor([[0.0000, 1.0000, 0.0909,  ..., 0.4575, 0.6700, 0.1579],
         [0.3333, 0.0000, 0.4545,  ..., 0.6779, 0.4400, 0.1930],
         [0.3333, 0.0000, 0.4545,  ..., 0.5932, 0.8800, 0.1053],
         ...,
         [0.6667, 0.0000, 0.6364,  ..., 0.6610, 0.8300, 0.3860],
         [0.3333, 0.0000, 0.3636,  ..., 0.5932, 0.6000, 0.4562],
         [0.0000, 1.0000, 0.0909,  ..., 0.4407, 0.4300, 0.5439]]),
 tensor([  2., 330.,   5., 209.,   1., 609., 280., 153., 416.,   7.,  29., 204.,
          32., 552., 540.,  78., 180.,   4., 300.,   7., 139.,  95.,  14., 272.,
         149.,  86.,   1., 491., 387., 437.,  69., 112., 248., 172., 390.,   7.,
         150., 179.,  28., 263., 517.,   5., 332., 129.,  75.,  46.,  53., 394.,
         207., 211., 355.,  36.,  96., 379., 233., 274., 382., 496.,  15.,  23.,
           6., 182.,  12., 189.,  56., 104.,  80.,  88.,  97.,  33., 113.,  10.,
          90., 265., 159., 144.,   9., 141., 229.,  39., 168.,  14.,  12., 126.,
          89., 782., 133., 

In [502]:
len(clean_data.columns) - 3

12

In [503]:
# Define the neural network architecture
class FirstNetwork(nn.Module):
    def __init__(self):
        super(FirstNetwork, self).__init__()
        self.fc1 = nn.Linear(12, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.tanh = nn.Tanh()
        self.dropout = nn.Dropout(p=0.2)
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.tanh(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.tanh(x)
        x = self.dropout(x)
        x = self.fc3(x)
        return x

In [504]:
model = FirstNetwork()

# Move model to device
model.to(device)

criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)

training_losses = []
testing_losses = []

# Train the model
num_epochs = 100
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(train_dataloader):
        inputs, labels = data
        
        # Move data to device
        inputs = inputs.to(device)
        labels = labels.to(device)


        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels.unsqueeze(1))
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    training_losses.append(running_loss / len(train_dataloader))

    model.eval()
    with torch.no_grad():
        test_loss = 0
        for inputs, targets in test_dataloader:

            # Move data to device
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = model(inputs)

            test_loss += criterion(outputs, targets.unsqueeze(1))
        testing_losses.append(test_loss / len(test_dataloader))
    print(f"Epoch {epoch+1}: Train loss = {running_loss / len(train_dataloader)}, test loss: {test_loss / len(test_dataloader)}")

Epoch 1: Train loss = 37874.84600572183, test loss: 33089.046875
Epoch 2: Train loss = 32835.552156690144, test loss: 33090.31640625
Epoch 3: Train loss = 32809.300616197186, test loss: 33089.14453125
Epoch 4: Train loss = 32749.17930237676, test loss: 33094.53515625
Epoch 5: Train loss = 32642.524867957745, test loss: 33129.79296875
Epoch 6: Train loss = 32684.797095070422, test loss: 33089.3828125
Epoch 7: Train loss = 32793.00013754401, test loss: 33093.2890625
Epoch 8: Train loss = 32716.381519586266, test loss: 33089.16796875
Epoch 9: Train loss = 32747.300643705985, test loss: 33088.9921875
Epoch 10: Train loss = 32745.5810409331, test loss: 33128.7421875
Epoch 11: Train loss = 32671.85857724472, test loss: 33099.02734375
Epoch 12: Train loss = 32850.431860695426, test loss: 33090.90234375
Epoch 13: Train loss = 32613.921544894365, test loss: 33099.2265625
Epoch 14: Train loss = 32648.710634903167, test loss: 33089.0078125
Epoch 15: Train loss = 32700.031552596833, test loss: 331

In [505]:
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=list(range(len(training_losses))),
    y=training_losses,
    name="Train"))
fig.add_trace(go.Scatter(
    x=list(range(len(training_losses))),
    y=testing_losses,
    name="Test"
))

fig.update_layout(
    title='12x64x32x1, tanh, SGD, lr 0.01, 100 epochs',
    xaxis_title="Epoch",
    yaxis_title="MSE"
)

fig.show()

In [508]:
print(model)

FirstNetwork(
  (fc1): Linear(in_features=12, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=1, bias=True)
  (tanh): Tanh()
  (dropout): Dropout(p=0.2, inplace=False)
)


Część wyników jest ujemnych więc dajemy średnią

In [None]:
# calculate avaeger cnt 
average_cnt = np.mean(outputs)
for i in range(len(outputs)):
    if outputs[i] < 0:
        outputs[i] = average_cnt
np.savetxt("output.csv", outputs, delimiter=",")