In [1]:
!rmdir /S /Q lightning_logs

In [2]:
import feature_engine.encoding
import feature_engine.pipeline
import feature_engine.selection
import feature_engine.wrappers
import lightning
import pandas
import sklearn.preprocessing
import torch
import torchmetrics

In [3]:
lightning.seed_everything(0)

Seed set to 0


0

In [4]:
train_df = pandas.read_csv("train.csv")
train_df

Unnamed: 0,datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,casual,registered,count
0,2011-01-01 00:00:00,1,0,0,1,9.84,14.395,81,0.0000,3,13,16
1,2011-01-01 01:00:00,1,0,0,1,9.02,13.635,80,0.0000,8,32,40
2,2011-01-01 02:00:00,1,0,0,1,9.02,13.635,80,0.0000,5,27,32
3,2011-01-01 03:00:00,1,0,0,1,9.84,14.395,75,0.0000,3,10,13
4,2011-01-01 04:00:00,1,0,0,1,9.84,14.395,75,0.0000,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...
10881,2012-12-19 19:00:00,4,0,1,1,15.58,19.695,50,26.0027,7,329,336
10882,2012-12-19 20:00:00,4,0,1,1,14.76,17.425,57,15.0013,10,231,241
10883,2012-12-19 21:00:00,4,0,1,1,13.94,15.910,61,15.0013,4,164,168
10884,2012-12-19 22:00:00,4,0,1,1,13.94,17.425,61,6.0032,12,117,129


In [5]:
pipeline = feature_engine.pipeline.make_pipeline(
    feature_engine.selection.DropFeatures(features_to_drop=["datetime", "casual", "registered"]),
    feature_engine.wrappers.SklearnTransformerWrapper(transformer=sklearn.preprocessing.MinMaxScaler(), variables=["temp", "atemp", "humidity", "windspeed"]),
)

In [6]:
train_transformed_df = pipeline.fit_transform(train_df.drop(columns=["count"]))
train_transformed_df

Unnamed: 0,season,holiday,workingday,weather,temp,atemp,humidity,windspeed
0,1,0,0,1,0.224490,0.305068,0.81,0.000000
1,1,0,0,1,0.204082,0.288064,0.80,0.000000
2,1,0,0,1,0.204082,0.288064,0.80,0.000000
3,1,0,0,1,0.224490,0.305068,0.75,0.000000
4,1,0,0,1,0.224490,0.305068,0.75,0.000000
...,...,...,...,...,...,...,...,...
10881,4,0,1,1,0.367347,0.423649,0.50,0.456213
10882,4,0,1,1,0.346939,0.372860,0.57,0.263195
10883,4,0,1,1,0.326531,0.338964,0.61,0.263195
10884,4,0,1,1,0.326531,0.372860,0.61,0.105325


In [7]:
count_df = train_df[["count"]]
count_df

Unnamed: 0,count
0,16
1,40
2,32
3,13
4,1
...,...
10881,336
10882,241
10883,168
10884,129


In [8]:
count_scaler = sklearn.preprocessing.MinMaxScaler()
count_transformed_array = count_scaler.fit_transform(count_df)
count_transformed_array

array([[0.01536885],
       [0.03995902],
       [0.0317623 ],
       ...,
       [0.17110656],
       [0.13114754],
       [0.08913934]])

In [9]:
class MyModel(lightning.LightningModule):
    def __init__(self):
        super().__init__()
        self._stack = torch.nn.Sequential(
            torch.nn.Linear(8, 16), torch.nn.ReLU(),
            torch.nn.Linear(16, 1),
        )

    def forward(self, x):
        return self._stack(x)

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters())

    def training_step(self, batch, batch_idx):
        loss = self._compute(batch, batch_idx)
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        loss = self._compute(batch, batch_idx)
        self.log("val_loss", loss)
        return loss

    def test_step(self, batch, batch_idx):
        loss = self._compute(batch, batch_idx)
        self.log("test_loss", loss)
        return loss
    
    def _compute(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = torch.nn.functional.mse_loss(y_hat, y)
        return loss

In [10]:
class MyData(lightning.LightningDataModule):
    def __init__(self):
        super().__init__()

    def prepare_data(self):
        train_X = torch.tensor(train_transformed_df.values, dtype=torch.float32)
        train_y = torch.tensor(count_transformed_array, dtype=torch.float32)
        full_data = torch.utils.data.TensorDataset(train_X, train_y)
        n_train = int(len(full_data) * 0.8)
        n_val = int(len(full_data) * 0.1)
        n_test = len(full_data) - n_train - n_val
        self._train_data, self._val_data, self._test_data = torch.utils.data.random_split(full_data, [n_train, n_val, n_test])

    def train_dataloader(self):
        return torch.utils.data.DataLoader(self._train_data, batch_size=64)

    def val_dataloader(self):
        return torch.utils.data.DataLoader(self._val_data, batch_size=64)

    def test_dataloader(self):
        return torch.utils.data.DataLoader(self._test_data)

In [11]:
trainer = lightning.Trainer(max_epochs=100, callbacks=[lightning.pytorch.callbacks.early_stopping.EarlyStopping(monitor="val_loss", mode="min")])
model = MyModel()
loader = MyData()
trainer.fit(model, datamodule=loader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | _stack | Sequential | 161    | train
----------------------------------------------
161       Trainable params
0         Non-trainable params
161       Total params
0.001     Total estimated model params size (MB)
4         Modules in train mode
0         Modules in eval mode


Sanity Checking: |                                                                               | 0/? [00:00<…

C:\Users\hsmtk\repo\fantastic-octo-dollop\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
C:\Users\hsmtk\repo\fantastic-octo-dollop\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Training: |                                                                                      | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

In [12]:
trainer.test(datamodule=loader)

Restoring states from the checkpoint path at C:\Users\hsmtk\repo\fantastic-octo-dollop\bike-sharing-demand\lightning_logs\version_0\checkpoints\epoch=61-step=8494.ckpt
Loaded model weights from the checkpoint at C:\Users\hsmtk\repo\fantastic-octo-dollop\bike-sharing-demand\lightning_logs\version_0\checkpoints\epoch=61-step=8494.ckpt
C:\Users\hsmtk\repo\fantastic-octo-dollop\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Testing: |                                                                                       | 0/? [00:00<…

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_loss          0.025815362110733986
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 0.025815362110733986}]

In [14]:
!taskkill /im tensorboard.exe /f
!del /q %TMP%\.tensorboard-info\*
%reload_ext tensorboard
%tensorboard --logdir=lightning_logs/

SUCCESS: The process "tensorboard.exe" with PID 18368 has been terminated.


In [16]:
test_df = pandas.read_csv("test.csv")
test_df

Unnamed: 0,datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed
0,2011-01-20 00:00:00,1,0,1,1,10.66,11.365,56,26.0027
1,2011-01-20 01:00:00,1,0,1,1,10.66,13.635,56,0.0000
2,2011-01-20 02:00:00,1,0,1,1,10.66,13.635,56,0.0000
3,2011-01-20 03:00:00,1,0,1,1,10.66,12.880,56,11.0014
4,2011-01-20 04:00:00,1,0,1,1,10.66,12.880,56,11.0014
...,...,...,...,...,...,...,...,...,...
6488,2012-12-31 19:00:00,1,0,1,2,10.66,12.880,60,11.0014
6489,2012-12-31 20:00:00,1,0,1,2,10.66,12.880,60,11.0014
6490,2012-12-31 21:00:00,1,0,1,1,10.66,12.880,60,11.0014
6491,2012-12-31 22:00:00,1,0,1,1,10.66,13.635,56,8.9981


In [18]:
tmp_df = test_df.copy()
tmp_df["casual"] = 0
tmp_df["registered"] = 0
test_transformed_df = pipeline.transform(tmp_df)
test_transformed_df

Unnamed: 0,season,holiday,workingday,weather,temp,atemp,humidity,windspeed
0,1,0,1,1,0.244898,0.237275,0.56,0.456213
1,1,0,1,1,0.244898,0.288064,0.56,0.000000
2,1,0,1,1,0.244898,0.288064,0.56,0.000000
3,1,0,1,1,0.244898,0.271171,0.56,0.193018
4,1,0,1,1,0.244898,0.271171,0.56,0.193018
...,...,...,...,...,...,...,...,...
6488,1,0,1,2,0.244898,0.271171,0.60,0.193018
6489,1,0,1,2,0.244898,0.271171,0.60,0.193018
6490,1,0,1,1,0.244898,0.271171,0.60,0.193018
6491,1,0,1,1,0.244898,0.288064,0.56,0.157870


In [22]:
with torch.no_grad():
    predicted = model(torch.tensor(test_transformed_df.values, dtype=torch.float32))

In [23]:
predicted

tensor([[0.0846],
        [0.1132],
        [0.1132],
        ...,
        [0.1017],
        [0.1055],
        [0.1056]])

In [25]:
predicted_inversed = count_scaler.inverse_transform(predicted.numpy())
predicted_inversed

array([[ 83.53069 ],
       [111.45604 ],
       [111.45604 ],
       ...,
       [100.257065],
       [103.939285],
       [104.062035]], dtype=float32)

In [26]:
predicted_inversed.astype(int)

array([[ 83],
       [111],
       [111],
       ...,
       [100],
       [103],
       [104]])