In [1]:
!rmdir /S /Q lightning_logs

In [2]:
import feature_engine.encoding
import feature_engine.pipeline
import feature_engine.selection
import feature_engine.wrappers
import lightning
import pandas
import sklearn.preprocessing
import torch
import torchmetrics

In [3]:
lightning.seed_everything(0)

Seed set to 0


0

In [4]:
train_df = pandas.read_csv("train.csv")

In [5]:
train_df

Unnamed: 0,id,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_amnt,loan_int_rate,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length,loan_status
0,0,37,35000,RENT,0.0,EDUCATION,B,6000,11.49,0.17,N,14,0
1,1,22,56000,OWN,6.0,MEDICAL,C,4000,13.35,0.07,N,2,0
2,2,29,28800,OWN,8.0,PERSONAL,A,6000,8.90,0.21,N,10,0
3,3,30,70000,RENT,14.0,VENTURE,B,12000,11.11,0.17,N,5,0
4,4,22,60000,RENT,2.0,MEDICAL,A,6000,6.92,0.10,N,3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
58640,58640,34,120000,MORTGAGE,5.0,EDUCATION,D,25000,15.95,0.21,Y,10,0
58641,58641,28,28800,RENT,0.0,MEDICAL,C,10000,12.73,0.35,N,8,1
58642,58642,23,44000,RENT,7.0,EDUCATION,D,6800,16.00,0.15,N,2,1
58643,58643,22,30000,RENT,2.0,EDUCATION,A,5000,8.90,0.17,N,3,0


In [6]:
train_df.isnull().sum()

id                            0
person_age                    0
person_income                 0
person_home_ownership         0
person_emp_length             0
loan_intent                   0
loan_grade                    0
loan_amnt                     0
loan_int_rate                 0
loan_percent_income           0
cb_person_default_on_file     0
cb_person_cred_hist_length    0
loan_status                   0
dtype: int64

In [7]:
pipeline = feature_engine.pipeline.make_pipeline(
    feature_engine.selection.DropFeatures(features_to_drop=["id"]),
    feature_engine.encoding.OrdinalEncoder(encoding_method="arbitrary", variables=["person_home_ownership", "loan_intent", "loan_grade", "cb_person_default_on_file"]),
    feature_engine.wrappers.SklearnTransformerWrapper(transformer=sklearn.preprocessing.MinMaxScaler(), variables=["person_age", "person_income", "person_emp_length", "loan_amnt", "loan_int_rate", "loan_percent_income", "cb_person_cred_hist_length"]),
)

In [8]:
train_processed_df = pipeline.fit_transform(train_df)
train_processed_df

Unnamed: 0,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_amnt,loan_int_rate,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length,loan_status
0,0.165049,0.016246,0,0.000000,0,0,0.159420,0.341011,0.204819,0,0.428571,0
1,0.019417,0.027324,1,0.048780,1,1,0.101449,0.445506,0.084337,0,0.000000,0
2,0.087379,0.012976,1,0.065041,2,2,0.159420,0.195506,0.253012,0,0.285714,0
3,0.097087,0.034708,0,0.113821,3,0,0.333333,0.319663,0.204819,0,0.107143,0
4,0.019417,0.029433,0,0.016260,1,2,0.159420,0.084270,0.120482,0,0.035714,0
...,...,...,...,...,...,...,...,...,...,...,...,...
58640,0.135922,0.061082,2,0.040650,0,3,0.710145,0.591573,0.253012,1,0.285714,0
58641,0.077670,0.012976,0,0.000000,1,1,0.275362,0.410674,0.421687,0,0.214286,1
58642,0.029126,0.020994,0,0.056911,0,3,0.182609,0.594382,0.180723,0,0.000000,1
58643,0.019417,0.013609,0,0.016260,0,2,0.130435,0.195506,0.204819,0,0.035714,0


In [9]:
train_X = torch.tensor(train_processed_df.drop(columns=["loan_status"]).values, dtype=torch.float32)
train_y = torch.tensor(train_processed_df["loan_status"].values, dtype=torch.int64)
full_dataset = torch.utils.data.TensorDataset(train_X, train_y)
n_train = int(len(full_dataset) * 0.8)
n_val = int(len(full_dataset) * 0.1)
n_test = len(full_dataset) - n_train - n_val
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [n_train, n_val, n_test])

In [10]:
class MyModel(lightning.LightningModule):
    def __init__(self):
        super().__init__()
        self._stack = torch.nn.Sequential(
            torch.nn.Linear(11, 32), torch.nn.ReLU(),
            torch.nn.Linear(32, 2), torch.nn.Softmax(dim=1),
        )
        self._accuracy = torchmetrics.classification.BinaryAccuracy()

    def forward(self, x):
        return self._stack(x)

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters())

    def training_step(self, batch, batch_idx):
        (loss, acc) = self._compute(batch, batch_idx)
        self.log("train_loss", loss)
        self.log("train_acc", acc)
        return loss

    def validation_step(self, batch, batch_idx):
        (loss, acc) = self._compute(batch, batch_idx)
        self.log("val_loss", loss)
        self.log("val_acc", acc)
        return loss

    def test_step(self, batch, batch_idx):
        (loss, acc) = self._compute(batch, batch_idx)
        self.log("test_loss", loss)
        self.log("test_acc", acc)
        return loss
    
    def _compute(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        pred = y_hat.argmax(axis=1)
        loss = torch.nn.functional.cross_entropy(y_hat, y)
        acc = self._accuracy(pred, y)
        return (loss, acc)

In [11]:
class MyDataModule(lightning.LightningDataModule):
    def train_dataloader(self):
        return torch.utils.data.DataLoader(train_dataset, batch_size=512)

    def val_dataloader(self):
        return torch.utils.data.DataLoader(val_dataset, batch_size=512)

    def test_dataloader(self):
        return torch.utils.data.DataLoader(test_dataset)

In [12]:
trainer = lightning.Trainer(max_epochs=500, callbacks=[lightning.pytorch.callbacks.early_stopping.EarlyStopping(monitor="val_loss", mode="min")])
model = MyModel()
loader = MyDataModule()
trainer.fit(model, datamodule=loader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name      | Type           | Params | Mode 
-----------------------------------------------------
0 | _stack    | Sequential     | 450    | train
1 | _accuracy | BinaryAccuracy | 0      | train
-----------------------------------------------------
450       Trainable params
0         Non-trainable params
450       Total params
0.002     Total estimated model params size (MB)
6         Modules in train mode
0         Modules in eval mode


Sanity Checking: |                                                                               | 0/? [00:00<…

C:\Users\hsmtk\repo\fantastic-octo-dollop\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
C:\Users\hsmtk\repo\fantastic-octo-dollop\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Training: |                                                                                      | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

Validation: |                                                                                    | 0/? [00:00<…

In [13]:
trainer.test(datamodule=loader)

Restoring states from the checkpoint path at C:\Users\hsmtk\repo\fantastic-octo-dollop\loan-approval\lightning_logs\version_0\checkpoints\epoch=90-step=8372.ckpt
Loaded model weights from the checkpoint at C:\Users\hsmtk\repo\fantastic-octo-dollop\loan-approval\lightning_logs\version_0\checkpoints\epoch=90-step=8372.ckpt
C:\Users\hsmtk\repo\fantastic-octo-dollop\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Testing: |                                                                                       | 0/? [00:00<…

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc             0.853878915309906
        test_loss           0.4593595862388611
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 0.4593595862388611, 'test_acc': 0.853878915309906}]

In [14]:
!taskkill /im tensorboard.exe /f
!del /q %TMP%\.tensorboard-info\*
%reload_ext tensorboard
%tensorboard --logdir=lightning_logs/

ERROR: The process "tensorboard.exe" not found.


In [15]:
test_df = pandas.read_csv("test.csv")

In [22]:
tmp_df = test_df.copy()
tmp_df["loan_status"] = 0
test_processed_df = pipeline.transform(tmp_df)
test_processed_df = test_processed_df.drop(columns=["loan_status"])
test_processed_df

Unnamed: 0,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_amnt,loan_int_rate,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length
0,0.029126,0.034181,0,0.024390,5,5,0.710145,0.580899,0.433735,0,0.000000
1,0.058252,0.048423,2,0.048780,2,1,0.275362,0.407865,0.120482,1,0.071429
2,0.058252,0.013609,0,0.040650,3,4,0.101449,0.661236,0.156627,1,0.000000
3,0.126214,0.024159,0,0.032520,4,2,0.188406,0.195506,0.168675,0,0.178571
4,0.058252,0.051588,2,0.065041,5,3,0.420290,0.612360,0.180723,1,0.071429
...,...,...,...,...,...,...,...,...,...,...,...
39093,0.019417,0.014242,2,0.016260,4,0,0.072464,0.278090,0.120482,0,0.071429
39094,0.019417,0.023104,2,0.048780,0,2,0.188406,0.034270,0.180723,0,0.035714
39095,0.300971,0.029433,2,0.000000,2,2,0.420290,0.117416,0.301205,0,0.821429
39096,0.019417,0.016774,2,0.032520,2,3,0.391304,0.573034,0.469880,1,0.071429


In [23]:
test_tensor = torch.tensor(test_processed_df.values, dtype=torch.float32)

In [24]:
with torch.no_grad():
    predict_tensor = model(test_tensor)

In [25]:
predict_tensor

tensor([[1.0000e+00, 2.4823e-20],
        [1.0000e+00, 9.9134e-13],
        [1.0000e+00, 6.7111e-16],
        ...,
        [1.0000e+00, 1.6283e-14],
        [1.0000e+00, 2.0089e-16],
        [1.0000e+00, 9.7011e-13]])

In [26]:
predict_tensor.argmax(axis=1)

tensor([0, 0, 0,  ..., 0, 0, 0])