In [12]:
import pickle
from change_detector_model import ChangeDetector
from torch.utils.data import Dataset, DataLoader
import torch
from sklearn.model_selection import train_test_split
import numpy as np
import lightning as L
from sklearn.metrics import accuracy_score

In [2]:
with open('../prepare-transaction-df/prepared_transactions.df.list.pkl', 'rb') as f:
    prepared_dfs = pickle.load(f)

In [3]:
data = []
labels = []
for df in prepared_dfs:
    change = df[['is_change']]
    label = np.array(change.index[change['is_change'] == True].tolist())
    if len(label) == 0 or label[0] > 1:
        continue
    labels.append(label)
    data.append(df.drop('is_change', axis=1,inplace=False))

In [4]:
class TransactionDataset(Dataset):
    def __init__(self, data, label, max_size = 1044):
        super().__init__()
        self.data = data
        self.label = label
        self.max_size = max_size
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        sample = self.data[idx]
        sample = sample.values.flatten()
        pad = self.max_size - len(sample)
        sample = np.pad(sample, (0, pad), 'constant')
        label = self.label[idx]
        return torch.tensor(sample, dtype=torch.float32), torch.tensor(label, dtype=torch.float32)

In [5]:
train_data, test_data, train_labels, test_labels = train_test_split(data, labels, test_size=0.1)

In [6]:
train_dataset = TransactionDataset(train_data, train_labels)
test_dataset = TransactionDataset(test_data, test_labels)

In [7]:
train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=False)

In [8]:
# for i, s in train_dataloader:
#     print(i)
#     print(s)
#     break

In [9]:
model = ChangeDetector(1044)

In [10]:
trainer = L.Trainer(max_epochs=10)
trainer.fit(model, train_dataloader, test_dataloader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
c:\Users\futaba\AppData\Local\Programs\Python\Python311\Lib\site-packages\lightning\pytorch\trainer\connectors\logger_connector\logger_connector.py:75: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default

  | Name     | Type              | Params
-----------------------------------------------
0 | fc1      | Linear            | 133 K 
1 | fc2      | Linear            | 8.3 K 
2 | fc3      | Linear            | 2.1 K 
3 | fc4      | Linear            | 528   
4 | fc5      | Linear            | 3

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\futaba\AppData\Local\Programs\Python\Python311\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
c:\Users\futaba\AppData\Local\Programs\Python\Python311\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


In [11]:
trainer.test(model, dataloaders=test_dataloader)

c:\Users\futaba\AppData\Local\Programs\Python\Python311\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

[{}]

In [13]:
model.eval()  # Set the model to evaluation mode

# Move the model to the appropriate device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Initialize a list to store predictions and true labels
all_preds = []
all_labels = []

# Iterate over the test dataset
with torch.no_grad():  # Disable gradient calculation
    for input_i, label_i in test_dataloader:
        input_i = input_i.to(device)
        label_i = label_i.to(device)

        # Forward pass
        output_i = model(input_i)
        preds = torch.sigmoid(output_i) > 0.5

        # Store predictions and true labels
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(label_i.cpu().numpy())
        
accuracy = accuracy_score(all_labels, all_preds)
print(f"Test Accuracy: {accuracy}")

Test Accuracy: 0.8577235772357723


In [14]:
with open('change_detector_model.pkl', 'wb') as f:
    pickle.dump(model, f, pickle.HIGHEST_PROTOCOL)