In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from typing import List

In [None]:
# Pick the compute backend in order of preference: CUDA first, then Apple's
# MPS backend, and finally the CPU when no accelerator is reported.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print(f"Using {device} device")

In [None]:
class NeuralNetwork(nn.Module):
  """Fully connected network made of Linear + LeakyReLU pairs.

  With the default arguments the architecture is 15 -> 10 -> 5 -> 1, and
  the layers are registered under ``linear_stack`` at indices 0..5, so
  ``state_dict`` keys stay the same for the default configuration.

  Args:
    in_features: width of each input row (defaults to 15).
    hidden_sizes: widths of the hidden Linear layers, in order
      (defaults to ``(10, 5)``).
    out_features: width of the output (defaults to 1).

  NOTE(review): the output layer is also followed by LeakyReLU, so the
  value returned by ``forward`` is an activated output, not a raw logit.
  Confirm this is intended for the loss that is used with this model.
  """

  def __init__(self, in_features=15, hidden_sizes=(10, 5), out_features=1):
    super().__init__()

    # Build the Linear/LeakyReLU pairs in order so that parameter creation
    # (and therefore random initialisation) follows the layer sequence.
    layers = []
    width = in_features
    for next_width in (*hidden_sizes, out_features):
      layers.append(nn.Linear(width, next_width))
      layers.append(nn.LeakyReLU())
      width = next_width

    self.linear_stack = nn.Sequential(*layers)

  def forward(self, x):
    """Run ``x`` through the layer stack and return the result."""
    return self.linear_stack(x)

In [None]:
# Build the network, then move its parameters onto the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)

# Mean-squared-error objective, minimised with SGD plus momentum.
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=0.001,
    momentum=0.9,
)

In [None]:
def load_labels(filename):
    """Read the target column from a CSV file.

    Args:
        filename: path of the CSV file to read.

    Returns:
        A DataFrame holding only the "flag" column, with the default
        integer index assigned by ``pd.read_csv``.
    """
    return pd.read_csv(filename, usecols=["flag"])

In [None]:
def data_prep(filename):
  """Read the model's input columns from a CSV file.

  Only the fifteen columns listed below are loaded; any other column in
  the file is ignored. ``pd.read_csv`` raises if one of them is missing.

  Args:
    filename: path of the CSV file to read.

  Returns:
    A DataFrame with the selected columns and the default integer index.
  """
  feature_columns = [
      "pre_loans5", "pre_loans530", "pre_loans3060",
      "pre_loans6090", "pre_loans90",
      "pre_till_pclose", "pre_till_fclose",
      "pre_loans_credit_limit", "pre_loans_next_pay_summ",
      "pre_loans_outstanding", "pre_loans_total_overdue",
      "pre_loans_max_overdue_sum",
      "pre_util", "pre_over2limit", "pre_maxover2limit",
  ]
  return pd.read_csv(filename, usecols=feature_columns)

In [None]:
class CustomDataset(Dataset):
  """Map-style dataset that pairs feature rows with label rows by position.

  Item ``i`` is built from row ``i`` of the feature frame returned by
  ``data_prep(dataset_file)`` and row ``i`` of the label frame returned by
  ``load_labels(label_file)``. Both are returned as float32 tensors that
  have already been moved to the module-level ``device``.

  NOTE(review): pairing is purely positional. If the label file does not
  list its rows in the same order as ``dataset_file`` (for example when
  one label file is shared by several data files), features and labels
  will be mismatched. Presumably rows should be matched on a shared key;
  confirm against the data schema.

  Args:
    dataset_file: CSV path handed to ``data_prep``.
    label_file: CSV path handed to ``load_labels``.
    transform: optional callable applied to the feature tensor only.
  """

  def __init__(self, dataset_file, label_file, transform=None):
    self.df = data_prep(dataset_file)
    self.labels = load_labels(label_file)
    self.transform = transform

  def __len__(self):
    """Number of feature rows (the label frame is not consulted)."""
    return self.df.shape[0]

  def __getitem__(self, index):
    """Return ``(features, label)`` for the row at position ``index``."""
    data = self._row_to_tensor(self.df, index)
    label = self._row_to_tensor(self.labels, index)

    if self.transform:
      data = self.transform(data)
    return data, label

  @staticmethod
  def _row_to_tensor(frame, index):
    """Convert one row of ``frame`` into a float32 tensor on ``device``."""
    # Go through NumPy instead of passing the pandas Series to torch.tensor:
    # torch would walk the Series with integer keys, which relies on pandas'
    # deprecated positional fallback for label-indexed Series.
    # ``iloc`` is used because a DataLoader supplies positions, not labels.
    values = frame.iloc[index].to_numpy(dtype="float32")
    return torch.tensor(values).to(device)

In [None]:
# The training data is split across eleven CSV shards, numbered 0 through 10.
data_files = [
    "datathon_student/train_data/train_data_{}.csv".format(shard)
    for shard in range(11)
]



In [None]:
def train_one_epoch(epoch_index, report_every=1000):
  """Run one optimisation pass over the module-level ``training_loader``.

  Uses the module-level ``model``, ``loss_fn`` and ``optimizer``. After
  every ``report_every`` batches the mean loss of that window is printed.

  Args:
    epoch_index: index of the current epoch; not used by the computation.
    report_every: number of batches per reporting window (default 1000).

  Returns:
    The mean per-batch loss of the last completed reporting window. If
    the loader yields fewer than ``report_every`` batches, so that no
    window completes, the mean over all batches seen is returned instead
    of a misleading 0.0. Returns 0.0 only when the loader is empty.

  Raises:
    ValueError: if ``report_every`` is smaller than 1.
  """
  if report_every < 1:
    raise ValueError("report_every must be at least 1")

  running_loss = 0.
  last_loss = 0.
  batches_in_window = 0
  window_completed = False

  for i, (inputs, labels) in enumerate(training_loader):
    # Gradients accumulate by default, so clear them for every batch.
    optimizer.zero_grad()

    # Forward pass, loss, backward pass, parameter update.
    outputs = model(inputs)
    loss = loss_fn(outputs, labels)
    loss.backward()
    optimizer.step()

    # Accumulate the loss and report once per full window.
    running_loss += loss.item()
    batches_in_window += 1
    if batches_in_window == report_every:
      last_loss = running_loss / report_every # loss per batch
      print('  batch {} loss: {}'.format(i + 1, last_loss))
      running_loss = 0.
      batches_in_window = 0
      window_completed = True

  # Short epochs never fill a window; fall back to the mean of what was seen.
  if not window_completed and batches_in_window:
    last_loss = running_loss / batches_in_window

  return last_loss

In [None]:
# Number of passes made over each data file before moving to the next one.
EPOCHS = 5

for filename in data_files:
    # Announce the file before the (potentially slow) CSV load starts.
    print(f"Loading file: {filename}")

    training_dataset = CustomDataset(filename, "datathon_student/train_target.csv")
    training_loader = DataLoader(training_dataset, batch_size=64, shuffle=True)

    # Epoch counter and loss history restart for every file.
    epoch_number = 0
    average_losses = []

    model.train(True)

    for epoch in range(EPOCHS):
        print('EPOCH {}:'.format(epoch_number + 1))

        avg_loss = train_one_epoch(epoch_number)
        average_losses.append(avg_loss)

        print('LOSS train {}'.format(avg_loss))

        # Advance the counter so the log line, the argument passed to
        # train_one_epoch and the checkpoint name reflect the real epoch.
        epoch_number += 1

    # The checkpoint is named after the number of epochs completed on this
    # file, so each file overwrites the previous file's checkpoint and the
    # saved weights are always the most recent ones.
    model_path = 'model_{}'.format(epoch_number)
    torch.save(model.state_dict(), model_path)