# Training of 1-layer NN with basic features
In this notebook, a vanilla 1-layer neural network is trained on all the basic-engineered features.
The models are stored in our Google Drive as follows:

└── Daryna-Diffusion-Lecture/
└── data/
└── subset/
└── models/
&nbsp;&nbsp;&nbsp;&nbsp;├── one_layer_nn
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;└── concat/
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;└── concat_cos/
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;└── hadamard/
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;└── hadamard_cos
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;└── mean/
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;└── mean_cos/
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;└── sum/
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;└── sum_cos/


## 0. Imports and Constants
- Change the path in "cw_dir" to your local Google Drive sync location
- Specify the dataset version in the CONSTANTS part

In [57]:
import numpy as np
import torch
from custom_dataset import CustomDataset, DataLoader
from network import NeuralNetwork
import matplotlib.pyplot as plt

# CONFIGURATION
# Dataset release that every path below refers to.
dataset_version = "v2"
# Root of the locally synced Google Drive folder (adjust per machine).
cw_dir = "/home/jockl/Insync/check.worthiness@gmail.com/Google Drive/"

# Locations derived from the root folder and the dataset version.
# NOTE: cw_dir already ends in "/", so each derived path contains a doubled
# slash right after the root.
models_dir = f"{cw_dir}/models"
data_dir = f"{cw_dir}/data/CT23_1A_checkworthy_multimodal_english_{dataset_version}"
features_dir = f"{data_dir}/features"
train_labels_path = f"{data_dir}/labels/train_labels_{dataset_version}.pickle"

# SPLIT NAMES (usable as dictionary keys)
TRAIN, DEV, TEST = "train", "dev", "test"
splits = [TRAIN, DEV, TEST]

## 1. Load Training Labels
The training labels are loaded from the pickle file.

In [58]:
# Read the training labels from their pickle file.
# allow_pickle=True unpickles the file contents, so only point this at files
# from a trusted source.
train_labels = np.load(train_labels_path, allow_pickle=True)
print(train_labels.shape, train_labels)

(2356,) [1 1 0 ... 0 0 0]


## 2. Load Engineered Features
All engineered feature matrices are loaded from their respective pickle files.


In [59]:
# Feature-engineering methods whose training matrices are loaded below
feature_methods = (
    "concat", "concat_cos",
    "sum", "sum_cos",
    "mean", "mean_cos",
    "hadamard", "hadamard_cos",
)

# Map every method name to the feature matrix read from its pickle file
method_to_feat_matrix = {}
for method in feature_methods:
    matrix_path = f"{features_dir}/{method}/{method}_train_{dataset_version}.pickle"
    method_to_feat_matrix[method] = np.load(matrix_path, allow_pickle=True)

# Sanity check on one of the loaded matrices
print(method_to_feat_matrix["concat"].shape)

(2356, 1536)


## 3. Set up the Dataloader

In [60]:
# Dataloader for one exemplary feature matrix "concat".
# batch_size is the single knob for the mini-batch size: it is passed through
# to the DataLoader below, so changing it here actually takes effect
# (previously the call hard-coded 1 and ignored this variable).
batch_size = 1
features = method_to_feat_matrix["concat"]
# Pair the feature matrix with the training labels
train_data = CustomDataset(features, train_labels)
# shuffle=True requests a shuffled sample order
train_dataloader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)

## 4. Set up the Network

In [61]:
# Model dimensions: the input width is read off the features of the first
# training sample (element 0 of the dataset item)
first_sample = train_data[0]
input_dim = len(first_sample[0])
hidden_dim = 1
output_dim = 1
network = NeuralNetwork(input_dim, hidden_dim, output_dim)

# Optimisation setup: binary cross-entropy loss minimised with plain SGD
lr = 0.1
loss_fn = torch.nn.BCELoss()
optimizer = torch.optim.SGD(network.parameters(), lr=lr)

# Training bookkeeping: number of passes over the data and the per-batch
# loss history that the training loop appends to
num_epochs = 1
losses = []

## 5. Training Loop

In [62]:
# Run num_epochs passes over the training data with one optimizer update per
# batch. NOTE: the inner loop rebinds the notebook-level names `features` and
# `labels` to the current batch.
for epoch in range(num_epochs):
    for features, labels in train_dataloader:
        # Predict for the current batch
        out = network(features)

        # Give the labels a trailing dimension before scoring the predictions
        targets = labels.unsqueeze(-1)
        loss = loss_fn(out, targets)
        # Record the scalar loss of this batch
        losses.append(loss.item())

        # Clear old gradients, then backpropagate the fresh loss
        optimizer.zero_grad()
        loss.backward()

        # Apply the parameter update
        optimizer.step()