In [1]:
import torch
import pandas as pd
import os

In [4]:
class TitlePartyModel(torch.nn.Module):
    def __init__(self):
        super(TitlePartyModel, self).__init__()
        self.input = torch.nn.Linear(2048,2048, dtype=torch.float32)
        self.input_activation = torch.nn.Sigmoid()
        self.hidden1 = torch.nn.Linear(2048,1024)
        self.hidden1_activation = torch.nn.Sigmoid()
        self.hidden2 = torch.nn.Linear(1024,128)
        self.hidden2_activation = torch.nn.Sigmoid()
        # 4 political party choices
        self.hidden3 = torch.nn.Linear(128,4)
        self.output = torch.nn.Softmax()

    def forward(self, x):
        x = self.input(x)
        x = self.input_activation(x)
        x = self.hidden1(x)
        x = self.hidden1_activation(x)
        x = self.hidden2(x)
        x = self.hidden2_activation(x)
        x = self.hidden3(x)
        x = self.output(x)
        return x



In [5]:
title_model = TitlePartyModel()
print("the model")
print(title_model)
print("just the 2nd layer")
print(title_model.hidden1)
print("parameters")
for p in title_model.parameters():
    print(p)
print("2nd layer params")
for p in title_model.hidden1.parameters():
    print(p)

the model
TitlePartyModel(
  (input): Linear(in_features=2048, out_features=2048, bias=True)
  (input_activation): Sigmoid()
  (hidden1): Linear(in_features=2048, out_features=1024, bias=True)
  (hidden1_activation): Sigmoid()
  (hidden2): Linear(in_features=1024, out_features=128, bias=True)
  (hidden2_activation): Sigmoid()
  (hidden3): Linear(in_features=128, out_features=4, bias=True)
  (output): Softmax(dim=None)
)
just the 2nd layer
Linear(in_features=2048, out_features=1024, bias=True)
parameters
Parameter containing:
tensor([[-0.0114,  0.0177,  0.0192,  ..., -0.0149,  0.0153,  0.0162],
        [-0.0155,  0.0038, -0.0156,  ..., -0.0212,  0.0207,  0.0159],
        [-0.0117, -0.0050,  0.0167,  ..., -0.0043,  0.0148,  0.0186],
        ...,
        [-0.0055, -0.0018,  0.0125,  ...,  0.0064,  0.0084, -0.0144],
        [ 0.0208,  0.0058,  0.0021,  ..., -0.0150,  0.0107,  0.0129],
        [-0.0113, -0.0089, -0.0142,  ...,  0.0176,  0.0201,  0.0203]],
       requires_grad=True)
Paramete

In [6]:
import torch
import torchvision
import torchvision.transforms as transforms
import fnmatch
import numpy as np

In [28]:
loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.RMSprop(title_model.parameters(), lr=0.01, momentum=0.9)

In [29]:
train_files = []
test_files = []
split = 0.7
token_path = os.path.join(os.getcwd(),"..","data","tokenized")
print(token_path)
for root, dirs, files in os.walk(token_path):
    for f in files:
        if fnmatch.fnmatch(f, "*shrunk*"):
            if np.random.sample(1) <= split:
                print(f'train: {f}')
                train_files.append(os.path.join(root, f))
            else:
                print(f'test : {f}')
                test_files.append(os.path.join(root, f))
print(train_files)
print(test_files)

C:\Users\benja\git-projects\bitbucket\nlp_legislation_prediction\training\..\data\tokenized
train: summary_bill_1811_1393180-shrunk.pkl
train: summary_bill_1811_1393181-shrunk.pkl
train: summary_bill_1811_1470063-shrunk.pkl
test : summary_bill_1811_1506887-shrunk.pkl
train: summary_bill_1959_1542899-shrunk.pkl
train: summary_bill_1959_1545862-shrunk.pkl
train: summary_bill_1959_1546074-shrunk.pkl
train: summary_bill_1959_1546096-shrunk.pkl
['C:\\Users\\benja\\git-projects\\bitbucket\\nlp_legislation_prediction\\training\\..\\data\\tokenized\\summary_bill_1811_1393180-shrunk.pkl', 'C:\\Users\\benja\\git-projects\\bitbucket\\nlp_legislation_prediction\\training\\..\\data\\tokenized\\summary_bill_1811_1393181-shrunk.pkl', 'C:\\Users\\benja\\git-projects\\bitbucket\\nlp_legislation_prediction\\training\\..\\data\\tokenized\\summary_bill_1811_1470063-shrunk.pkl', 'C:\\Users\\benja\\git-projects\\bitbucket\\nlp_legislation_prediction\\training\\..\\data\\tokenized\\summary_bill_1959_1542899-

In [30]:
class SummaryDataSet(torch.utils.data.Dataset):
    def __init__(self, file_path_arr):
        self.data_frames = []
        for f in file_path_arr:
            print(f"loading {f}")
            self.data_frames.append(pd.read_pickle(f, compression="gzip"))

    def __len__(self):
        return 1
#        return len(self.data_frames)

    def __getitem__(self, idx):
        next_df = self.data_frames[0]
        party = next_df["party"][0] # they are all the same, so just pick the first one
        encoding = torch.tensor(np.array(next_df["input_shrunk"]),dtype=torch.float)
        # 4 politcal party choices
        party_arr = np.zeros(4,dtype=int)
        # the party index was stored as value with a starting index of 1 -- rethink this
        party_arr[party-1] = 1 # set the value to 1 for the party index
        return encoding, torch.tensor(party_arr,dtype=torch.float)

In [31]:
from torch.utils.data import DataLoader
train_data_set = SummaryDataSet(train_files)
test_data_set = SummaryDataSet(test_files)
train_loader = DataLoader(train_data_set,batch_size=1,shuffle=True)
test_loader = DataLoader(test_data_set, batch_size=1, shuffle=True)

loading C:\Users\benja\git-projects\bitbucket\nlp_legislation_prediction\training\..\data\tokenized\summary_bill_1811_1393180-shrunk.pkl
loading C:\Users\benja\git-projects\bitbucket\nlp_legislation_prediction\training\..\data\tokenized\summary_bill_1811_1393181-shrunk.pkl
loading C:\Users\benja\git-projects\bitbucket\nlp_legislation_prediction\training\..\data\tokenized\summary_bill_1811_1470063-shrunk.pkl
loading C:\Users\benja\git-projects\bitbucket\nlp_legislation_prediction\training\..\data\tokenized\summary_bill_1959_1542899-shrunk.pkl
loading C:\Users\benja\git-projects\bitbucket\nlp_legislation_prediction\training\..\data\tokenized\summary_bill_1959_1545862-shrunk.pkl
loading C:\Users\benja\git-projects\bitbucket\nlp_legislation_prediction\training\..\data\tokenized\summary_bill_1959_1546074-shrunk.pkl
loading C:\Users\benja\git-projects\bitbucket\nlp_legislation_prediction\training\..\data\tokenized\summary_bill_1959_1546096-shrunk.pkl
loading C:\Users\benja\git-projects\bitbu

In [47]:
def train_one_epoch(epoch_idx, model, summary_writer):
    running_loss = 0
    last_loss = 0.
    for i, data in enumerate(train_loader):
        inputs, label = data
        input_tensor = inputs.view(1,-1)
        outputs = model(input_tensor)
        loss = loss_fn(outputs, label)
        print(type(loss))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        print(type(running_loss))
        last_loss = running_loss
        summary_idx = i * len(train_loader) + i + 1
        summary_writer.add_scalar("loss/train", last_loss, summary_idx)
    return last_loss

In [48]:
title_model = TitlePartyModel()

In [49]:
import datetime
from torch.utils.tensorboard import SummaryWriter
start_time = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
writer = SummaryWriter('runs/fashion_trainer_{}'.format(start_time))
EPOCHS = 10
epoch_num = 0
losses = []
for epoch in range(EPOCHS):
    print("turn on training")
    title_model.train(True)
    print("running one epoch")
    last_epoch_loss = train_one_epoch(epoch_num,title_model, writer)
    print("turn off training")
    print(f'epoch loss {last_epoch_loss}')

    # title_model.train(False)

    running_validation_loss = 0.0
    with (torch.no_grad()):
        for i, vdata in enumerate(test_loader):
            vinputs, vlabel = vdata
            print(f"label : {vlabel}")
            voutputs = title_model(vinputs)
            print(f"vOutput: {voutputs}")
            vloss = loss_fn(voutputs, vlabel)
            running_validation_loss += vloss

    avg_vloss = running_validation_loss / len(test_loader)
    losses.append(avg_vloss)
    print('LOSS train {} valid {}'.format(last_epoch_loss, avg_vloss))
#    writer.add_scalars("Training vs Valiation loss",{"training": last_epoch_loss, "validation": avg_vloss}, epoch_num+1)
#    writer.flush()
    epoch_num += 1

turn on training
running one epoch
<class 'torch.Tensor'>
<class 'float'>
turn off training
epoch loss 0.1541554182767868
label : tensor([[0., 0., 0., 1.]])
vOutput: tensor([[0.3229, 0.2860, 0.2060, 0.1851]])
LOSS train 0.1541554182767868 valid 0.22315868735313416
turn on training
running one epoch
<class 'torch.Tensor'>
<class 'float'>
turn off training
epoch loss 0.1541554182767868
label : tensor([[0., 0., 0., 1.]])
vOutput: tensor([[0.3229, 0.2860, 0.2060, 0.1851]])
LOSS train 0.1541554182767868 valid 0.22315868735313416
turn on training
running one epoch
<class 'torch.Tensor'>
<class 'float'>
turn off training
epoch loss 0.1541554182767868
label : tensor([[0., 0., 0., 1.]])
vOutput: tensor([[0.3229, 0.2860, 0.2060, 0.1851]])
LOSS train 0.1541554182767868 valid 0.22315868735313416
turn on training
running one epoch
<class 'torch.Tensor'>
<class 'float'>
turn off training
epoch loss 0.1541554182767868
label : tensor([[0., 0., 0., 1.]])
vOutput: tensor([[0.3229, 0.2860, 0.2060, 0.1851

  x = self.output(x)


<class 'float'>
turn off training
epoch loss 0.1541554182767868
label : tensor([[0., 0., 0., 1.]])
vOutput: tensor([[0.3229, 0.2860, 0.2060, 0.1851]])
LOSS train 0.1541554182767868 valid 0.22315868735313416
turn on training
running one epoch
<class 'torch.Tensor'>
<class 'float'>
turn off training
epoch loss 0.1541554182767868
label : tensor([[0., 0., 0., 1.]])
vOutput: tensor([[0.3229, 0.2860, 0.2060, 0.1851]])
LOSS train 0.1541554182767868 valid 0.22315868735313416
turn on training
running one epoch
<class 'torch.Tensor'>
<class 'float'>
turn off training
epoch loss 0.1541554182767868
label : tensor([[0., 0., 0., 1.]])
vOutput: tensor([[0.3229, 0.2860, 0.2060, 0.1851]])
LOSS train 0.1541554182767868 valid 0.22315868735313416
turn on training
running one epoch
<class 'torch.Tensor'>
<class 'float'>
turn off training
epoch loss 0.1541554182767868
label : tensor([[0., 0., 0., 1.]])
vOutput: tensor([[0.3229, 0.2860, 0.2060, 0.1851]])
LOSS train 0.1541554182767868 valid 0.2231586873531341

In [27]:
print(losses)

[tensor(1.4816, grad_fn=<DivBackward0>), tensor(1.4816, grad_fn=<DivBackward0>), tensor(1.4816, grad_fn=<DivBackward0>), tensor(1.4816, grad_fn=<DivBackward0>), tensor(1.4816, grad_fn=<DivBackward0>), tensor(1.4816, grad_fn=<DivBackward0>), tensor(1.4816, grad_fn=<DivBackward0>), tensor(1.4816, grad_fn=<DivBackward0>), tensor(1.4816, grad_fn=<DivBackward0>), tensor(1.4816, grad_fn=<DivBackward0>)]
