In [1]:
!pip install imbalanced-learn
!pip install torchinfo



In [2]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import tensorflow.keras
from tensorflow.keras.datasets import reuters
import pandas as pd

from torchinfo import summary

import matplotlib.pyplot as plt
from IPython import display
import torch
import numpy as np
from imblearn.over_sampling import RandomOverSampler,SMOTE,SMOTENC


import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from pprint import pprint



In [3]:
def vectorize_sequences(sequences, dimension=10000):
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.
    return results

In [4]:
# fix random number

random_seed = 7777
# seed(random_seed)

torch.manual_seed(random_seed)
torch.backends.cudnn.deterministic = True

<torch._C.Generator at 0x7f95115b3510>

In [5]:
class news_dataset(Dataset):
    def __init__(self,x,y):
        self.x = torch.from_numpy(x).float()
        self.y = y

    def __getitem__(self,index):
        return self.x[index], self.y[index]
    def __len__(self):
        return self.x.size()[0]

In [6]:
def train_model(model, train_dataset, test_dataset, device, optimizer,
                epochs=50,
                lr=0.0005, 
                batch_size=256, 
                regularizer=0,
                criterion=nn.CrossEntropyLoss()                
                ):  # add more parameters if needed
    train_loader = DataLoader(train_dataset, batch_size=batch_size)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)
    
    model = model.to(device)

    train_history = {'train_loss': [],
                    'train_acc': [],
                    'test_loss': [],
                    'test_acc': []}

    
    

    for epoch in range(epochs):
        model.train()
        train_loss = 0
        train_acc = 0
        test_loss = 0
        test_acc = 0
        for x,y in train_loader:
            x = x.to(device)
            y = y.to(device)


            outputs = model(x)
            score, pred = torch.max(outputs, 1) 
            

            cur_train_loss = criterion(outputs, y)
         
            cur_train_acc = (pred == y).sum().float() / batch_size

            cur_train_loss.backward()
            optimizer.step()
            optimizer.zero_grad() 

            train_loss += cur_train_loss
            train_acc += cur_train_acc

        model.eval()
        with torch.no_grad():
            for x, y in test_loader:
                # move
                x = x.to(device)
                y = y.to(device)
                # predict
                outputs = model(x)
                score, pred = torch.max(outputs, 1)
                cur_test_loss = criterion(outputs, y)
                cur_test_acc = (pred == y).sum().float() / batch_size 
                # loss and acc
                test_loss += cur_test_loss
                test_acc += cur_test_acc

        train_loss = (train_loss/len(train_loader)).item()
        train_acc = train_acc/len(train_loader)
        val_loss = (test_loss/len(test_loader)).item()
        val_acc = test_acc/len(test_loader)

        train_history['train_loss'].append(train_loss)
        train_history['train_acc'].append(train_acc)
        train_history['test_loss'].append(val_loss)
        train_history['test_acc'].append(val_acc)
        print(f"Epoch:{epoch + 1} / {epochs}, train loss:{train_loss:.5f} train_acc:{train_acc:.5f}, valid loss:{val_loss:.5f} valid acc:{val_acc:.5f}")
        # display.clear_output(wait=True)
 
    return train_history

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


(train_data, train_labels), (test_data, test_labels) = reuters.load_data(num_words=10000)
ros = RandomOverSampler(random_state=0)



# Our vectorized training data, puts them all on to a same dimension
x_train = vectorize_sequences(train_data)
# Our vectorized test data
x_test = vectorize_sequences(test_data)

X_resampled, y_resampled = ros.fit_resample(x_train, train_labels)



In [8]:
y_test = np.array(train_labels)
train_dataset = news_dataset(X_resampled, y_resampled)
test_dataset = news_dataset(x_test, y_test)

In [9]:
class linear_model_with_regularizer2(nn.Module):
    def __init__(self, hidden_l1,dropout_ratio=0):  # add more parameters if needsed
        
       # add your code
        super(linear_model_with_regularizer2, self).__init__()
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout_ratio),
            nn.Linear(in_features=10000, out_features=hidden_l1),
            nn.Softmax(),
            nn.Dropout(p=dropout_ratio),
            nn.Linear(in_features=hidden_l1, out_features=46)
        )
    
    def forward(self, x):
        output = self.classifier(x)
        return output

In [10]:
lr=0.005
regularizer=0
modelr = linear_model_with_regularizer2(hidden_l1=128,dropout_ratio=0.5)
optimizer = torch.optim.Adam(modelr.parameters(), lr=lr, weight_decay=regularizer)
hista = train_model(modelr, train_dataset, test_dataset, device, epochs=50, optimizer=optimizer)

  input = module(input)


Epoch:1 / 50, train loss:4.35318 train_acc:0.01959, valid loss:3.65848 valid acc:0.00304
Epoch:2 / 50, train loss:3.79885 train_acc:0.01650, valid loss:3.97640 valid acc:0.00738
Epoch:3 / 50, train loss:3.69075 train_acc:0.02514, valid loss:3.79351 valid acc:0.09288
Epoch:4 / 50, train loss:3.56104 train_acc:0.02719, valid loss:3.81738 valid acc:0.10156
Epoch:5 / 50, train loss:3.46429 train_acc:0.03150, valid loss:3.86212 valid acc:0.09549
Epoch:6 / 50, train loss:3.38983 train_acc:0.03679, valid loss:3.90443 valid acc:0.00651
Epoch:7 / 50, train loss:3.32318 train_acc:0.03684, valid loss:3.91408 valid acc:0.01649
Epoch:8 / 50, train loss:3.26117 train_acc:0.03513, valid loss:3.91209 valid acc:0.03516
Epoch:9 / 50, train loss:3.22346 train_acc:0.03439, valid loss:3.94758 valid acc:0.03255
Epoch:10 / 50, train loss:3.18112 train_acc:0.03438, valid loss:3.95684 valid acc:0.03038
Epoch:11 / 50, train loss:3.14859 train_acc:0.03650, valid loss:3.92460 valid acc:0.05903
Epoch:12 / 50, trai