# Loading data 

In [None]:
import pandas as pd
import numpy as np

In [None]:
def load_data(split_name='train', columns=['text', 'label'], folder='data'):
    '''
        "split_name" may be set as 'train', 'valid' or 'test' to load the corresponding dataset.
        
        You may also specify the column names to load any columns in the .csv data file.
        Among many, "text" can be used as model input, and "label" column is the labels (sentiment). 
    '''
    try:
        print(f"select [{', '.join(columns)}] columns from the {split_name} split")
        df = pd.read_csv(f'{folder}/{split_name}.csv')
        df = df.loc[:,columns]
        print("Success")
        return df
    except:
        print(f"Failed loading specified columns... Returning all columns from the {split_name} split")
        df = pd.read_csv(f'{folder}/{split_name}.csv')
        return df

In [None]:
train_df = load_data('train', columns=['text', 'label'], folder='data')
valid_df = load_data('valid', columns=['text', 'label'], folder='data')
# the test set labels (the 'label' column) are unavailable! So the following code will instead return all columns
test_df = load_data('test_no_label', columns=['id', 'text'], folder='data')

In [None]:
print(train_df.size)
print(valid_df.size)

# 2. Text data processing

In [None]:
import nltk
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
stopwords = set (stopwords.words('english'))
porterStemmer = PorterStemmer()

def lower(s):
    """
    :param s: a string.
    return a string with lower characters
    Note that we allow the input to be nested string of a list.
    e.g.
    Input: 'Text mining is to identify useful information.'
    Output: 'text mining is to identify useful information.'
    """
    if isinstance(s, list):
        return [lower(t) for t in s]
    if isinstance(s, str):
        return s.lower()
    else:
        raise NotImplementedError("unknown datatype")


def tokenize(text):
    """
    :param text: a doc with multiple sentences, type: str
    return a word list, type: list
    e.g.
    Input: 'Text mining is to identify useful information.'
    Output: ['Text', 'mining', 'is', 'to', 'identify', 'useful', 'information', '.']
    """
    return nltk.word_tokenize(text)


def stem(tokens):
    """
    :param tokens: a list of tokens, type: list
    return a list of stemmed words, type: list
    e.g.
    Input: ['Text', 'mining', 'is', 'to', 'identify', 'useful', 'information', '.']
    Output: ['text', 'mine', 'is', 'to', 'identifi', 'use', 'inform', '.']
    """
    ### equivalent code
    # results = list()
    # for token in tokens:
    #     results.append(ps.stem(token))
    # return results

    return [porterStemmer.stem(token) for token in tokens]

def n_gram(tokens, n=1):
    """
    :param tokens: a list of tokens, type: list
    :param n: the corresponding n-gram, type: int
    return a list of n-gram tokens, type: list
    e.g.
    Input: ['text', 'mine', 'is', 'to', 'identifi', 'use', 'inform', '.'], 2
    Output: ['text mine', 'mine is', 'is to', 'to identifi', 'identifi use', 'use inform', 'inform .']
    """
    if n == 1:
        return tokens
    else:
        results = list()
        for i in range(len(tokens)-n+1):
            # tokens[i:i+n] will return a sublist from i th to i+n th (i+n th is not included)
            results.append(" ".join(tokens[i:i+n]))
        return results

def filter_stopwords(tokens):
    """
    :param tokens: a list of tokens, type: list
    return a list of filtered tokens, type: list
    e.g.
    Input: ['text', 'mine', 'is', 'to', 'identifi', 'use', 'inform', '.']
    Output: ['text', 'mine', 'identifi', 'use', 'inform', '.']
    """
    ### equivalent code
    # results = list()
    # for token in tokens:
    #     if token not in stopwords and not token.isnumeric():
    #         results.append(token)
    # return results

    return [token for token in tokens if token not in stopwords and not token.isnumeric()]


def get_onehot_vector(feats, feats_dict):
    """
    :param data: a list of features, type: list
    :param feats_dict: a dict from features to indices, type: dict
    return a feature vector,
    """
    # initialize the vector as all zeros
    vector = np.zeros(len(feats_dict), dtype=float)
    for f in feats:
        # get the feature index, return -1 if the feature is not existed
        f_idx = feats_dict.get(f, -1)
        if f_idx != -1:
            # set the corresponding element as 1
            vector[f_idx] = 1
    return vector

def biGram(tokens):
    return n_gram(tokens, 2)

In [None]:
import spacy
nlp = spacy.load('en_core_web_sm')

# 3. Build our model

## Initial trial

### Import library

In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
import tqdm
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

### Data preprocessing

In [None]:
train_data_x = train_df['text'].map(tokenize).map(filter_stopwords).map(stem).map(biGram)
train_data_y = train_df['label']
valid_data_x = valid_df['text'].map(tokenize).map(filter_stopwords).map(stem).map(biGram)
valid_data_y = valid_df['label']

In [None]:
word2id = {}
for tokens in train_data_x:
    for t in tokens:
        if not t in word2id:
            word2id[t] = len(word2id)
word2id['<pad>'] = len(word2id)

In [None]:
def texts_to_id_seq(texts, padding_length=50):
    records = []
    for tokens in texts:
        record = []
        for t in tokens:
            record.append(word2id.get(t, len(word2id)))
        if len(record) >= padding_length:
            records.append(record[:padding_length])
        else:
            records.append(record + [word2id['<pad>']] * (padding_length - len(record)))
    return records

In [None]:
train_seqs = texts_to_id_seq(train_data_x)
valid_seqs = texts_to_id_seq(valid_data_x)

In [None]:
class MyDataset(Dataset):
    
    def __init__(self, seq, y):
        assert len(seq) == len(y)
        self.seq = seq
        self.y = y-1
    
    def __getitem__(self, idx):
        return np.asarray(self.seq[idx]), self.y[idx]

    def __len__(self):
        return len(self.seq)

In [None]:
batch_size = 16

train_loader = DataLoader(MyDataset(train_seqs, train_data_y), batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(MyDataset(valid_seqs, valid_data_y), batch_size=batch_size)

### Building model

In [None]:
class mlp(nn.Module):
    def __init__(self):
        super(mlp, self).__init__()
        self.embedding = nn.Embedding(num_embeddings=len(word2id)+1, embedding_dim=64)
        self.cnn = nn.Sequential(
            nn.Conv1d(in_channels=64,
                      out_channels=32,
                      kernel_size=5,
                      stride=1),
            nn.MaxPool1d(kernel_size=5, stride=1),
            nn.Dropout(0.5),
            nn.ReLU(),
            nn.Conv1d(in_channels=32,
                      out_channels=16,
                      kernel_size=3,
                      stride=1),
            nn.MaxPool1d(kernel_size=3, stride=1),
            nn.Dropout(0.5)
        )
        self.linear = nn.Linear(16, 5)
    
    def forward(self, x):
        x = self.embedding(x)
        x = torch.transpose(x, 1, 2)
        x = self.cnn(x)
        x = torch.max(x, dim=-1)[0]
        x = self.linear(x)
        return x

In [None]:
model = mlp()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = torch.nn.CrossEntropyLoss()

In [None]:
for e in range(1, 11):    
    print('epoch', e)
    model.train()
    total_acc = 0
    total_loss = 0
    total_count = 0
    with tqdm.tqdm(train_loader) as t:
        for x, y in t:
            optimizer.zero_grad()
            logits = model(x)
            loss = criterion(logits, y)
            loss.backward()
            total_acc += (logits.argmax(1) == y).sum().item()
            total_count += y.size(0)
            total_loss += loss.item()
            optimizer.step()
            t.set_postfix({'loss': total_loss/total_count, 'acc': total_acc/total_count})

    model.eval()
    y_pred = []
    y_true = []
    with tqdm.tqdm(valid_loader) as t:
        for x, y in t:
            logits = model(x)
            total_acc += (logits.argmax(1) == y).sum().item()
            total_count += len(y)
            y_pred += logits.argmax(1).tolist()
            y_true += y.tolist()
    print(classification_report(y_true, y_pred))
    print("\n\n")
    print(confusion_matrix(y_true, y_pred))

## Second trial

### Import library

In [None]:
from sklearn import svm
from sklearn.feature_extraction.text import TfidfVectorizer

### Data preprocessing

In [None]:
train_data_x = train_df['text'].map(tokenize).map(filter_stopwords).map(stem)
train_data_y = train_df['label']
valid_data_x = valid_df['text'].map(tokenize).map(filter_stopwords).map(stem)
valid_data_y = valid_df['label']
train_seqs = texts_to_id_seq(train_data_x)
valid_seqs = texts_to_id_seq(valid_data_x)

In [None]:
tfidf_vect_9010 = TfidfVectorizer(max_features = 5000)
tfidf_vect_9010.fit(train_df)
train_X_tfidf_9010 = tfidf_vect_9010.transform(train_df['text'])
test_X_tfidf_9010 = tfidf_vect_9010.transform(valid_df['text'])

### Building model

In [None]:
clf = svm.SVC(kernel="linear")
clf.fit(train_X_tfidf_9010, train_data_y)

In [None]:
y_pred = clf.predict(test_X_tfidf_9010)
print(classification_report(valid_data_y, y_pred))
print("\n\n")
print(confusion_matrix(valid_data_y, y_pred))
print('accuracy', np.mean(valid_data_y == y_pred))

## Third trial

### Import libray

In [171]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.linear_model import LogisticRegression

### Data preprocessing

In [172]:
train_df = load_data('train')
valid_df = load_data('valid')
x_train = train_df['text']
y_train = train_df['label']
x_valid = valid_df['text']
y_valid = valid_df['label']

select [text, label] columns from the train split
Success
select [text, label] columns from the valid split
Success


In [173]:
x_train.size

18000

In [174]:
train_data_x = train_df['text'].map(tokenize).map(lower).map(filter_stopwords).map(stem)
train_data_y = train_df['label']
valid_data_x = valid_df['text'].map(tokenize).map(lower).map(filter_stopwords).map(stem)
valid_data_y = valid_df['label']

In [175]:
print(train_data_x.size)

18000


In [176]:
for i in range(len(train_data_x)):
    train_data_x[i] = ' '.join(train_data_x[i])
for i in range(len(valid_data_x)):
    valid_data_x[i] = ' '.join(valid_data_x[i])

In [177]:
train_data_x.to_csv("data/norm_train_data.csv")
valid_data_x.to_csv("data/norm_valid_data.csv")

In [178]:
norm_train_data = load_data("norm_train_data")['text']
norm_valid_data = load_data("norm_valid_data")['text']

select [text, label] columns from the norm_train_data split
Failed loading specified columns... Returning all columns from the norm_train_data split
select [text, label] columns from the norm_valid_data split
Failed loading specified columns... Returning all columns from the norm_valid_data split


In [179]:
norm_train_data.head()

0    two wolfgang petersen direct film togeth one p...
1    fan seri movi film must . continu wrath khan l...
2    love movi . blu-ray fine , came expir digit co...
3    n't know go end movi . seen movi n't know happ...
4    watch minut movi , due offens content . want s...
Name: text, dtype: object

### Building model 1

In [180]:
countVectorizer = CountVectorizer(min_df=0.0, max_df=0.5, binary=False, ngram_range=(1, 3))
cV_train = countVectorizer.fit_transform(norm_train_data)
cV_valid = countVectorizer.transform(norm_valid_data)

In [181]:
print(cV_train.shape)
print(cV_valid.shape)

(18000, 328864)
(2000, 328864)


In [182]:
lr = LogisticRegression()
lr.fit(cV_train, y_train)
lr

In [183]:
y_pred = lr.predict(cV_valid)
print(classification_report(y_valid, y_pred))
print("\n\n")
print(confusion_matrix(y_valid, y_pred))
print('accuracy', np.mean(y_valid == y_pred))

              precision    recall  f1-score   support

           1       0.55      0.52      0.54       295
           2       0.36      0.19      0.25       198
           3       0.47      0.55      0.51       508
           4       0.50      0.45      0.47       523
           5       0.61      0.71      0.65       476

    accuracy                           0.52      2000
   macro avg       0.50      0.48      0.48      2000
weighted avg       0.51      0.52      0.51      2000




[[153  30  71  19  22]
 [ 54  37  80  17  10]
 [ 39  27 281 118  43]
 [ 17   7 125 235 139]
 [ 13   3  40  84 336]]
accuracy 0.521


### Building model 2

In [None]:
countVectorizer = CountVectorizer(min_df=0.0, max_df=0.9, binary=False, ngram_range=(1, 3))
cV_train = countVectorizer.fit_transform(norm_train_data)
cV_valid = countVectorizer.transform(norm_valid_data)

In [None]:
sksvm = svm.NuSVC(kernel='linear', decision_function_shape='ovo')
sksvm.fit(cV_train, train_data_y)

In [None]:
sksvm.predict(cV_valid)
print(classification_report(y_valid, y_pred))
print("\n\n")
print(confusion_matrix(y_valid, y_pred))
print('accuracy', np.mean(y_valid == y_pred))

## Fourth trail

#### Import library

In [184]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
import tqdm
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from torch.autograd import Variable

In [185]:
if torch.backends.mps.is_available():
    mps_device = torch.device("mps")

#### Data preprocessing

In [210]:
train_df = load_data('train')
valid_df = load_data('valid')
x_train = train_df['text'].map(tokenize).map(lower).map(filter_stopwords).map(stem)
y_train = train_df['label']
x_valid = valid_df['text'].map(tokenize).map(lower).map(filter_stopwords).map(stem)
y_valid = valid_df['label']

select [text, label] columns from the train split
Success
select [text, label] columns from the valid split
Success


In [211]:
for i in range(len(x_train)):
    x_train[i] = ' '.join(x_train[i])
for i in range(len(x_valid)):
    x_valid[i] = ' '.join(x_valid[i])
x_train.to_csv("data/norm_train_data.csv")
x_valid.to_csv("data/norm_valid_data.csv")

In [212]:
norm_train_data = load_data("norm_train_data")['text']
norm_valid_data = load_data("norm_valid_data")['text']

select [text, label] columns from the norm_train_data split
Failed loading specified columns... Returning all columns from the norm_train_data split
select [text, label] columns from the norm_valid_data split
Failed loading specified columns... Returning all columns from the norm_valid_data split


In [213]:
word2id = {}
for tokens in norm_train_data:
    for t in tokens:
        if not t in word2id:
            word2id[t] = len(word2id)
word2id['<pad>'] = len(word2id)

In [214]:
def texts_to_id_seq(texts, padding_length=50):
    records = []
    for tokens in texts:
        record = []
        for t in tokens:
            record.append(word2id.get(t, len(word2id)))
        if len(record) >= padding_length:
            records.append(record[:padding_length])
        else:
            records.append(record + [word2id['<pad>']] * (padding_length - len(record)))
    return records

In [215]:
train_seqs = texts_to_id_seq(norm_train_data)
valid_seqs = texts_to_id_seq(norm_valid_data)

In [216]:
X_train_tensors = Variable(torch.Tensor(train_seqs)).type(torch.LongTensor)
X_valid_tensors = Variable(torch.Tensor(valid_seqs)).type(torch.LongTensor)

y_train_tensors = Variable(torch.Tensor(y_train)).type(torch.LongTensor)
y_valid_tensors = Variable(torch.Tensor(y_valid)).type(torch.LongTensor)

In [217]:
class MyDataset(Dataset):
    
    def __init__(self, seq, y):
        assert len(seq) == len(y)
        self.seq = seq
        self.y = y-1
    
    def __getitem__(self, idx):
        return np.asarray(self.seq[idx]), self.y[idx]

    def __len__(self):
        return len(self.seq)

In [218]:
batch_size = 16

train_loader = DataLoader(MyDataset(X_train_tensors, y_train_tensors), batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(MyDataset(X_valid_tensors, y_valid_tensors), batch_size=batch_size)

#### Building model

In [219]:
class lstm_model(nn.Module):
    def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length):
        super(lstm_model, self).__init__()
        self.num_classes = num_classes #number of classes
        self.num_layers = num_layers #number of layers
        self.input_size = input_size #input size
        self.hidden_size = hidden_size #hidden state
        self.seq_length = seq_length #sequence length

        self.embedding = nn.Embedding(num_embeddings=len(word2id)+1, embedding_dim=input_size)
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                          num_layers=num_layers, batch_first=True) #lstm
        self.fc_1 =  nn.Linear(hidden_size, 128) #fully connected 1
        self.max = nn.MaxPool1d(kernel_size=3,
                                stride=1)
        self.drop = nn.Dropout(0.5)
        self.fc = nn.Linear(128, num_classes) #fully connected last layer

        self.relu = nn.ReLU()
        self.softmax = nn.Softmax()
    
    def forward(self,x):
        h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)) #hidden state
        c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)) #internal state
        # Propagate input through LSTM
        embedd = self.embedding(x)
        embedd = self.drop(embedd)
        output, (hn, cn) = self.lstm(embedd, (h_0, c_0)) #lstm with input, hidden, and internal state
        # return self.softmax(self.fc(output[:,-1,:]))
        hn = hn.view(-1, self.hidden_size) #reshaping the data for Dense layer next
        out = self.relu(hn)
        # out = self.fc_1(out) #first Dense
        # out = self.relu(out) #relu
        out = self.fc(out) #Final Output
        return out

In [220]:
num_classes, input_size, hidden_size, num_layers, seq_length = 5, 64, 128, 1, 1
model = lstm_model(num_classes, input_size, hidden_size, num_layers, seq_length)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = torch.nn.CrossEntropyLoss()     

In [221]:
for e in range(1, 31):    
    print('epoch', e)
    model.train()
    total_acc = 0
    total_loss = 0
    total_count = 0
    with tqdm.tqdm(train_loader) as t:
        for x, y in t:
            # print(x.shape)
            optimizer.zero_grad()
            logits = model(x)
            loss = criterion(logits, y)
            loss.backward()
            total_acc += (logits.argmax(1) == y).sum().item()
            total_count += y.size(0)
            total_loss += loss.item()
            optimizer.step()
            t.set_postfix({'loss': total_loss/total_count, 'acc': total_acc/total_count})

    model.eval()
    y_pred = []
    y_true = []
    with tqdm.tqdm(valid_loader) as t:
        for x, y in t:
            logits = model(x)
            total_acc += (logits.argmax(1) == y).sum().item()
            total_count += len(y)
            y_pred += logits.argmax(1).tolist()
            y_true += y.tolist()
    print(classification_report(y_true, y_pred))
    print("\n\n")
    print(confusion_matrix(y_true, y_pred))

epoch 1


100%|██████████| 1125/1125 [00:31<00:00, 36.18it/s, loss=0.0977, acc=0.253]
100%|██████████| 125/125 [00:02<00:00, 47.62it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       295
           1       0.00      0.00      0.00       198
           2       0.26      0.52      0.34       508
           3       0.24      0.07      0.11       523
           4       0.25      0.43      0.32       476

    accuracy                           0.25      2000
   macro avg       0.15      0.20      0.15      2000
weighted avg       0.19      0.25      0.19      2000




[[  0   0 152  26 117]
 [  0   0 109  13  76]
 [  0   0 262  44 202]
 [  0   0 253  36 234]
 [  0   0 241  28 207]]
epoch 2


100%|██████████| 1125/1125 [00:48<00:00, 23.03it/s, loss=0.0973, acc=0.264]
100%|██████████| 125/125 [00:02<00:00, 62.21it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       295
           1       0.00      0.00      0.00       198
           2       0.25      0.81      0.39       508
           3       0.28      0.10      0.14       523
           4       0.29      0.11      0.16       476

    accuracy                           0.26      2000
   macro avg       0.16      0.20      0.14      2000
weighted avg       0.20      0.26      0.17      2000




[[  0   0 226  34  35]
 [  0   0 176  14   8]
 [  0   0 413  52  43]
 [  0   0 431  51  41]
 [  0   0 391  34  51]]
epoch 3


100%|██████████| 1125/1125 [00:25<00:00, 43.56it/s, loss=0.0969, acc=0.275]
100%|██████████| 125/125 [00:01<00:00, 117.28it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       295
           1       0.00      0.00      0.00       198
           2       0.30      0.51      0.38       508
           3       0.26      0.10      0.14       523
           4       0.32      0.65      0.43       476

    accuracy                           0.31      2000
   macro avg       0.18      0.25      0.19      2000
weighted avg       0.22      0.31      0.24      2000




[[  0   0 169  30  96]
 [  0   0 117  16  65]
 [  0   0 257  62 189]
 [  0   0 181  51 291]
 [  0   0 130  38 308]]
epoch 4


100%|██████████| 1125/1125 [00:25<00:00, 44.92it/s, loss=0.0963, acc=0.287]
100%|██████████| 125/125 [00:01<00:00, 112.36it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       295
           1       0.00      0.00      0.00       198
           2       0.30      0.56      0.39       508
           3       0.23      0.07      0.11       523
           4       0.34      0.63      0.44       476

    accuracy                           0.31      2000
   macro avg       0.17      0.25      0.19      2000
weighted avg       0.22      0.31      0.23      2000




[[  0   0 188  20  87]
 [  0   0 122  13  63]
 [  0   0 282  55 171]
 [  0   0 215  39 269]
 [  0   0 133  41 302]]
epoch 5


100%|██████████| 1125/1125 [00:24<00:00, 45.61it/s, loss=0.0935, acc=0.328]
100%|██████████| 125/125 [00:01<00:00, 109.32it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       295
           1       0.00      0.00      0.00       198
           2       0.29      0.51      0.37       508
           3       0.33      0.47      0.39       523
           4       0.50      0.38      0.43       476

    accuracy                           0.34      2000
   macro avg       0.22      0.27      0.24      2000
weighted avg       0.28      0.34      0.30      2000




[[  0   0 194  80  21]
 [  0   0 136  47  15]
 [  0   0 259 204  45]
 [  0   0 172 246 105]
 [  0   0 121 172 183]]
epoch 6


100%|██████████| 1125/1125 [00:25<00:00, 43.79it/s, loss=0.0916, acc=0.343]
100%|██████████| 125/125 [00:01<00:00, 108.47it/s]


              precision    recall  f1-score   support

           0       0.43      0.04      0.08       295
           1       0.00      0.00      0.00       198
           2       0.29      0.52      0.37       508
           3       0.35      0.38      0.36       523
           4       0.45      0.46      0.45       476

    accuracy                           0.35      2000
   macro avg       0.30      0.28      0.25      2000
weighted avg       0.33      0.35      0.31      2000




[[ 13   0 208  44  30]
 [  5   0 140  30  23]
 [  4   1 264 164  75]
 [  4   0 177 200 142]
 [  4   0 117 138 217]]
epoch 7


100%|██████████| 1125/1125 [00:50<00:00, 22.10it/s, loss=0.09, acc=0.365]  
100%|██████████| 125/125 [00:02<00:00, 45.93it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

           0       0.33      0.39      0.36       295
           1       0.00      0.00      0.00       198
           2       0.33      0.36      0.35       508
           3       0.35      0.34      0.34       523
           4       0.43      0.54      0.48       476

    accuracy                           0.37      2000
   macro avg       0.29      0.33      0.31      2000
weighted avg       0.33      0.37      0.34      2000




[[115   0 103  37  40]
 [ 64   0  81  28  25]
 [ 80   0 183 147  98]
 [ 60   0 107 177 179]
 [ 28   0  76 115 257]]
epoch 8


100%|██████████| 1125/1125 [01:02<00:00, 17.97it/s, loss=0.0882, acc=0.378]
100%|██████████| 125/125 [00:02<00:00, 50.81it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

           0       0.40      0.34      0.37       295
           1       0.00      0.00      0.00       198
           2       0.36      0.43      0.39       508
           3       0.36      0.36      0.36       523
           4       0.43      0.56      0.49       476

    accuracy                           0.39      2000
   macro avg       0.31      0.34      0.32      2000
weighted avg       0.35      0.39      0.36      2000




[[ 99   0 111  36  49]
 [ 52   0  87  29  30]
 [ 46   0 219 146  97]
 [ 32   0 126 188 177]
 [ 18   0  73 118 267]]
epoch 9


100%|██████████| 1125/1125 [01:09<00:00, 16.30it/s, loss=0.0869, acc=0.392]
100%|██████████| 125/125 [00:02<00:00, 50.94it/s]


              precision    recall  f1-score   support

           0       0.36      0.42      0.39       295
           1       0.00      0.00      0.00       198
           2       0.35      0.55      0.43       508
           3       0.41      0.21      0.28       523
           4       0.43      0.55      0.49       476

    accuracy                           0.39      2000
   macro avg       0.31      0.35      0.32      2000
weighted avg       0.35      0.39      0.35      2000




[[123   0 117  11  44]
 [ 62   0 107   5  24]
 [ 65   2 279  69  93]
 [ 56   0 177 109 181]
 [ 31   0 108  74 263]]
epoch 10


100%|██████████| 1125/1125 [01:01<00:00, 18.37it/s, loss=0.0856, acc=0.404]
100%|██████████| 125/125 [00:02<00:00, 50.52it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

           0       0.44      0.33      0.38       295
           1       0.00      0.00      0.00       198
           2       0.34      0.53      0.42       508
           3       0.37      0.26      0.31       523
           4       0.45      0.59      0.51       476

    accuracy                           0.39      2000
   macro avg       0.32      0.34      0.32      2000
weighted avg       0.36      0.39      0.36      2000




[[ 97   0 129  23  46]
 [ 47   0 107  18  26]
 [ 38   0 267 113  90]
 [ 23   0 177 138 185]
 [ 15   0  98  80 283]]
epoch 11


100%|██████████| 1125/1125 [00:44<00:00, 25.22it/s, loss=0.0843, acc=0.417]
100%|██████████| 125/125 [00:02<00:00, 52.80it/s]


              precision    recall  f1-score   support

           0       0.41      0.42      0.42       295
           1       0.00      0.00      0.00       198
           2       0.33      0.37      0.35       508
           3       0.36      0.40      0.38       523
           4       0.47      0.55      0.51       476

    accuracy                           0.39      2000
   macro avg       0.32      0.35      0.33      2000
weighted avg       0.35      0.39      0.37      2000




[[125   0  98  39  33]
 [ 62   0  84  31  21]
 [ 61   2 186 176  83]
 [ 30   0 125 208 160]
 [ 25   2  65 122 262]]
epoch 12


100%|██████████| 1125/1125 [00:48<00:00, 23.22it/s, loss=0.0836, acc=0.421]
100%|██████████| 125/125 [00:01<00:00, 100.29it/s]


              precision    recall  f1-score   support

           0       0.43      0.33      0.37       295
           1       0.14      0.01      0.01       198
           2       0.36      0.47      0.40       508
           3       0.39      0.42      0.40       523
           4       0.51      0.57      0.54       476

    accuracy                           0.41      2000
   macro avg       0.36      0.36      0.34      2000
weighted avg       0.39      0.41      0.39      2000




[[ 97   4 121  38  35]
 [ 48   1 107  30  12]
 [ 41   1 238 159  69]
 [ 24   1 132 218 148]
 [ 16   0  72 117 271]]
epoch 13


100%|██████████| 1125/1125 [00:52<00:00, 21.62it/s, loss=0.0825, acc=0.428]
100%|██████████| 125/125 [00:02<00:00, 54.72it/s]


              precision    recall  f1-score   support

           0       0.39      0.44      0.41       295
           1       0.15      0.01      0.02       198
           2       0.38      0.40      0.39       508
           3       0.41      0.29      0.34       523
           4       0.44      0.70      0.54       476

    accuracy                           0.41      2000
   macro avg       0.35      0.37      0.34      2000
weighted avg       0.38      0.41      0.38      2000




[[130   4  87  19  55]
 [ 71   2  72  18  35]
 [ 64   4 202 117 121]
 [ 39   2 119 150 213]
 [ 30   1  47  66 332]]
epoch 14


100%|██████████| 1125/1125 [00:58<00:00, 19.37it/s, loss=0.0818, acc=0.435]
100%|██████████| 125/125 [00:02<00:00, 48.41it/s]


              precision    recall  f1-score   support

           0       0.42      0.43      0.42       295
           1       0.00      0.00      0.00       198
           2       0.40      0.32      0.35       508
           3       0.38      0.36      0.37       523
           4       0.43      0.71      0.53       476

    accuracy                           0.41      2000
   macro avg       0.33      0.36      0.34      2000
weighted avg       0.36      0.41      0.38      2000




[[126   3  67  33  66]
 [ 71   0  61  28  38]
 [ 53   2 162 168 123]
 [ 30   0  78 190 225]
 [ 19   0  37  83 337]]
epoch 15


100%|██████████| 1125/1125 [00:48<00:00, 23.40it/s, loss=0.081, acc=0.444] 
100%|██████████| 125/125 [00:01<00:00, 101.93it/s]


              precision    recall  f1-score   support

           0       0.46      0.40      0.43       295
           1       0.27      0.02      0.03       198
           2       0.37      0.51      0.43       508
           3       0.40      0.35      0.37       523
           4       0.48      0.58      0.52       476

    accuracy                           0.42      2000
   macro avg       0.40      0.37      0.36      2000
weighted avg       0.41      0.42      0.40      2000




[[119   3 107  20  46]
 [ 51   3  97  32  15]
 [ 55   5 257 114  77]
 [ 23   0 155 183 162]
 [ 11   0  85 105 275]]
epoch 16


100%|██████████| 1125/1125 [00:24<00:00, 45.85it/s, loss=0.0803, acc=0.444]
100%|██████████| 125/125 [00:01<00:00, 118.44it/s]


              precision    recall  f1-score   support

           0       0.40      0.48      0.44       295
           1       0.30      0.06      0.09       198
           2       0.40      0.34      0.37       508
           3       0.39      0.35      0.37       523
           4       0.44      0.64      0.52       476

    accuracy                           0.41      2000
   macro avg       0.38      0.37      0.36      2000
weighted avg       0.39      0.41      0.39      2000




[[143  16  56  25  55]
 [ 76  11  57  24  30]
 [ 76   9 173 144 106]
 [ 39   1  98 181 204]
 [ 27   0  48  94 307]]
epoch 17


100%|██████████| 1125/1125 [00:24<00:00, 46.18it/s, loss=0.0799, acc=0.451]
100%|██████████| 125/125 [00:01<00:00, 117.52it/s]


              precision    recall  f1-score   support

           0       0.49      0.40      0.44       295
           1       0.44      0.04      0.07       198
           2       0.38      0.47      0.42       508
           3       0.39      0.34      0.36       523
           4       0.46      0.64      0.54       476

    accuracy                           0.42      2000
   macro avg       0.43      0.38      0.37      2000
weighted avg       0.43      0.42      0.40      2000




[[119   6  96  29  45]
 [ 52   8  87  26  25]
 [ 46   3 240 131  88]
 [ 20   0 134 177 192]
 [  8   1  78  86 303]]
epoch 18


100%|██████████| 1125/1125 [00:33<00:00, 33.35it/s, loss=0.0791, acc=0.459]
100%|██████████| 125/125 [00:01<00:00, 85.41it/s]


              precision    recall  f1-score   support

           0       0.44      0.45      0.45       295
           1       0.35      0.03      0.06       198
           2       0.39      0.46      0.42       508
           3       0.37      0.33      0.35       523
           4       0.48      0.62      0.54       476

    accuracy                           0.42      2000
   macro avg       0.41      0.38      0.36      2000
weighted avg       0.41      0.42      0.40      2000




[[134   7  88  25  41]
 [ 61   6  80  31  20]
 [ 53   3 234 136  82]
 [ 36   1 141 172 173]
 [ 21   0  64  96 295]]
epoch 19


100%|██████████| 1125/1125 [00:24<00:00, 45.01it/s, loss=0.0786, acc=0.461]
100%|██████████| 125/125 [00:01<00:00, 115.06it/s]


              precision    recall  f1-score   support

           0       0.47      0.35      0.40       295
           1       0.33      0.05      0.09       198
           2       0.37      0.49      0.43       508
           3       0.40      0.30      0.34       523
           4       0.47      0.68      0.56       476

    accuracy                           0.42      2000
   macro avg       0.41      0.38      0.36      2000
weighted avg       0.41      0.42      0.40      2000




[[104  12 105  23  51]
 [ 46  10  94  22  26]
 [ 39   5 251 118  95]
 [ 18   2 156 155 192]
 [ 14   1  67  69 325]]
epoch 20


100%|██████████| 1125/1125 [00:26<00:00, 43.09it/s, loss=0.0778, acc=0.469]
100%|██████████| 125/125 [00:01<00:00, 114.55it/s]


              precision    recall  f1-score   support

           0       0.44      0.44      0.44       295
           1       0.32      0.04      0.07       198
           2       0.36      0.46      0.41       508
           3       0.40      0.34      0.37       523
           4       0.49      0.61      0.55       476

    accuracy                           0.42      2000
   macro avg       0.40      0.38      0.37      2000
weighted avg       0.41      0.42      0.40      2000




[[131   7  94  27  36]
 [ 55   8  90  20  25]
 [ 60   8 236 128  76]
 [ 31   0 155 179 158]
 [ 19   2  72  94 289]]
epoch 21


100%|██████████| 1125/1125 [00:24<00:00, 45.55it/s, loss=0.0772, acc=0.472]
100%|██████████| 125/125 [00:01<00:00, 115.38it/s]


              precision    recall  f1-score   support

           0       0.46      0.43      0.44       295
           1       0.36      0.06      0.10       198
           2       0.38      0.47      0.42       508
           3       0.39      0.38      0.38       523
           4       0.50      0.60      0.55       476

    accuracy                           0.43      2000
   macro avg       0.42      0.39      0.38      2000
weighted avg       0.42      0.43      0.41      2000




[[126  11  84  37  37]
 [ 54  12  87  28  17]
 [ 49   7 240 140  72]
 [ 27   2 144 197 153]
 [ 16   1  72 103 284]]
epoch 22


100%|██████████| 1125/1125 [00:24<00:00, 46.11it/s, loss=0.0767, acc=0.474]
100%|██████████| 125/125 [00:01<00:00, 120.27it/s]


              precision    recall  f1-score   support

           0       0.43      0.51      0.46       295
           1       0.23      0.10      0.14       198
           2       0.36      0.40      0.38       508
           3       0.39      0.35      0.37       523
           4       0.51      0.59      0.55       476

    accuracy                           0.42      2000
   macro avg       0.39      0.39      0.38      2000
weighted avg       0.40      0.42      0.41      2000




[[149  18  73  24  31]
 [ 63  19  74  27  15]
 [ 72  26 204 135  71]
 [ 36  13 141 183 150]
 [ 26   5  69  97 279]]
epoch 23


100%|██████████| 1125/1125 [00:25<00:00, 44.13it/s, loss=0.0761, acc=0.477]
100%|██████████| 125/125 [00:01<00:00, 111.76it/s]


              precision    recall  f1-score   support

           0       0.44      0.49      0.46       295
           1       0.32      0.07      0.11       198
           2       0.39      0.43      0.41       508
           3       0.39      0.33      0.36       523
           4       0.48      0.64      0.55       476

    accuracy                           0.43      2000
   macro avg       0.40      0.39      0.38      2000
weighted avg       0.41      0.43      0.41      2000




[[144  11  71  31  38]
 [ 60  13  79  26  20]
 [ 66  11 216 123  92]
 [ 33   4 135 175 176]
 [ 22   2  56  91 305]]
epoch 24


100%|██████████| 1125/1125 [00:25<00:00, 43.98it/s, loss=0.0756, acc=0.487]
100%|██████████| 125/125 [00:01<00:00, 108.30it/s]


              precision    recall  f1-score   support

           0       0.49      0.38      0.43       295
           1       0.27      0.06      0.10       198
           2       0.37      0.44      0.40       508
           3       0.37      0.35      0.36       523
           4       0.47      0.62      0.53       476

    accuracy                           0.41      2000
   macro avg       0.39      0.37      0.36      2000
weighted avg       0.40      0.41      0.40      2000




[[112  16  90  33  44]
 [ 42  12  91  33  20]
 [ 48   9 226 136  89]
 [ 16   5 141 181 180]
 [ 11   2  61 107 295]]
epoch 25


100%|██████████| 1125/1125 [00:26<00:00, 43.15it/s, loss=0.0751, acc=0.488]
100%|██████████| 125/125 [00:01<00:00, 113.44it/s]


              precision    recall  f1-score   support

           0       0.47      0.45      0.46       295
           1       0.25      0.04      0.07       198
           2       0.37      0.46      0.41       508
           3       0.39      0.36      0.37       523
           4       0.50      0.58      0.54       476

    accuracy                           0.42      2000
   macro avg       0.39      0.38      0.37      2000
weighted avg       0.41      0.42      0.41      2000




[[134   8  85  31  37]
 [ 59   8  84  29  18]
 [ 54  10 236 134  74]
 [ 26   6 155 189 147]
 [ 15   0  79 104 278]]
epoch 26


100%|██████████| 1125/1125 [00:24<00:00, 45.56it/s, loss=0.0746, acc=0.497]
100%|██████████| 125/125 [00:01<00:00, 116.47it/s]


              precision    recall  f1-score   support

           0       0.44      0.48      0.46       295
           1       0.25      0.07      0.10       198
           2       0.38      0.44      0.41       508
           3       0.40      0.29      0.34       523
           4       0.48      0.68      0.56       476

    accuracy                           0.42      2000
   macro avg       0.39      0.39      0.37      2000
weighted avg       0.41      0.42      0.40      2000




[[141  14  77  20  43]
 [ 62  13  75  24  24]
 [ 66  15 222 113  92]
 [ 32   6 148 152 185]
 [ 20   3  63  68 322]]
epoch 27


100%|██████████| 1125/1125 [00:30<00:00, 36.55it/s, loss=0.0741, acc=0.497]
100%|██████████| 125/125 [00:01<00:00, 116.69it/s]


              precision    recall  f1-score   support

           0       0.47      0.45      0.46       295
           1       0.38      0.09      0.15       198
           2       0.35      0.39      0.37       508
           3       0.38      0.36      0.37       523
           4       0.50      0.65      0.56       476

    accuracy                           0.42      2000
   macro avg       0.42      0.39      0.38      2000
weighted avg       0.41      0.42      0.41      2000




[[134  10  79  32  40]
 [ 55  18  78  29  18]
 [ 57  13 197 151  90]
 [ 25   5 142 187 164]
 [ 16   2  59  91 308]]
epoch 28


100%|██████████| 1125/1125 [00:33<00:00, 33.74it/s, loss=0.0735, acc=0.499]
100%|██████████| 125/125 [00:01<00:00, 111.84it/s]


              precision    recall  f1-score   support

           0       0.49      0.40      0.44       295
           1       0.31      0.13      0.18       198
           2       0.36      0.40      0.38       508
           3       0.39      0.43      0.41       523
           4       0.52      0.59      0.55       476

    accuracy                           0.43      2000
   macro avg       0.42      0.39      0.39      2000
weighted avg       0.42      0.43      0.42      2000




[[119  26  78  36  36]
 [ 45  25  80  30  18]
 [ 45  19 201 171  72]
 [ 22   7 133 227 134]
 [ 11   3  68 112 282]]
epoch 29


100%|██████████| 1125/1125 [00:25<00:00, 44.49it/s, loss=0.073, acc=0.503] 
100%|██████████| 125/125 [00:01<00:00, 121.04it/s]


              precision    recall  f1-score   support

           0       0.42      0.52      0.46       295
           1       0.31      0.08      0.13       198
           2       0.41      0.37      0.39       508
           3       0.38      0.41      0.40       523
           4       0.52      0.60      0.56       476

    accuracy                           0.43      2000
   macro avg       0.41      0.40      0.39      2000
weighted avg       0.42      0.43      0.42      2000




[[153  17  52  41  32]
 [ 69  16  63  33  17]
 [ 78  13 188 156  73]
 [ 43   4 115 217 144]
 [ 24   2  44 119 287]]
epoch 30


100%|██████████| 1125/1125 [00:25<00:00, 44.00it/s, loss=0.0727, acc=0.511]
100%|██████████| 125/125 [00:01<00:00, 97.26it/s] 


              precision    recall  f1-score   support

           0       0.46      0.46      0.46       295
           1       0.30      0.16      0.20       198
           2       0.39      0.41      0.40       508
           3       0.39      0.33      0.36       523
           4       0.50      0.66      0.56       476

    accuracy                           0.43      2000
   macro avg       0.41      0.40      0.40      2000
weighted avg       0.42      0.43      0.42      2000




[[137  27  64  27  40]
 [ 56  31  65  24  22]
 [ 61  30 207 124  86]
 [ 26  12 143 173 169]
 [ 21   5  48  90 312]]
