In [1]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
import random

KeyboardInterrupt: 

In [None]:
# Seed PyTorch's global random number generator.
torch.manual_seed(2023)
# Use CUDA when PyTorch reports it as available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

In [None]:
# Load the training and test CSV files.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        r'C:\Users\connor\PycharmProjects\portfolio\sentiment_analysis_cnn\train.csv',
        r'C:\Users\connor\PycharmProjects\portfolio\sentiment_analysis_cnn\test.csv',
    )
)

In [None]:
# Print pandas' summary statistics for the training frame, then preview its first rows.
print(train.describe())
train.head()

In [None]:
# Number of training rows per sentiment class.
# `size()` yields exactly one count per class, so the chart shows one bar per class
# instead of one identical bar for every column of `train`.
grouped_count = train.groupby('sentiment').size()

grouped_count.plot(kind='bar', ylabel='Count', xlabel='Sentiment', title='Sentiment Distribution')

In [None]:
# NOTE(review): 603 is a hard-coded row count whose origin is not shown in this notebook —
# presumably the size of one sentiment class read off the chart above. Confirm, and
# derive it from `train` instead of hard-coding it.
print(603/len(train))

In [None]:
# Separate the raw text (model input) from the sentiment labels (target).
X_train, y_train = train['text'], train['sentiment']

In [None]:
# Display the label Series.
y_train

## Synthetic Data

In [None]:
# Synthetic data creation: NLTK setup.
import nltk
from nltk.corpus import wordnet

# Seed NumPy's global RNG, which drives the random synonym choice below.
np.random.seed(2023)

# Fetch the NLTK resources requested by this notebook.
for nltk_resource in (
    'punkt_tab',
    'averaged_perceptron_tagger_eng',
    'averaged_perceptron_tagger',
    'wordnet',
):
    nltk.download(nltk_resource)

def synonym_replacement(sentence):
    """Return a copy of `sentence` with content words swapped for WordNet synonyms.

    The sentence is tokenised and POS-tagged with NLTK. A token is replaced only
    when its tag maps to a WordNet part of speech (see `wordnet_tag`) and WordNet
    knows at least one synset for it; the replacement is a lemma name picked at
    random with `np.random.choice` from all lemmas of all matching synsets.
    Every other token is kept unchanged.

    Args:
        sentence: Text to augment.

    Returns:
        The (partly replaced) tokens joined by single spaces.
    """
    words = nltk.word_tokenize(sentence)
    pos_tags = nltk.pos_tag(words)

    new_words = []
    for word, tag in pos_tags:
        pos = wordnet_tag(tag)
        if pos is None:
            # Determiners, pronouns, prepositions, punctuation, ... have no WordNet
            # part of speech. Querying WordNet with pos=None would search every part
            # of speech and swap such function words for unrelated senses, so they
            # are copied through untouched.
            new_words.append(word)
            continue

        synonyms = [lemma.name() for synset in wordnet.synsets(word, pos=pos) for lemma in synset.lemmas()]
        if synonyms:
            # Multi-word lemma names join their words with underscores; turn them
            # back into plain spaces.
            new_words.append(str(np.random.choice(synonyms)).replace('_', ' '))
        else:
            new_words.append(word)
    return ' '.join(new_words)

def wordnet_tag(tag):
    """Translate a POS tag into the matching WordNet part-of-speech constant.

    The decision is made on the tag's first letter: N -> noun, V -> verb,
    J -> adjective, R -> adverb.

    Args:
        tag: POS tag string as produced by `nltk.pos_tag`.

    Returns:
        The WordNet constant for the tag, or None when no prefix matches.
    """
    prefix_to_pos = (
        ('N', wordnet.NOUN),
        ('V', wordnet.VERB),
        ('J', wordnet.ADJ),
        ('R', wordnet.ADV),
    )
    for prefix, pos in prefix_to_pos:
        if tag.startswith(prefix):
            return pos
    return None

# Sanity check: print the first sentence next to its augmented version.
for row in X_train[:1]:
    print(row)
    print(synonym_replacement(row))
# Add synonym replaced sentences to X_train and update y_train
X_train_synth = []
y_train_synth = []

# Texts and labels are paired by position with zip. Looking the label up with
# y_train[itr] is a label-based lookup that only lines up with the enumerate counter
# while the Series still carries a default 0..n-1 index.
for row, label in zip(X_train, y_train):
    # Real row first, immediately followed by its synthetic copy.
    X_train_synth.append(row)
    X_train_synth.append(synonym_replacement(row))
    # The true label is stored twice: once for the real row, once for the synthetic row.
    y_train_synth.append(label)
    y_train_synth.append(label)

In [None]:
# Print the first ten original sentences.
print(X_train[0:10])

In [None]:
# Print the first twenty augmented rows; original and synthetic sentences alternate.
print(pd.DataFrame(X_train_synth)[0:20])

## Shuffle and split into train/valid

The test set is scored on Kaggle, which only takes ids and predicted labels, so a validation split is held out from the training data below.

In [None]:
# Distinct label values present in the augmented label list.
torch.tensor(y_train_synth).unique()

In [None]:
# Every original sentence sits directly in front of its synthetic copy in
# X_train_synth, so rows 2k and 2k+1 form one group. Splitting by group keeps a
# sentence and its paraphrase on the same side; a plain row-wise split would leak
# near-duplicates of training sentences into the validation set and inflate the
# validation scores.
from sklearn.model_selection import GroupShuffleSplit

pair_ids = np.arange(len(X_train_synth)) // 2
group_splitter = GroupShuffleSplit(n_splits=1, test_size=0.1, random_state=42)
train_idx, valid_idx = next(group_splitter.split(X_train_synth, y_train_synth, groups=pair_ids))

# Shuffle the rows inside each side, as train_test_split(shuffle=True) did.
split_rng = np.random.RandomState(42)
train_idx = split_rng.permutation(train_idx)
valid_idx = split_rng.permutation(valid_idx)

X_train = [X_train_synth[i] for i in train_idx]
X_test = [X_train_synth[i] for i in valid_idx]
y_train = [y_train_synth[i] for i in train_idx]
y_test = [y_train_synth[i] for i in valid_idx]

In [None]:
# Summary statistics of the held-out labels.
pd.DataFrame(y_test).describe()

In [None]:
# to_categorical expects class ids that start at 0, while the labels start at 1.
# Caution: the cell rebinds y_train / y_test from themselves, so running it twice
# shifts the labels twice.
y_train = list(map(lambda label: label - 1, y_train))
y_test = list(map(lambda label: label - 1, y_test))

In [None]:
# One-hot encode the zero-based class ids into 5 columns.
y_train, y_test = (
    to_categorical(class_ids, num_classes=5)
    for class_ids in (y_train, y_test)
)

In [None]:
# Summary statistics of the one-hot encoded held-out labels (one column per class).
pd.DataFrame(y_test).describe()

## Bag-of-Words Feature Extractor

In [None]:
# One vectorizer, fitted on the training text only, defines the uni-/bi-gram
# vocabulary. Both splits are transformed with it so their columns mean the same
# thing; fitting a second vectorizer on the held-out text would produce an
# incompatible feature space and leak held-out vocabulary.
gram_vectorizer = CountVectorizer(ngram_range=(1, 2))

X_train_gram_vectorizer = gram_vectorizer.fit_transform(X_train).toarray()
X_train_bag_of_words = torch.tensor(X_train_gram_vectorizer)

# Transform the held-out text itself (not X_train) with the training vocabulary.
X_test_gram_vectorizer = gram_vectorizer.transform(X_test).toarray()
X_test_bag_of_words = torch.tensor(X_test_gram_vectorizer)

In [None]:
# Distinct n-gram count values that occur in the training bag-of-words matrix.
X_train_bag_of_words.unique()

## Tokenize

In [None]:
# Text preprocessing: build the vocabulary on the training text, then turn both
# splits into fixed-length integer sequences.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train)

# Vocabulary size is the largest word id plus one.
vocab_size = len(tokenizer.word_index) + 1

# Sequence length after padding; a tunable hyperparameter.
max_length = 200

# Integer-encode the training and held-out text with the same tokenizer.
encoded_train, encoded_test = (
    tokenizer.texts_to_sequences(texts) for texts in (X_train, X_test)
)

# Pad at the end of each sequence up to max_length.
# Caution: X_train / X_test are rebound from text to padded id matrices here.
X_train, X_test = (
    pad_sequences(sequences, maxlen=max_length, padding='post')
    for sequences in (encoded_train, encoded_test)
)

In [None]:
# Token-id matrices become integer tensors and one-hot labels become float tensors;
# all four are placed on `device`.
X_train, X_test = (
    torch.tensor(token_ids, dtype=torch.int).to(device)
    for token_ids in (X_train, X_test)
)

y_train, y_test = (
    torch.tensor(one_hot, dtype=torch.float).to(device)
    for one_hot in (y_train, y_test)
)

## Pretrained Embeddings

In [None]:
# Load pretrained glove twitter embeddings
import gensim

# Keep commented out since this load takes a while. Save it to disk for quicker use
# model = gensim.models.KeyedVectors.load_word2vec_format(r'C:\Users\connor\PycharmProjects\rice\comp_647\assignment_2\pretrained_embeddings\glove.twitter.27B.200d_wv.txt')
# model.save(r'C:\Users\connor\PycharmProjects\rice\comp_647\assignment_2\pretrained_embeddings\glove.twitter.27B.200d_usable_weights')
model = gensim.models.KeyedVectors.load(
    r'C:\Users\connor\PycharmProjects\rice\comp_647\assignment_2\pretrained_embeddings\glove.twitter.27B.200d_usable_weights')

# Row i of the embedding matrix has to hold the vector of the word that the Keras
# tokenizer maps to id i. Feeding model.vectors in directly would index GloVe's own
# vocabulary order, so every token id would fetch the vector of an unrelated word.
# Id 0 (padding) and words that GloVe does not know keep an all-zero vector.
embedding_matrix = np.zeros((vocab_size, model.vector_size), dtype=np.float32)
for word, token_id in tokenizer.word_index.items():
    if word in model:
        embedding_matrix[token_id] = model[word]

weights = torch.from_numpy(embedding_matrix)
embedding = nn.Embedding.from_pretrained(weights, freeze=True)

In [None]:
# Confirm embeddings work, get embeddings for index 1.
# The probe tensor is not called `input`, so the Python builtin of that name stays
# usable for the rest of the notebook.
probe_index = torch.LongTensor([1])
embedding(probe_index)

## Twitter-roBERTa-base

In [None]:
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer, AutoConfig

# Hugging Face model id of the pretrained Twitter sentiment classifier.
# NOTE(review): AutoTokenizer is imported but never used in this notebook; the model
# is later fed ids produced by the Keras tokenizer — confirm whether the model's own
# tokenizer should be used instead.
MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
twitter_roberta = AutoModelForSequenceClassification.from_pretrained(MODEL)



In [None]:
# Display the module tree of the pretrained model.
twitter_roberta

In [None]:
# You must batch twitter_roberta or it will run out of memory
class TWITTERROBERTA(nn.Module):
    """Pretrained sequence classifier whose output layer is replaced for `classes` labels.

    Args:
        pretrained_model: Model that exposes `classifier.out_proj` (an nn.Linear) and
            whose forward pass returns an object with a `.logits` attribute. It is
            deep-copied, so the instance passed in (or the shared default) is never
            modified.
        requires_grad: If False, every parameter except the new output layer is
            frozen. If True, all parameters stay trainable.
        classes: Number of output classes of the new output layer.
    """
    def __init__(self, pretrained_model=twitter_roberta, requires_grad=False, classes=5):
        import copy

        super().__init__()
        # The default argument is a single module object shared by every instance.
        # Without a private copy, building a second TWITTERROBERTA would silently swap
        # the output layer of the first one as well.
        self.twitter_roberta = copy.deepcopy(pretrained_model)

        # Kept from the original code; nothing in this notebook reads this attribute.
        self.twitter_roberta.transform_input=False
        # Take the input width from the layer being replaced instead of hard-coding it.
        head_in_features = self.twitter_roberta.classifier.out_proj.in_features
        self.twitter_roberta.classifier.out_proj = nn.Linear(head_in_features, classes)

        # Train only the new output layer unless full fine-tuning was requested.
        if not requires_grad:
            self._freeze_param()

    def _freeze_param(self):
        """Freeze every parameter except those of the replacement output layer."""
        for k, v in self.named_parameters():
            # Names look like "twitter_roberta.classifier.out_proj.weight", so a
            # prefix test on "out_proj" can never match; test for the sub-path.
            v.requires_grad = "classifier.out_proj" in k

    def forward(self, x):
        """Return the classification logits the wrapped model produces for `x`."""
        return self.twitter_roberta(x).logits

In [None]:
# Need to create dataloader object for twitter-roBERTa since it is too big.
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
# Validation batches are served in a fixed order: shuffling adds nothing to
# evaluation and makes per-batch validation numbers differ between epochs for no reason.
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [None]:
## confirm dataloader is functioning properly
# for train, label in train_dataloader:
#     print(train)
#     print()
#     print(label)
#     break

## Transformer Encoder

In [None]:
# Batched input is (batch, sequence): the encoder is therefore built with
# batch_first=True and the token ids are embedded before they reach it.
class TransformerEncoder(nn.Module):
    """Token embedding -> stacked transformer encoder layers -> mean pooling -> linear classifier.

    Relies on the notebook globals `vocab_size` (embedding rows) and `max_length`
    (used as the embedding width / `d_model`).

    Args:
        heads: Number of attention heads per encoder layer; `max_length` must be
            divisible by it.
        num_linear_layers: Number of stacked encoder layers.
        num_classes: Size of the output layer.
    """
    def __init__(self, heads, num_linear_layers, num_classes=5):
        super(TransformerEncoder, self).__init__()
        # create embeddings
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=max_length)
        # Initialize the encoder. batch_first=True makes it read its input as
        # (batch, sequence, feature); the default layout is (sequence, batch, feature),
        # which would mix up samples and positions for batched input.
        self.encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=max_length, nhead=heads, batch_first=True), num_layers=num_linear_layers)
        # Define the fully connected layer
        self.linear = nn.Linear(max_length, num_classes)

    def forward(self, x):
        """Map (batch, sequence) integer token ids to (batch, num_classes) logits."""
        # Token ids have to be turned into vectors first; the encoder cannot consume
        # raw integer ids.
        x = self.embedding(x)
        # Pass the embedded sequence through the transformer encoder
        x = self.encoder(x)
        # Average over the sequence dimension to get one vector per sample.
        x = x.mean(dim=1)
        return self.linear(x)

## CNN

In [None]:
class CNN(nn.Module):
    """Text classifier: embedding -> stacked Conv1d layers -> fully connected head.

    Relies on the notebook globals `vocab_size`, `max_length` and, when
    `pretrained_embedding` is true, `embedding`.

    Args:
        cnn_num_classes: Size of the output layer.
        dropout_rate: Dropout probability applied after every fully connected hidden layer.
        batch_norm: If true, BatchNorm1d is applied to each fully connected hidden
            layer's output before the activation. The BatchNorm modules are created
            either way.
        num_linear_layers: Number of fully connected hidden layers (the first one plus
            `num_linear_layers - 1` more); the output layer is not counted.
        num_neurons: Width of every fully connected hidden layer.
        num_cnn_layers: Number of Conv1d layers (the first one plus
            `num_cnn_layers - 1` more).
        num_filters: Output channels of every Conv1d layer.
        activation_function: Module applied after each fully connected hidden layer.
            The convolution layers are chained without an activation between them.
        pretrained_embedding: If true, use the global `embedding` module instead of a
            freshly initialised nn.Embedding.
    """
    def __init__(self, cnn_num_classes=5, dropout_rate=0, batch_norm=False, num_linear_layers=3, num_neurons=16, num_cnn_layers=3, num_filters=32, activation_function=nn.ReLU(), pretrained_embedding=False):
        super().__init__()
        
        # feature extraction
        if pretrained_embedding:
            self.embedding = embedding
        else:
            self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=max_length)
        # forward() hands the embedding output straight to this layer, and Conv1d reads
        # dim 1 as channels, so in_channels=max_length assumes inputs are padded to
        # exactly max_length tokens — TODO confirm against callers.
        self.cnn = nn.Conv1d(in_channels=max_length, out_channels=num_filters, kernel_size=3, padding='same')
        self.flatten = nn.Flatten()
        # First fully connected layer and the output layer; the remaining hidden layers
        # live in the lists below. in_features=max_length*num_filters matches the
        # flattened conv output only when the embedding width equals max_length.
        self.linear = nn.Linear(in_features=max_length*num_filters, out_features=num_neurons)
        self.dropout = nn.Dropout(p=dropout_rate)
        self.output = nn.Linear(in_features=num_neurons, out_features=cnn_num_classes)
        self.activation_function = activation_function
        self.batch_normalization = nn.BatchNorm1d(num_features=num_neurons)
        
        # layer lists: one entry per additional hidden layer beyond the first
        self.dropout_list = nn.ModuleList([nn.Dropout(dropout_rate) for dummy in range(num_linear_layers-1)])
        self.batch_normalization_list = nn.ModuleList([nn.BatchNorm1d(num_features=num_neurons) for dummy in range(num_linear_layers-1)]) #batch norm doesn't make sense for unbatched data which is what I have
        self.hidden_layers_list = nn.ModuleList([nn.Linear(num_neurons, num_neurons) for dummy in range(num_linear_layers-1)])
        self.hidden_cnn_layers_list = nn.ModuleList([nn.Conv1d(in_channels=num_filters, out_channels=num_filters, kernel_size=3, padding='same') for dummy in range(num_cnn_layers-1)])
        
        # parameters used in forward function
        self.num_linear_layers = num_linear_layers
        self.num_cnn_layers = num_cnn_layers
        self.batch_norm = batch_norm
        
    def forward(self, x):
        """Return the output-layer logits for `x` (assumed to be integer token ids — TODO confirm)."""
        x = self.embedding(x)
        # Convolution stack; no activation is applied between the conv layers.
        x = self.cnn(x)
        for layer in range(self.num_cnn_layers-1):
            x = self.hidden_cnn_layers_list[layer](x)
        x = self.flatten(x)
        # First hidden layer: linear -> (optional batch norm) -> activation -> dropout.
        if self.batch_norm:
            x = self.dropout(self.activation_function(self.batch_normalization(self.linear(x))))
        else:
            x = self.dropout(self.activation_function(self.linear(x)))
        # Remaining hidden layers follow the same pattern with their own modules.
        for layer in range(self.num_linear_layers-1):
            if self.batch_norm:
                x = self.dropout_list[layer](self.activation_function(self.batch_normalization_list[layer](self.hidden_layers_list[layer](x))))
            else:
                x = self.dropout_list[layer](self.activation_function(self.hidden_layers_list[layer](x)))
        x = self.output(x)
        return x
        
        

In [None]:
def train_twitter_roberta(model, criterion, optimizer, num_epochs, model_name, train_dataloader=train_dataloader, test_dataloader=test_dataloader, early_stopping=True, device=device):
    """Train `model` mini-batch by mini-batch and validate after every epoch.

    The weights are written to the model_weights directory under `model_name`
    whenever the epoch's validation loss improves on the best value so far.

    Args:
        model: Module mapping an input batch to (batch, classes) logits.
        criterion: Loss called as `criterion(logits, targets)`.
        optimizer: Optimizer over the model's parameters.
        num_epochs: Maximum number of passes over `train_dataloader`.
        model_name: File name used for the saved weights.
        train_dataloader: Yields (inputs, targets) training batches; targets are
            expected as one-hot rows.
        test_dataloader: Yields (inputs, targets) validation batches.
        early_stopping: If true, stop after 9 consecutive epochs without a new best
            validation loss.
        device: Device every batch is moved to before the forward pass.

    Returns:
        Four lists with one entry per completed epoch: training loss, validation
        loss, training accuracy, validation accuracy. Losses are sample-weighted
        means of the batch losses; accuracies cover the whole split.
    """
    epoch_train_loss_list = []
    epoch_train_accuracy_list = []
    epoch_valid_loss_list = []
    epoch_valid_accuracy_list = []
    early_stopping_counter = 0
    best_loss = np.inf

    def _run_epoch(dataloader, training):
        """Run one pass over `dataloader`; return (mean loss, accuracy)."""
        model.train(training)
        loss_sum, correct, seen = 0.0, 0, 0
        with torch.set_grad_enabled(training):
            for x_batch, y_batch in dataloader:
                # Tensor.to returns a new tensor, so its result has to be kept.
                x_batch = x_batch.to(device)
                y_batch = y_batch.to(device)

                if training:
                    optimizer.zero_grad()
                outputs = model(x_batch)
                loss = criterion(outputs, y_batch)
                if training:
                    loss.backward()
                    optimizer.step()

                batch_size = len(y_batch)
                loss_sum += loss.item() * batch_size
                # The predicted class is the per-row argmax of the logits (dim=1).
                # A softmax over dim=0 would normalise across the batch and change
                # it, and a one-hot round trip without num_classes breaks whenever
                # the highest class is absent from a batch.
                correct += (outputs.argmax(dim=1) == y_batch.argmax(dim=1)).sum().item()
                seen += batch_size
        seen = max(seen, 1)  # an empty loader reports 0.0 instead of dividing by zero
        return loss_sum / seen, correct / seen

    for epoch in range(num_epochs):
        train_loss, train_accuracy = _run_epoch(train_dataloader, training=True)
        valid_loss, valid_accuracy = _run_epoch(test_dataloader, training=False)

        epoch_train_loss_list.append(train_loss)
        epoch_train_accuracy_list.append(train_accuracy)
        epoch_valid_loss_list.append(valid_loss)
        epoch_valid_accuracy_list.append(valid_accuracy)

        # Checkpoint on the loss of the whole validation set, not of its last batch.
        if valid_loss < best_loss:
            early_stopping_counter = 0
            best_loss = valid_loss
            torch.save(model.state_dict(), fr'C:\Users\connor\PycharmProjects\comp_647_final\model_weights\{model_name}')

        print('Epoch:', epoch, 'Best Valid Loss:', best_loss, 'Current Valid loss:', valid_loss, 'Current Train loss:', train_loss, 'Stop Counter:', early_stopping_counter)
        # The counter is incremented after a possible reset, so it reads 1 right after
        # an improvement and reaches 10 on the 9th epoch in a row without one.
        early_stopping_counter += 1
        if early_stopping and early_stopping_counter >= 10:
            print("Early Stopping Triggered")
            break

    return epoch_train_loss_list, epoch_valid_loss_list, epoch_train_accuracy_list, epoch_valid_accuracy_list

In [None]:
def train_model(model, criterion, optimizer, train_x_data, train_y_data, valid_x_data, valid_y_data, num_epochs, model_name, early_stopping=True, device=device):
    """Train `model` with one full-batch update per epoch and validate after each.

    The weights are written to the model_weights directory under `model_name`
    whenever the validation loss improves on the best value so far.

    Args:
        model: Module mapping the inputs to (rows, classes) logits.
        criterion: Loss called as `criterion(logits, targets)`.
        optimizer: Optimizer over the model's parameters.
        train_x_data, train_y_data: Training inputs and one-hot targets.
        valid_x_data, valid_y_data: Validation inputs and one-hot targets.
        num_epochs: Maximum number of epochs.
        model_name: File name used for the saved weights.
        early_stopping: If true, stop after 9 consecutive epochs without a new best
            validation loss.
        device: Device all four tensors are moved to before training.

    Returns:
        Four lists with one entry per completed epoch: training loss, validation
        loss, training accuracy, validation accuracy.
    """
    epoch_train_loss_list = []
    epoch_train_accuracy_list = []
    epoch_valid_loss_list = []
    epoch_valid_accuracy_list = []
    early_stopping_counter = 0
    best_loss = np.inf

    # Tensor.to returns a new tensor; without re-binding, the move has no effect.
    train_x_data = train_x_data.to(device)
    train_y_data = train_y_data.to(device)
    valid_x_data = valid_x_data.to(device)
    valid_y_data = valid_y_data.to(device)

    def _accuracy(outputs, targets):
        """Fraction of rows whose highest logit sits on the target's hot column."""
        # Per-row argmax of the logits (dim=1). A softmax over dim=0 would normalise
        # across rows and change it, and a one-hot round trip without num_classes
        # breaks whenever the highest class is never predicted.
        return (outputs.argmax(dim=1) == targets.argmax(dim=1)).sum().item() / len(targets)

    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()
        outputs = model(train_x_data)
        loss_train = criterion(outputs, train_y_data)

        epoch_train_loss_list.append(loss_train.item())
        epoch_train_accuracy_list.append(_accuracy(outputs, train_y_data))

        loss_train.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            outputs = model(valid_x_data)
            loss_valid = criterion(outputs, valid_y_data)

            epoch_valid_loss_list.append(loss_valid.item())
            epoch_valid_accuracy_list.append(_accuracy(outputs, valid_y_data))

        if loss_valid.item() < best_loss:
            early_stopping_counter = 0
            best_loss = loss_valid.item()
            torch.save(model.state_dict(), fr'C:\Users\connor\PycharmProjects\comp_647_final\model_weights\{model_name}')

        print('Epoch:', epoch, 'Best Valid Loss:', best_loss, 'Current Valid loss:', loss_valid.item(), 'Current Train loss:', loss_train.item(), 'Stop Counter:', early_stopping_counter)
        # The counter is incremented after a possible reset, so it reads 1 right after
        # an improvement and reaches 10 on the 9th epoch in a row without one.
        early_stopping_counter += 1
        if early_stopping and early_stopping_counter >= 10:
            print("Early Stopping Triggered")
            break

    return epoch_train_loss_list, epoch_valid_loss_list, epoch_train_accuracy_list, epoch_valid_accuracy_list

In [None]:
def test_accuracy(model, X_test_data, y_test_data):
    # epoch_accuracy_valid = Accuracy(task="multiclass", num_classes=6).to(device)
    model.eval()
    with torch.no_grad():
        outputs = model(X_test_data)
        # Take softmax of the output -> get the argmax to convert it to a single number -> one hot encode to get it to the same format as the label. 
        softmax_outputs = torch.nn.functional.one_hot(torch.argmax(nn.Softmax(dim=0)(outputs), dim=1)).to(torch.float)
        #softmax_outputs = torch.nn.functional.one_hot(torch.argmax(nn.Softmax(dim=-1)(outputs), dim=1)).to(torch.float)
        
        total_right = 0
        for row in range(len(y_test_data)):
            # need to use .equal since torchmetrics.accuracy will evaluate each value in each row. Ex.) pred = [0,0,1,0,0,0], actual = [1,0,0,0,0,0] as 66% correct. Expected behavior is this would be 0%
            if torch.equal(softmax_outputs[row], y_test_data[row]):
                total_right += 1
                
        accuracy = total_right / len(y_test_data)

    return accuracy, softmax_outputs

In [None]:
def plot_loss_and_accuracy(train_loss_list, valid_loss_list, train_accuracy_list, valid_accuracy_list, title):
    """Draw the loss curves (top panel) and accuracy curves (bottom panel) of one run.

    Args:
        train_loss_list: Training loss values, in recording order.
        valid_loss_list: Validation loss values, in recording order.
        train_accuracy_list: Training accuracy values, in recording order.
        valid_accuracy_list: Validation accuracy values, in recording order.
        title: Figure title.

    Returns:
        The (fig, axes) pair created by `plt.subplots`.
    """
    fig, axes = plt.subplots(nrows=2)
    fig.suptitle(title,x=.55, y=1.75, fontsize=20)

    # One entry per panel: title, train label, valid label, y-axis label, the two series.
    panel_specs = [
        ('Training and Validation Loss', 'Training Loss', 'Validation Loss', 'Loss',
         train_loss_list, valid_loss_list),
        ('Training and Validation Accuracy', 'Training Accuracy', 'Validation Accuracy', 'Accuracy',
         train_accuracy_list, valid_accuracy_list),
    ]
    for axis, (panel_title, train_label, valid_label, y_label, train_series, valid_series) in zip(axes, panel_specs):
        axis.set_title(panel_title)
        axis.plot(train_series, color='b', label=train_label)
        axis.plot(valid_series, color='orange', label=valid_label)
        axis.legend(['Train', 'Valid'])
        axis.set_xlabel('Num Epoch')
        axis.set_ylabel(y_label)

    plt.tight_layout()
    plt.subplots_adjust(top=1.5)

    return fig, axes

In [None]:
def trainable_parameters_count(model):
    """Count the scalar weights of `model` that will receive gradients.

    Args:
        model: Object whose `parameters()` yields tensors with `requires_grad`
            and `numel()`.

    Returns:
        Total number of elements over all parameters with `requires_grad` set.
    """
    total = 0
    for param in model.parameters():
        if not param.requires_grad:
            continue
        total += param.numel()
    return total

In [None]:
## Random Grid Search Func
def random_grid_search(architecture_type, lr_list, activation_function_list, neuron_num_list, batch_norm_list, dropout_rate_list, num_linear_layers, num_cnn_layers, num_epochs, early_stop_num, num_classes=5, num_combos=5, seed_value=None, pretrained_embeddings=False):
    """Train `num_combos` randomly drawn hyperparameter combinations and name the best one.

    Each combination is trained with `train_model` on the notebook globals
    X_train / y_train and validated on X_test / y_test; `train_model` saves the
    weights of every run under the run's network name.

    Args:
        architecture_type: 'CNN' or 'Transformer'. The 'Transformer' branch still
            builds a CNN (placeholder until a second model type is wired in).
        lr_list: Candidate learning rates.
        activation_function_list: Candidate activation modules.
        neuron_num_list: Candidate hidden-layer widths.
        batch_norm_list: Candidate batch-norm switches.
        dropout_rate_list: Candidate dropout probabilities.
        num_linear_layers: Candidate numbers of fully connected layers.
        num_cnn_layers: Candidate numbers of convolution layers.
        num_epochs: Epochs per run.
        early_stop_num: Currently unused; runs are started with early stopping off.
        num_classes: Number of output classes.
        num_combos: Number of random combinations to try.
        seed_value: Seed for the `random` module; None leaves the draw unseeded.
        pretrained_embeddings: Forwarded to the CNN as `pretrained_embedding`.

    Returns:
        The network name of the run with the lowest validation loss, or None when
        no run was made.

    Raises:
        ValueError: If `architecture_type` is not one of the supported names.
    """
    # Fail before any training instead of hitting an unbound `grid` further down.
    if architecture_type not in ('CNN', 'Transformer'):
        raise ValueError(f"Unsupported architecture_type: {architecture_type!r}")

    # set seed for reproducibility
    random.seed(seed_value)

    best_model_loss = np.inf
    best_rand_grid_model = None
    # randomly choose combo
    for dummy in range(num_combos):
        lr = random.choice(lr_list)
        activation = random.choice(activation_function_list)
        # Draw from the lists passed in as arguments, not from same-purpose globals
        # that may not exist or may hold different values.
        linear_layers = random.choice(num_linear_layers)
        cnn_layers = random.choice(num_cnn_layers)
        neuron_num = random.choice(neuron_num_list)
        batch_norm_t_f = random.choice(batch_norm_list)
        dropout = random.choice(dropout_rate_list)
        network_name = str(lr) + '_' + str(activation) + '_' + str(neuron_num) + '_' + str(batch_norm_t_f) + '_' + str(dropout) + '_' + str(linear_layers) + '_' + str(cnn_layers) + '_' +str(architecture_type) + '_' +str(pretrained_embeddings)

        if architecture_type == 'CNN':
            grid = CNN(cnn_num_classes=num_classes, activation_function=activation, num_neurons=neuron_num, num_linear_layers=linear_layers, num_cnn_layers=cnn_layers, batch_norm=batch_norm_t_f, dropout_rate=dropout, pretrained_embedding=pretrained_embeddings)
        # REPLACE WITH SECOND MODEL TYPE
        else:
            grid = CNN(cnn_num_classes=num_classes, activation_function=activation, num_neurons=neuron_num, num_linear_layers=linear_layers, num_cnn_layers=cnn_layers, batch_norm=batch_norm_t_f, dropout_rate=dropout)
        grid.to(device)
        loss = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(grid.parameters(), lr=lr)
        print('Network Name:', network_name, 'Trainable Parameters:', trainable_parameters_count(grid))
        train_loss, valid_loss, train_acc, valid_acc = train_model(model=grid, criterion=loss, optimizer=optimizer, train_x_data=X_train, valid_x_data=X_test, train_y_data=y_train, valid_y_data=y_test, num_epochs=num_epochs, early_stopping=False, model_name=network_name, device=device)

        # plot training and validation loss as well as accuracy for both
        plot_loss_and_accuracy(train_loss, valid_loss, train_acc, valid_acc, title=network_name)
        # Track which run reached the lowest validation loss. An empty history
        # (num_epochs == 0) counts as infinitely bad.
        min_valid_loss = min(valid_loss, default=np.inf)

        if min_valid_loss < best_model_loss:
            best_model_loss = min_valid_loss
            best_rand_grid_model = network_name

    return best_rand_grid_model

## Build model

In [None]:
# parameter tuning: search space and run settings for the random grid search
num_classes = 5
num_epochs = 1000
early_stop_num = 100
# Candidate depths of the fully connected and convolutional stacks.
num_linear_layers_list = [3,4,5,8]
num_cnn_layers_list = [3,4,5,8]
# Candidate learning rates.
lr_list = [.0001, .00001, .000001, .0000001]
# NOTE(review): Softmax(dim=0) normalises across dim 0, which is the batch dimension
# for the (batch, features) activations the CNN applies it to — confirm whether
# dim=1 was intended.
activation_function_list = [nn.ReLU(), nn.LeakyReLU(), nn.Softmax(dim=0)]
dropout_rate_list = [.1, .2, .5, .75]
# Candidate hidden-layer widths.
neuron_num_list = [16, 32, 64, 128, 256, 512]
batch_norm_list = [True, False]

In [None]:
# best_transformer_model = random_grid_search(
#     architecture_type='Transformer', 
#     lr_list=lr_list, 
#     activation_function_list=activation_function_list, 
#     neuron_num_list=neuron_num_list, 
#     batch_norm_list=batch_norm_list, 
#     dropout_rate_list=dropout_rate_list, 
#     num_epochs=num_epochs, 
#     early_stop_num=early_stop_num, 
#     num_cnn_layers=num_linear_layers_list, 
#     num_linear_layers=num_cnn_layers_list,
#     num_combos=20, 
#     seed_value=16)

In [None]:
# Build a transformer classifier with 8 attention heads; `num_linear_layers` sets the
# number of stacked encoder layers (5 here).
transformer_model = TransformerEncoder(heads=8, num_linear_layers=5)

In [None]:
# best_cnn_model = random_grid_search(
#     architecture_type='CNN', 
#     lr_list=lr_list, 
#     activation_function_list=activation_function_list, 
#     neuron_num_list=neuron_num_list, 
#     batch_norm_list=batch_norm_list, 
#     dropout_rate_list=dropout_rate_list, 
#     pretrained_embeddings=True,
#     num_epochs=num_epochs, 
#     early_stop_num=early_stop_num, 
#     num_cnn_layers=num_linear_layers_list, 
#     num_linear_layers=num_cnn_layers_list,
#     num_combos=20, 
#     seed_value=16)

In [None]:
# Rebuild the CNN with the hyperparameters encoded in the checkpoint's file name and
# restore its trained weights.
best_model = CNN(cnn_num_classes=5, activation_function=nn.LeakyReLU(), num_neurons=256, num_linear_layers=3, num_cnn_layers=3, batch_norm=False, dropout_rate=.2)
best_model.to('cpu')
# map_location='cpu' remaps tensors that were saved from a GPU, so the checkpoint
# also loads on a machine without CUDA and matches the CPU-resident model.
best_model.load_state_dict(torch.load(r'C:\Users\connor\PycharmProjects\comp_647_final\model_weights\0.0001_LeakyReLU(negative_slope=0.01)_256_False_0.2_3_3_1', map_location='cpu', weights_only=True))

In [None]:
# twitter_roberta test: short training run with Adam and cross-entropy,
# followed by the loss/accuracy plot.
t_roberta = TWITTERROBERTA()
t_roberta.to(device)
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(t_roberta.parameters(), lr=.0001)
(
    epoch_train_loss_list,
    epoch_valid_loss_list,
    epoch_train_accuracy_list,
    epoch_valid_accuracy_list,
) = train_twitter_roberta(
    model=t_roberta,
    criterion=loss,
    optimizer=optimizer,
    num_epochs=10,
    model_name='ROBERTA TEST',
)
plot_loss_and_accuracy(
    epoch_train_loss_list,
    epoch_valid_loss_list,
    epoch_train_accuracy_list,
    epoch_valid_accuracy_list,
    title='ROBERTA TEST',
)

In [None]:
# Restore the fine-tuned RoBERTa weights into a fresh wrapper on the CPU.
best_model = TWITTERROBERTA()
best_model.to('cpu')
# map_location='cpu' remaps tensors that were saved from a GPU, so the checkpoint
# also loads on a machine without CUDA and matches the CPU-resident model.
best_model.load_state_dict(torch.load(r'C:\Users\connor\PycharmProjects\comp_647_final\model_weights\ROBERTA TEST', map_location='cpu', weights_only=True))

## Predictions

In [None]:
# Encode the Kaggle test text with the tokenizer fitted on the training text.
encoded_test = tokenizer.texts_to_sequences(test.text)

# Pad to max_length and convert to an integer tensor on the CPU.
# Caution: `test` is rebound from the DataFrame to a tensor here, so this cell cannot
# be run twice without reloading the CSV.
test = torch.tensor(
    pad_sequences(encoded_test, maxlen=max_length, padding='post'),
    dtype=torch.int,
).to('cpu')

In [None]:
best_model.eval()
with torch.no_grad():
    # `test` is already a tensor and is passed as-is: wrapping it in torch.tensor()
    # again only makes a redundant copy and triggers a warning.
    # The rows go through the model in chunks of 64 so that large models do not run
    # out of memory on the whole test set at once; the logits are re-joined in order.
    outputs = torch.cat([best_model(batch) for batch in torch.split(test, 64)])

In [None]:
# The predicted class is the per-row argmax of the logits (dim=1); a softmax over
# dim=0 would normalise across rows and can change it. num_classes keeps one column
# per model output even when the highest class is never predicted.
softmax_outputs = torch.nn.functional.one_hot(torch.argmax(outputs, dim=1), num_classes=outputs.shape[1]).to(torch.float)
softmax_outputs

In [None]:
# Collapse the one-hot rows to class ids and shift them back to the original label
# range: the training labels were reduced by 1 before one-hot encoding, so the model's
# class k corresponds to sentiment label k + 1.
predictions = pd.DataFrame(softmax_outputs.argmax(dim=1).numpy() + 1)

In [None]:
# Name the single prediction column.
predictions.columns = ['sentiment']

In [None]:
# Use the row position as the starting point for the submission id.
predictions['id'] = predictions.index

In [None]:
# Put the columns in submission order and make the ids start at 1 instead of 0.
predictions = predictions[['id', 'sentiment']].assign(id=lambda frame: frame['id'] + 1)

In [None]:
predictions = predictions.reset_index(drop=True)
# index=False keeps the file to the two submission columns (id, sentiment); the
# default would write the DataFrame index as an extra unnamed first column.
predictions.to_csv(r'C:\Users\connor\PycharmProjects\comp_647_final\predictions.csv', index=False)