In [None]:
pip install torchtext==0.12.0

In [None]:
pip install gradio

In [17]:
import argparse
import os

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchtext
from torchtext import data

In [18]:
# 3.3.1 - pretrained GloVe word vectors (100-dimensional, "6B" variant).
# Heads-up: the first execution downloads an 862MB archive to .vector_cache/glove.6B.zip
glove = torchtext.vocab.GloVe(dim=100, name="6B")

In [47]:
class CNN(torch.nn.Module):
    """Two-branch convolutional sentence classifier on frozen pretrained embeddings.

    Each branch slides a kernel of height ``k`` (a window of ``k`` consecutive
    tokens) across the full embedding width, applies ReLU, and max-pools over
    all window positions. The pooled features of both branches are concatenated
    and mapped to a single logit by a linear layer.

    Args:
        vocab: object exposing ``vectors``, a 2-D float tensor of shape
            (vocab_size, embedding_dim), used to initialise the embedding table.
        k1: window height (in tokens) of the first convolution branch.
        k2: window height (in tokens) of the second convolution branch.
        n1: number of output channels of the first branch.
        n2: number of output channels of the second branch.
    """

    def __init__(self, vocab, k1, k2, n1, n2):
        super(CNN, self).__init__()
        self.embedding = nn.Embedding.from_pretrained(vocab.vectors, freeze=True)
        embedding_dim = vocab.vectors.shape[1]

        # Kernels span the whole embedding width, so the convolution only moves along the token axis.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=n1, kernel_size=(k1, embedding_dim), bias=False)
        self.conv2 = nn.Conv2d(in_channels=1, out_channels=n2, kernel_size=(k2, embedding_dim), bias=False)

        self.fc = nn.Linear(n1 + n2, 1)

        # Shortest token sequence both branches can convolve over. Stored as a plain
        # attribute (not a parameter/buffer), so saved state_dicts still load unchanged.
        self._min_seq_len = max(k1, k2)

    def forward(self, x):
        """Return one logit per sequence.

        Args:
            x: LongTensor of token indices laid out as (seq_len, batch).

        Returns:
            Tensor of shape (batch,) holding the raw (pre-sigmoid) logits.
        """
        embedded = self.embedding(x)
        # (seq_len, batch, dim) -> (batch, 1, seq_len, dim): Conv2d expects batch first
        # plus a channel axis. Without this reordering the model breaks (fix found with Copilot).
        embedded = embedded.permute(1, 0, 2).unsqueeze(1)

        # A sequence shorter than the tallest kernel would make Conv2d raise
        # ("kernel size can't be greater than actual input size"). Append all-zero
        # embedding rows at the end of the token axis so short sentences still work.
        shortfall = self._min_seq_len - embedded.shape[2]
        if shortfall > 0:
            embedded = F.pad(embedded, (0, 0, 0, shortfall))

        # squeeze(3) drops the width axis the full-width kernel collapsed to size 1
        # (Colab auto-suggestion; the code did not work before it was added).
        conv1_out = F.relu(self.conv1(embedded)).squeeze(3)
        # Max over every window position -> (batch, n1)
        conv1_out = F.max_pool1d(conv1_out, conv1_out.shape[2]).squeeze(2)

        conv2_out = F.relu(self.conv2(embedded)).squeeze(3)
        conv2_out = F.max_pool1d(conv2_out, conv2_out.shape[2]).squeeze(2)

        concat_out = torch.cat((conv1_out, conv2_out), dim=1)

        output = self.fc(concat_out).squeeze(1)

        return output

In [48]:
class BaselineModel(torch.nn.Module):
    """Bag-of-embeddings baseline: average the word vectors, then one linear layer.

    Args:
        vocab: object exposing ``vectors``, a 2-D float tensor of shape
            (vocab_size, embedding_dim), used to initialise the embedding table.
    """

    def __init__(self, vocab):
        super(BaselineModel, self).__init__()
        # freeze=True is from_pretrained's default; spelled out so the embeddings
        # are visibly not trained.
        self.embedding = nn.Embedding.from_pretrained(vocab.vectors, freeze=True)
        # Size the classifier from the actual embedding width instead of assuming 100.
        embedding_dim = vocab.vectors.shape[1]
        self.fc = nn.Linear(embedding_dim, 1)

    def forward(self, x):
        """Return one logit per sequence.

        Args:
            x: LongTensor of token indices; the averaging runs over dim 0,
               i.e. the layout is (seq_len, batch).

        Returns:
            Tensor of shape (batch, 1) holding the raw (pre-sigmoid) logits.
        """
        embeddings = self.embedding(x)
        # Collapse the token axis: every sequence becomes the mean of its word vectors.
        avg_embeddings = torch.mean(embeddings, dim=0)
        output = self.fc(avg_embeddings)
        return output

In [49]:
# Restore the trained baseline weights into a freshly built model.
# map_location='cpu' lets a checkpoint that was saved from a GPU run load on a
# CPU-only runtime; the model and all inference tensors in this notebook live on the CPU.
# NOTE: torch.load unpickles the file - only load checkpoints from a source you trust.
checkpoint_baseline = torch.load('model_baseline.pt', map_location='cpu')
model_baseline = BaselineModel(glove)
model_baseline.load_state_dict(checkpoint_baseline)

<All keys matched successfully>

In [50]:
# Restore the trained CNN weights. The kernel heights (k1, k2) and channel counts
# (n1, n2) determine the layer shapes, so they must match the checkpoint for
# load_state_dict to succeed.
# map_location='cpu' lets a checkpoint that was saved from a GPU run load on a
# CPU-only runtime; the model and all inference tensors in this notebook live on the CPU.
# NOTE: torch.load unpickles the file - only load checkpoints from a source you trust.
checkpoint_cnn = torch.load('CNN.pt', map_location='cpu')
model_cnn = CNN(glove, k1=2, n1=20, k2=3, n2=25)
model_cnn.load_state_dict(checkpoint_cnn)

<All keys matched successfully>

In [51]:
# Put the baseline model in evaluation mode before serving predictions.
# eval() returns the module itself, which is why its structure is echoed as the cell output.
model_baseline.eval()

BaselineModel(
  (embedding): Embedding(400000, 100)
  (fc): Linear(in_features=100, out_features=1, bias=True)
)

In [52]:
# Put the CNN model in evaluation mode before serving predictions.
# eval() returns the module itself, which is why its structure is echoed as the cell output.
model_cnn.eval()

CNN(
  (embedding): Embedding(400000, 100)
  (conv1): Conv2d(1, 20, kernel_size=(2, 100), stride=(1, 1), bias=False)
  (conv2): Conv2d(1, 25, kernel_size=(3, 100), stride=(1, 1), bias=False)
  (fc): Linear(in_features=45, out_features=1, bias=True)
)

In [87]:
def classify_sentence(sentence):
    """Classify one sentence with both the baseline and the CNN model.

    The sentence is split on whitespace, each token is mapped to its GloVe
    vocabulary index, and the resulting column of indices is fed to
    ``model_baseline`` and ``model_cnn``.

    Args:
        sentence: raw text typed by the user.

    Returns:
        Tuple ``(baseline_class, baseline_prob, cnn_class, cnn_prob)`` where each
        class is "Subjective" (probability > 0.5) or "Objective", and each
        probability is the sigmoid of the model's logit as a Python float.

    Raises:
        ValueError: if the sentence contains no tokens (empty, whitespace-only, or None).
    """
    tokens = (sentence or "").split()
    if not tokens:
        # A zero-length tensor would yield a NaN baseline score and a convolution
        # error in the CNN; fail early with a message the UI user can act on.
        raise ValueError("Please enter a sentence containing at least one word.")

    # Convert to integer representation per token; words missing from the
    # vocabulary fall back to the last vocabulary index.
    # NOTE(review): tokens are looked up as typed (no lower-casing) - confirm this
    # matches the preprocessing used when the models were trained.
    fallback_index = len(glove.stoi) - 1
    token_ints = [glove.stoi.get(tok, fallback_index) for tok in tokens]
    # Convert into a tensor of the shape accepted by the models: (num_tokens, 1)
    token_tensor = torch.LongTensor(token_ints).view(-1, 1)

    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        baseline_output = model_baseline(token_tensor)
        cnn_output = model_cnn(token_tensor)

    # Logits -> probabilities, extracted as plain floats (one sentence in, one value out).
    baseline_prob = torch.sigmoid(baseline_output).item()
    cnn_prob = torch.sigmoid(cnn_output).item()

    # (the thresholding/return logic below was originally auto-generated by Google Colab)
    baseline_class = "Subjective" if baseline_prob > 0.5 else "Objective"
    cnn_class = "Subjective" if cnn_prob > 0.5 else "Objective"

    return baseline_class, baseline_prob, cnn_class, cnn_prob

# Gradio front-end: one text input, and for each model a predicted label plus its probability.
# (Generative AI was used to help with the Gradio syntax here.)
interface = gr.Interface(
    title="Sentence Classification",
    fn=classify_sentence,
    inputs=gr.Textbox(placeholder="Enter a sentence here...", lines=2),
    # Order must match the tuple returned by classify_sentence.
    outputs=[
        gr.Textbox(label="Baseline Model Classification"),
        gr.Number(label="Baseline Model Probability"),
        gr.Textbox(label="CNN Model Classification"),
        gr.Number(label="CNN Model Probability"),
    ],
)

# debug=True keeps the cell running so errors and logs stay visible; interrupt the cell to stop the server.
interface.launch(debug=True)

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://d08df008f8012ad585.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7871 <> https://d08df008f8012ad585.gradio.live


