In [3]:
import torch
import torch.nn.functional as F
import torch.nn as nn
import numpy as np
import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Read the dataset CSV into a DataFrame; its 'tweet' and 'class' columns are
# consumed further down.
data = pd.read_csv(
    filepath_or_buffer='C:/Users/gangi/OneDrive/Desktop/DL/HateSpeechData.csv',
)

# Stop words removed from every tweet. Built once at import time instead of on
# every call, since the function is applied to each row of the dataset.
_STOP_WORDS = frozenset(["a", "an", "the", "and", "or", "but", "if", "while", "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", "don", "should", "now"])
_URL_PATTERN = re.compile(r"http\S+")
_NON_ALNUM_PATTERN = re.compile(r"[^a-zA-Z0-9\s]")


def simple_preprocess_tweet(tweet):
    """Normalise a raw tweet into a space-joined string of content tokens.

    Steps, in order: strip anything starting with ``http`` up to the next
    whitespace, drop every character that is not an ASCII letter, digit or
    whitespace, lower-case, split on whitespace and discard stop words.

    Args:
        tweet: The raw tweet text. Non-string values (e.g. ``None`` or the
            float NaN pandas uses for missing cells) are treated as empty
            text instead of raising ``TypeError`` from the regex engine.

    Returns:
        The remaining tokens joined by single spaces; ``""`` when nothing
        survives the filtering.
    """
    if not isinstance(tweet, str):
        return ""
    text = _URL_PATTERN.sub("", tweet)
    # Punctuation is removed (not replaced by a space), so "can't" -> "cant".
    text = _NON_ALNUM_PATTERN.sub("", text).lower()
    return ' '.join(word for word in text.split() if word not in _STOP_WORDS)

# Clean every tweet before vectorising it.
data['cleaned_tweet'] = data['tweet'].apply(simple_preprocess_tweet)

# Feature extraction using TF-IDF
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(data['cleaned_tweet'])

# Compute cosine similarity between tweets and keep the index pairs whose
# similarity exceeds the threshold, excluding each tweet paired with itself.
cosine_sim = cosine_similarity(tfidf_matrix)
threshold = 0.5
edges = np.argwhere(cosine_sim > threshold)
edges = edges[edges[:, 0] != edges[:, 1]]

# Node features are the densified TF-IDF rows; labels come from the 'class' column.
x = torch.tensor(tfidf_matrix.toarray(), dtype=torch.float)
y = torch.tensor(data['class'].values, dtype=torch.long)

# Dense 0/1 adjacency matrix with self-loops.
num_nodes = x.shape[0]
adj = torch.zeros((num_nodes, num_nodes), dtype=torch.float)
# One vectorised write per direction replaces a Python loop that issued two
# scalar tensor assignments per edge; an empty edge list is a no-op.
edge_rows, edge_cols = torch.as_tensor(edges, dtype=torch.long).T
adj[edge_rows, edge_cols] = 1
adj[edge_cols, edge_rows] = 1
# Self-pairs were filtered out above, so the diagonal is still zero here and
# setting it to 1 in place equals adding an identity matrix, without
# allocating a second num_nodes x num_nodes tensor.
adj.fill_diagonal_(1)
# NOTE(review): adj is used unnormalised (no degree scaling) — confirm this is intended.

class GraphConvLayer(nn.Module):
    """Single graph-convolution step: linear projection, then aggregation via ``adj``.

    Args:
        in_features: Size of each input feature vector.
        out_features: Size of each output feature vector.
    """

    def __init__(self, in_features, out_features):
        super().__init__()
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x, adj):
        """Return ``adj`` matrix-multiplied with the linearly projected ``x``.

        Args:
            x: Input features, one row per node.
            adj: Matrix applied on the left of the projected features.
        """
        projected = self.linear(x)
        return adj.mm(projected)

class GCN(nn.Module):
    """Two stacked ``GraphConvLayer`` modules that output per-row log-probabilities.

    Args:
        input_dim: Width of the input features.
        hidden_dim: Width of the intermediate representation.
        output_dim: Number of output scores per row.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.conv1 = GraphConvLayer(input_dim, hidden_dim)
        self.conv2 = GraphConvLayer(hidden_dim, output_dim)

    def forward(self, x, adj):
        """Run both graph convolutions and return ``log_softmax`` over dim 1."""
        hidden = self.conv1(x, adj)
        hidden = F.relu(hidden)
        # Dropout follows the module's train/eval mode.
        hidden = F.dropout(hidden, training=self.training)
        scores = self.conv2(hidden, adj)
        return F.log_softmax(scores, dim=1)

# Network sized to the feature width of x, with a 16-unit hidden layer.
# NOTE(review): output_dim=3 presumably matches the number of distinct values
# in data['class'] — confirm against the dataset.
model = GCN(
    input_dim=x.shape[1],
    hidden_dim=16,
    output_dim=3,
)
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

def train():
    """Run one optimisation step over the whole graph and return the loss.

    Uses the module-level ``model``, ``optimizer``, ``x``, ``adj`` and ``y``.

    Returns:
        The negative log-likelihood loss of this step as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    log_probs = model(x, adj)
    step_loss = F.nll_loss(log_probs, y)

    step_loss.backward()
    optimizer.step()
    return step_loss.item()

# Run 200 training steps, printing the loss after each one. The generator is
# consumed lazily, so train() is still called once per iteration, in order.
for epoch, loss in enumerate(train() for _ in range(200)):
    print(f'Epoch {epoch}, Loss: {loss}')

def evaluate():
    """Return the fraction of rows whose highest-scoring class equals the label.

    Uses the module-level ``model``, ``x``, ``adj`` and ``y``, and switches
    the model to eval mode as a side effect.

    NOTE(review): ``x`` and ``y`` are the same tensors ``train()`` optimises
    on, so this is accuracy on the training data, not a held-out score.

    Returns:
        Accuracy as a float in ``[0, 1]``.
    """
    model.eval()
    # Inference needs no gradients; without no_grad() autograd would record
    # the whole forward pass and keep its intermediate tensors alive.
    with torch.no_grad():
        _, pred = model(x, adj).max(dim=1)
    correct = int(pred.eq(y).sum().item())
    acc = correct / len(y)
    return acc

# Report the accuracy returned by evaluate(), which scores the model on the
# same x / y tensors that the training loop used.
accuracy = evaluate()
print(f'Accuracy: {accuracy}')


Epoch 0, Loss: 1.2946487665176392
Epoch 1, Loss: 1.1836990118026733
Epoch 2, Loss: 1.1209583282470703
Epoch 3, Loss: 1.070948839187622
Epoch 4, Loss: 1.0255149602890015
Epoch 5, Loss: 0.9832141995429993
Epoch 6, Loss: 0.9432388544082642
Epoch 7, Loss: 0.9092664122581482
Epoch 8, Loss: 0.878065288066864
Epoch 9, Loss: 0.8413445949554443
Epoch 10, Loss: 0.8125799894332886
Epoch 11, Loss: 0.7798949480056763
Epoch 12, Loss: 0.7528461217880249
Epoch 13, Loss: 0.7315136790275574
Epoch 14, Loss: 0.7079595923423767
Epoch 15, Loss: 0.6795039772987366
Epoch 16, Loss: 0.6635429859161377
Epoch 17, Loss: 0.6363340616226196
Epoch 18, Loss: 0.6157311797142029
Epoch 19, Loss: 0.6028693914413452
Epoch 20, Loss: 0.5858156681060791
Epoch 21, Loss: 0.565686821937561
Epoch 22, Loss: 0.5477676391601562
Epoch 23, Loss: 0.5349727272987366
Epoch 24, Loss: 0.5063024759292603
Epoch 25, Loss: 0.49690353870391846
Epoch 26, Loss: 0.48477795720100403
Epoch 27, Loss: 0.4635465145111084
Epoch 28, Loss: 0.4563159346580