In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv("data/MT_CSD/Biden/text.csv")
df.head()

Unnamed: 0,id,text
0,1,Biden to pardon all prior federal offenses of ...
1,1-1,"Meanwhile in Britain, it was reported this wee..."
2,1-1-1,Imagine looking at all the US problems and say...
3,1-1-1-1,Naruto-running to reach peak stupid with the l...
4,1-1-1-2,"SCOTUS. Has entered the chat, also those laws ..."


In [3]:
adj = np.zeros((len(df.index), len(df.index)))

for i, id in enumerate(df.id):
    parent_id = "-".join(id.split("-")[:-1])
    parent_i = (df.id == parent_id).to_numpy().nonzero()[0]
    if len(parent_i):
        parent_i = parent_i[0]
        adj[parent_i, i] = 1
        adj[i, parent_i] = 1

In [4]:
import json

In [5]:
def read_labels(dataset):
    stances = np.ones(len(df.index)) * -1
    training_mask = np.zeros(len(df.index)).astype(bool)
    validation_mask = np.zeros(len(df.index)).astype(bool)
    test_mask = np.zeros(len(df.index)).astype(bool)
    for n in json.load(open(f"data/MT_CSD/{dataset}/train.json")):
        id = n['index'][-1]
        i = (df.id == id).to_numpy().nonzero()[0][0]
        stance = n['stance']
        if stance == "favor":
            stances[i] = 0
        elif stance == "against":
            stances[i] = 1
        elif stance == "none":
            stances[i] = 2
        training_mask[i] = True
    for n in json.load(open(f"data/MT_CSD/{dataset}/test.json")):
        id = n['index'][-1]
        i = (df.id == id).to_numpy().nonzero()[0][0]
        stance = n['stance']
        if stance == "favor":
            stances[i] = 0
        elif stance == "against":
            stances[i] = 1
        elif stance == "none":
            stances[i] = 2
        validation_mask[i] = True
    for n in json.load(open(f"data/MT_CSD/{dataset}/valid.json")):
        id = n['index'][-1]
        i = (df.id == id).to_numpy().nonzero()[0][0]
        stance = n['stance']
        if stance == "favor":
            stances[i] = 0
        elif stance == "against":
            stances[i] = 1
        elif stance == "none":
            stances[i] = 2
        test_mask[i] = True
    return stances, training_mask, validation_mask, test_mask

In [6]:
y, training_mask, validation_mask, test_mask = read_labels("Biden")

In [7]:
import torch
from torch import nn
import torch_geometric

  from .autonotebook import tqdm as notebook_tqdm


In [19]:
X = torch.eye(len(df.index))

adj_train = torch.tensor(adj[training_mask][:, training_mask], dtype=torch.float)
adj_val = torch.tensor(adj[validation_mask][:, validation_mask], dtype=torch.float)
adj_test = torch.tensor(adj[test_mask][:, test_mask], dtype=torch.float)

train_edge_index = torch_geometric.utils.dense_to_sparse(adj_train)[0]
val_edge_index = torch_geometric.utils.dense_to_sparse(adj_val)[0]
test_edge_index = torch_geometric.utils.dense_to_sparse(adj_test)[0]

y_train = torch.eye(3)[y[training_mask]]
y_val = torch.eye(3)[y[validation_mask]]
y_test = torch.eye(3)[y[test_mask]]

In [30]:
class Model(nn.Module):

    def __init__(self, input_dim, output_dim):
        super(Model, self).__init__()
        self.dropout = nn.Dropout(0.5)
        self.gcn = torch_geometric.nn.GCNConv(input_dim, input_dim)
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, X, edge_index):
        y = self.dropout(X)
        y = self.gcn(X, edge_index)
        y = torch.relu(y)
        y = self.linear(y)
        y = torch.log_softmax(y, dim=1)
        return y

In [33]:
from sklearn.metrics import accuracy_score

In [34]:
model = Model(len(df.index), 3)
epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    y_pred = model(X, train_edge_index)
    y_pred = y_pred[training_mask]
    loss = criterion(y_pred, y_train)
    loss.backward()
    optimizer.step()
    with torch.no_grad():
        model.eval()
        y_pred = model(X, val_edge_index)
        y_pred = y_pred[validation_mask]
        val_loss = criterion(y_pred, y_val)
        accuracy = accuracy_score(y[validation_mask], y_pred.argmax(dim=1).numpy())
        print(f"Epoch {epoch}, Train Loss {loss.item()}, Validation Loss {val_loss.item()}")
        print(f"Accuracy {accuracy}")

Epoch 0, Train Loss 1.0976523160934448, Validation Loss 1.0204768180847168
Accuracy 0.46411483253588515
Epoch 1, Train Loss 0.9310803413391113, Validation Loss 1.0275682210922241
Accuracy 0.4688995215311005
Epoch 2, Train Loss 0.7469777464866638, Validation Loss 1.088528037071228
Accuracy 0.47129186602870815
Epoch 3, Train Loss 0.563279390335083, Validation Loss 1.1253026723861694
Accuracy 0.47129186602870815
Epoch 4, Train Loss 0.40514376759529114, Validation Loss 1.1536836624145508
Accuracy 0.47368421052631576
Epoch 5, Train Loss 0.3156663477420807, Validation Loss 1.2020868062973022
Accuracy 0.4688995215311005
Epoch 6, Train Loss 0.2584441602230072, Validation Loss 1.2614595890045166
Accuracy 0.4688995215311005
Epoch 7, Train Loss 0.21749328076839447, Validation Loss 1.2870699167251587
Accuracy 0.47129186602870815
Epoch 8, Train Loss 0.18357087671756744, Validation Loss 1.2995564937591553
Accuracy 0.47129186602870815
Epoch 9, Train Loss 0.15570184588432312, Validation Loss 1.3322024