# Experiments

This notebook trains and evaluates several different models on the graph generated from the create_graph notebook.

In [1]:
import getpass
import pandas as pd
import numpy as np
import ast
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import confusion_matrix
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

import torch
import torch_geometric
import torch.nn.functional as F
from torch.utils.data import WeightedRandomSampler
from torch_geometric.nn.models import GAT
from torch_geometric.nn import GCNConv



In [None]:
import wandb

## WandB
This project uses weights and biases (wandb) to store its training run data. Please install wandb and run `wandb login` to enable wandb logging of the training.

In [2]:
user = getpass.getuser()
# Change dirpath to the location of the TwiBot22 dataset on your device
dirpath = '/scratch/{user}/datasets/TwiBot22/'

user = pd.read_csv(f'{dirpath}/user_final.csv')
graph = pd.read_csv(f'{dirpath}/graph_cleaned.csv', float_precision = 'round_trip')

In [3]:
# Function to standardize target_user_id format
def standardize_target_id(target_id):
    try:
        # If it's already a string representation of a list
        if isinstance(target_id, str):
            if target_id.startswith('['):
                return ast.literal_eval(target_id)
            else:
                return [int(float(target_id))]
        # If it's a float
        elif isinstance(target_id, float):
            if pd.isna(target_id):
                return []
            return [int(target_id)]
        # If it's already a list
        elif isinstance(target_id, list):
            return target_id
        # If it's an integer
        elif isinstance(target_id, (int, np.integer)):
            return [target_id]
        else:
            print(type(target_id), target_id)
            return []
    except Exception as e:
        print(f"Exception on target id {target_id}: {e}")
        return []



In [4]:
# Clean the graph DataFrame
print("Original length:", len(graph))

# Standardize target_user_id format
graph['target_user_id'] = graph['target_user_id'].apply(standardize_target_id)
graph['target_user_id'] = graph['target_user_id'].apply(lambda x: np.array(x, dtype=np.int64))

# Remove rows where target_user_id is empty
graph = graph[graph['target_user_id'].apply(len) > 0]

# Remove rows where source_user_id is not in valid_user_ids
valid_user_ids = set(user['source_user_id'])
graph = graph[graph['source_user_id'].isin(valid_user_ids)]

print("Length after cleaning:", len(graph))

# Verify the cleaning worked
print("\nSample of cleaned target_user_id types:")
print(graph['target_user_id'].head().apply(type))
print("\nSample of cleaned target_user_id values:")
print(graph['target_user_id'].head())

Original length: 615613
Length after cleaning: 510190

Sample of cleaned target_user_id types:
0     <class 'numpy.ndarray'>
1     <class 'numpy.ndarray'>
12    <class 'numpy.ndarray'>
13    <class 'numpy.ndarray'>
14    <class 'numpy.ndarray'>
Name: target_user_id, dtype: object

Sample of cleaned target_user_id values:
0              [3123238004]
1               [343627165]
12    [1056221232822022144]
13     [993279617938042880]
14               [15245653]
Name: target_user_id, dtype: object


In [5]:
# Now proceed with the original code
user_to_index = {user_id: idx for idx, user_id in enumerate(user['source_user_id'])}
labels = torch.tensor(user['label'].map({'human': 0, 'bot': 1}).values, dtype=torch.long)

# Rest of the code remains the same...
# Create edge list properly
edge_list = []
edge_weights = {}

for _, row in tqdm(graph.iterrows()):
    source_id = row['source_user_id']
    target_ids = row['target_user_id']
    #target_ids = ast.literal_eval(row['target_user_id']) if isinstance(row['target_user_id'], str) else [row['target_user_id']]
    
    source_idx = user_to_index[source_id]
    for target_id in target_ids:
        #print(target_id, target_ids)
        if target_id in user_to_index:  # Check if target exists in our user set
            target_idx = user_to_index[target_id]
            pair = (source_idx, target_idx)
            if pair not in edge_weights:
                edge_list.append(pair)
                edge_weights[pair] = 1
            else:
                edge_weights[pair] += 1

edge_index = torch.tensor(edge_list, dtype=torch.long).t().contiguous()
edge_weight = torch.tensor(list(edge_weights.values()), dtype=torch.float)

510190it [00:22, 22270.51it/s]


In [6]:
# Create node features using tweet embeddings
node_features = []
for user_id in tqdm(user['source_user_id']):
    user_tweets = graph[graph['source_user_id'] == user_id]
    if len(user_tweets) > 0:
        # Filter out NaN values and safely evaluate the embeddings
        valid_embeddings = []
        for emb in user_tweets['tweet_embedding'].values:
            try:
                if pd.notna(emb):  # Check if embedding is not NaN
                    emb_str = emb.replace('[', '').replace(']', '')
                    emb_values = [float(x) for x in emb_str.split()]
                    valid_embeddings.append(emb_values)
            except Exception as e:
                print(e)
                continue
        
        if valid_embeddings:  # If we have any valid embeddings
            embeddings = np.vstack(valid_embeddings)
            node_features.append(torch.tensor(embeddings.mean(axis=0)))
        else:
            node_features.append(torch.zeros(100))  # Default for users with no valid embeddings
    else:
        node_features.append(torch.zeros(100))  # Default for users with no tweets

node_features = torch.stack(node_features)

100%|██████████| 189091/189091 [01:33<00:00, 2021.41it/s]


In [7]:
# Count zero tensors
zero_tensor = torch.zeros(100)  # Assuming 100-dimensional embeddings
zero_count = sum((tensor == zero_tensor).all().item() for tensor in node_features)
total_count = len(node_features)

print(f"Number of zero tensors: {zero_count} out of {total_count} ({(zero_count/total_count)*100:.2f}%)")

# To see which user IDs have zero embeddings:
zero_indices = [i for i, tensor in enumerate(node_features) if (tensor == zero_tensor).all().item()]
zero_user_ids = user['source_user_id'].iloc[zero_indices]

print("\nFirst few user IDs with zero embeddings:")
print(zero_user_ids.head())

Number of zero tensors: 70446 out of 189091 (37.26%)

First few user IDs with zero embeddings:
3     138814032
4     457554412
5    2465283662
6     284870222
9      83389771
Name: source_user_id, dtype: int64


In [8]:
# Feature infilling if text embedding = 0

# Select numerical columns from user DataFrame
numerical_features = user.select_dtypes(include=['float64', 'int64']).columns
user_numerical = user[numerical_features]

# Manual normalization function
def normalize_features(df):
    result = df.copy()
    for column in df.columns:
        mean = df[column].mean()
        std = df[column].std()
        if std != 0:
            result[column] = (df[column] - mean) / std
        else:
            result[column] = 0  # For constant columns
    return result

# Normalize the features
user_features = normalize_features(user_numerical)
user_features = torch.tensor(user_features.values, dtype=torch.float)

# Combine with tweet embeddings
combined_features = []
zero_tensor = torch.zeros(node_features.size(1))

for i, user_id in enumerate(user['source_user_id']):
    if (node_features[i] == zero_tensor).all():
        # If no tweet embeddings, use processed user features
        # Pad or truncate user features to match embedding dimension if necessary
        if user_features.size(1) > 100:
            combined_features.append(user_features[i][:100])
        elif user_features.size(1) < 100:
            padded = torch.zeros(100)
            padded[:user_features.size(1)] = user_features[i]
            combined_features.append(padded)
        else:
            combined_features.append(user_features[i])
    else:
        # If we have tweet embeddings, use those
        combined_features.append(node_features[i])

combined_features = torch.stack(combined_features)

# Verify results
zero_tensor = torch.zeros(combined_features.size(1))
zero_count = sum((tensor == zero_tensor).all().item() for tensor in combined_features)
total_count = len(combined_features)

print(f"\nNumber of zero tensors: {zero_count} out of {total_count} ({(zero_count/total_count)*100:.2f}%)")
print(f"Feature dimension: {combined_features.size(1)}")

# Print a sample of non-zero features to verify they look correct
non_zero_idx = (combined_features != zero_tensor).any(dim=1).nonzero().squeeze()
if len(non_zero_idx) > 0:
    print("\nSample non-zero features (first 10 values):")
    print(combined_features[non_zero_idx[0]][:10])


Number of zero tensors: 0 out of 189091 (0.00%)
Feature dimension: 100

Sample non-zero features (first 10 values):
tensor([-0.5917, -0.1012,  0.2680,  0.1024, -0.0170,  0.2148, -0.1373, -0.5112,
         0.1039, -0.4348], dtype=torch.float64)


In [10]:
class GATModel(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers=2, heads=1, dropout=0.6):
        super(GATModel, self).__init__()
        self.gat = GAT(
            in_channels=in_channels,
            hidden_channels=hidden_channels,  
            out_channels=out_channels, 
            num_layers=num_layers,
            heads=heads,
            dropout=dropout,
            act=F.elu,
        )

    def forward(self, x, edge_index, edge_weight=None):
        return self.gat(x, edge_index, edge_weight=edge_weight)


In [11]:
# Example split (80% train, 10% val, 10% test)
train_mask, test_mask = train_test_split(torch.arange(labels.size(0)), test_size=0.2, stratify=labels)
val_mask, test_mask = train_test_split(test_mask, test_size=0.5, stratify=labels[test_mask])

# Convert to boolean masks
train_mask = torch.zeros_like(labels, dtype=torch.bool).scatter_(0, train_mask, True)
val_mask = torch.zeros_like(labels, dtype=torch.bool).scatter_(0, val_mask, True)
test_mask = torch.zeros_like(labels, dtype=torch.bool).scatter_(0, test_mask, True)

In [20]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data, labels, edge_index, edge_weight = combined_features.to(device), labels.to(device), edge_index.to(device), edge_weight.to(device)
data = data.float()  # Convert node features to float32
edge_index = edge_index.long()  # Edge indices should be long
labels = labels.long() 

data = (data - data.mean(dim=0)) / data.std(dim=0)
data = torch.nan_to_num(data, nan=0.0)  # Replace NaN with 0

In [21]:
# Label balance

num_bots = torch.sum(labels == 1)
num_humans = torch.sum(labels == 0)
print(f"""Human tweets: {num_humans-num_bots}, Bot tweets: {num_bots}
Bot percentage: {100*num_bots / len(labels)}
""")


Human tweets: 156139, Bot tweets: 16476
Bot percentage: 8.713264465332031



In [10]:
len(labels)

189091

### Only need to run 1 of the following two cells containing GAT instantiation and training loop. The difference is that one set logs to wandb and the other doesn't.

In [23]:
# Initialize the model
model = GATModel(
    in_channels=data.size(1),
    hidden_channels=64,
    out_channels=2,
    num_layers=2,
    heads=4,
    dropout=0.6
).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

# Training loop
for epoch in range(200):
    model.train()
    optimizer.zero_grad()
    
    # Forward pass
    out = model(data, edge_index, edge_weight=edge_weight)
    
    # Compute loss on training nodes
    loss = criterion(out[train_mask], labels[train_mask])
    loss.backward()
    optimizer.step()
    
    # Validation
    model.eval()
    with torch.no_grad():
        val_loss = criterion(out[val_mask], labels[val_mask])
        val_acc = (out[val_mask].argmax(dim=1) == labels[val_mask]).float().mean()
    
    #if epoch % 20 == 0:
    #print(f'Epoch: {epoch+1}, Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}, Val Acc: {val_acc:.4f}')


In [24]:
lr = 1e-3
epochs = 1000
heads = 4
dropout = 0.6
weight_decay = 5e-4

# Initialize wandb
wandb.init(
    project="gat-classification",  # Choose your project name
    config={
        "architecture": "GAT",
        "in_channels": data.size(1),
        "hidden_channels": 64,
        "out_channels": 2,
        "num_layers": 2,
        "heads": heads,
        "dropout": dropout,
        "learning_rate": lr,
        "weight_decay": weight_decay,
        "epochs": epochs
    }
)

model = GATModel(
    in_channels=data.size(1),
    hidden_channels=64,
    out_channels=2,
    num_layers=2,
    heads=heads,
    dropout=dropout
).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
criterion = torch.nn.CrossEntropyLoss()

# Training loop
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    
    # Forward pass
    out = model(data, edge_index, edge_weight=edge_weight)
    
    # Compute training metrics
    train_loss = criterion(out[train_mask], labels[train_mask])
    train_acc = (out[train_mask].argmax(dim=1) == labels[train_mask]).float().mean()
    
    # Backward pass
    train_loss.backward()
    optimizer.step()
    
    # Validation
    model.eval()
    with torch.no_grad():
        val_loss = criterion(out[val_mask], labels[val_mask])
        val_acc = (out[val_mask].argmax(dim=1) == labels[val_mask]).float().mean()
    
    # Log metrics to wandb
    wandb.log({
        "epoch": epoch,
        "train_loss": train_loss.item(),
        "train_acc": train_acc.item(),
        "val_loss": val_loss.item(),
        "val_acc": val_acc.item()
    })

wandb.finish()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mbae9wk[0m ([33mcrg[0m). Use [1m`wandb login --relogin`[0m to force relogin


0,1
epoch,▁▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇███
train_acc,▁▃▅▅▆▆▇▇▇▇██████████████████████████████
train_loss,█▇▇▆▅▄▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▃▃▅▅▆▇▇▇▇▇▇████████████████████████████
val_loss,██▇▄▄▃▃▃▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,999.0
train_acc,0.91382
train_loss,0.29966
val_acc,0.91359
val_loss,0.29964


In [25]:
# Evaluate our GAT model
model.eval()

# Forward pass on the test set
with torch.no_grad():
    out = model(data, edge_index)
    
    # Get predicted class by taking the argmax across the output logits
    _, predicted = out.max(dim=1)
    print(predicted.shape)
    print(labels[test_mask].shape)
    
    # Calculate accuracy
    correct = (predicted[test_mask] == labels[test_mask]).sum().item()
    total = test_mask.sum().item()
    accuracy = correct / total
    
    # Calculate R2
    predicted_probs = torch.softmax(out[test_mask], dim=1)[:, 1].cpu().numpy()
    true_values = labels[test_mask].cpu().numpy()
    r2 = r2_score(true_values, predicted_probs)
    
    print(f"Test Accuracy: {accuracy * 100:.2f}%")
    print(f"R² Score: {r2:.4f}")


torch.Size([189091])
torch.Size([18910])
Test Accuracy: 91.37%
R² Score: 0.0622


In [26]:
cm = confusion_matrix(labels[test_mask].cpu().numpy(), predicted[test_mask].cpu().numpy())
print(cm)

[[17231    31]
 [ 1600    48]]


In [36]:
class GCNCluster(torch.nn.Module):
    def __init__(self, num_features, hidden_channels=64):
        super(GCNCluster, self).__init__()
        # First Graph Convolution Layer
        self.conv1 = GCNConv(num_features, hidden_channels)
        # Second Graph Convolution Layer
        self.conv2 = GCNConv(hidden_channels, 2)  # 2 classes (binary)

    def forward(self, x, edge_index, edge_weight=None):
        # First layer with ReLU activation
        x = self.conv1(x, edge_index, edge_weight)
        x = F.relu(x)
        x = F.dropout(x, p=0.5, training=self.training)
        
        # Second layer
        x = self.conv2(x, edge_index, edge_weight)
        
        return F.log_softmax(x, dim=1)

# Training setup
def train_model(data, labels, edge_index, edge_weight, num_epochs=200):
    model = GCNCluster(num_features=data.shape[1]).cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    
    model.train()
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        out = model(data, edge_index, edge_weight)
        loss = F.nll_loss(out, labels)
        loss.backward()
        optimizer.step()
        
        if epoch % 10 == 0:
            pred = out.argmax(dim=1)
            correct = (pred == labels).sum()
            acc = int(correct) / len(labels)
            print(f'Epoch {epoch:3d}, Loss: {loss:.4f}, Accuracy: {acc:.4f}')
    
    return model

In [37]:
gcn_model = train_model(data, labels, edge_index, edge_weight)

Epoch   0, Loss: 1.6052, Accuracy: 0.3551
Epoch  10, Loss: 0.5045, Accuracy: 0.9013
Epoch  20, Loss: 0.4068, Accuracy: 0.8968
Epoch  30, Loss: 0.3515, Accuracy: 0.9060
Epoch  40, Loss: 0.3277, Accuracy: 0.9102
Epoch  50, Loss: 0.3139, Accuracy: 0.9130
Epoch  60, Loss: 0.3067, Accuracy: 0.9148
Epoch  70, Loss: 0.2998, Accuracy: 0.9157
Epoch  80, Loss: 0.2951, Accuracy: 0.9162
Epoch  90, Loss: 0.2908, Accuracy: 0.9165
Epoch 100, Loss: 0.2883, Accuracy: 0.9170
Epoch 110, Loss: 0.2847, Accuracy: 0.9173
Epoch 120, Loss: 0.2827, Accuracy: 0.9175
Epoch 130, Loss: 0.2801, Accuracy: 0.9180
Epoch 140, Loss: 0.2781, Accuracy: 0.9184
Epoch 150, Loss: 0.2769, Accuracy: 0.9184
Epoch 160, Loss: 0.2744, Accuracy: 0.9186
Epoch 170, Loss: 0.2737, Accuracy: 0.9188
Epoch 180, Loss: 0.2725, Accuracy: 0.9190
Epoch 190, Loss: 0.2711, Accuracy: 0.9193


In [38]:
# Evaluate our GCN model
gcn_model.eval()

# Forward pass on the test set
with torch.no_grad():
    out = gcn_model(data, edge_index) 
    
    # Get predicted class by taking the argmax across the output logits
    _, predicted = out.max(dim=1)
    print(predicted.shape)
    print(labels[test_mask].shape)
    
    # Calculate accuracy
    correct = (predicted[test_mask] == labels[test_mask]).sum().item()
    total = test_mask.sum().item()
    accuracy = correct / total
    
    # Calculate R2
    predicted_probs = torch.softmax(out[test_mask], dim=1)[:, 1].cpu().numpy()
    true_values = labels[test_mask].cpu().numpy()
    r2 = r2_score(true_values, predicted_probs)
    
    print(f"Test Accuracy: {accuracy * 100:.2f}%")
    print(f"R² Score: {r2:.4f}")


torch.Size([189091])
torch.Size([18910])
Test Accuracy: 92.15%
R² Score: 0.1390


In [41]:
# k means implementation
class ClusteringModel:
    def __init__(self, n_clusters=2):
        self.n_clusters = n_clusters
        self.kmeans = KMeans(n_clusters=n_clusters, random_state=42)
        self.scaler = StandardScaler()
        
    def fit(self, data, labels):
        # Scale data and generate k means cluster map
        data_cpu = data.cpu().numpy()
        labels_cpu = labels.cpu().numpy()
        
        data_scaled = self.scaler.fit_transform(data_cpu)
        self.kmeans.fit(data_scaled)
        cluster_labels = self.kmeans.labels_
        
        self.cluster_map = {}
        for cluster in range(self.n_clusters):
            mask = (cluster_labels == cluster)
            if mask.any():
                true_labels = labels_cpu[mask]
                most_common = np.bincount(true_labels).argmax()
                self.cluster_map[cluster] = most_common
                
        return self
    
    def predict(self, data):
        # Converting from tensor to numpy so that the data is compatible
        #    with sklearn functions
        data_cpu = data.cpu().numpy()
        data_scaled = self.scaler.transform(data_cpu)
        
        cluster_labels = self.kmeans.predict(data_scaled)
        predictions = np.array([self.cluster_map[label] for label in cluster_labels])
        return torch.tensor(predictions, device=data.device)

def train_cluster_model(data, labels):
    model = ClusteringModel(n_clusters=2)
    model.fit(data, labels)
    
    # Calculate accuracy
    predictions = model.predict(data)
    acc = (predictions == labels).float().mean()
    print(f'Clustering Accuracy: {acc:.4f}')
    
    return model

In [42]:
kmeans_model = train_cluster_model(data, labels)

  super()._check_params_vs_input(X, default_n_init=10)


Clustering Accuracy: 0.9129


In [65]:
# Create a balanced cluster net, we implemented this to be a baseline that weighted 
#    bot and human tweets more equally during training to address the class imbalance
class BalancedClusterNet(nn.Module):
    def __init__(self, input_dim, hidden_dim=64):
        super(BalancedClusterNet, self).__init__()
        self.layer1 = nn.Linear(input_dim, hidden_dim)
        self.bn1 = nn.BatchNorm1d(hidden_dim)
        self.layer2 = nn.Linear(hidden_dim, hidden_dim // 2)
        self.bn2 = nn.BatchNorm1d(hidden_dim // 2)
        self.layer3 = nn.Linear(hidden_dim // 2, 1)
        
    def forward(self, x):
        x = self.bn1(F.relu(self.layer1(x)))
        x = F.dropout(x, p=0.3, training=self.training)
        x = self.bn2(F.relu(self.layer2(x)))
        x = self.layer3(x)
        return x.squeeze(-1)  # Ensure output is [batch_size]

def train_balanced_model(data, labels, num_epochs=200):
    device = data.device
    model = BalancedClusterNet(input_dim=data.shape[1]).to(device)
    
    # Compute class weights
    num_samples = len(labels)
    num_class_0 = (labels == 0).sum().item()
    num_class_1 = (labels == 1).sum().item()
    
    # Create weighted loss function
    pos_weight = torch.tensor([num_class_0/num_class_1]).to(device)
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    
    # Create weighted sampler
    sample_weights = torch.zeros_like(labels, dtype=torch.float32)
    sample_weights[labels == 0] = 1.0 / num_class_0
    sample_weights[labels == 1] = 1.0 / num_class_1
    sampler = WeightedRandomSampler(sample_weights, num_samples=num_samples, replacement=True)
    
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-5)
    
    model.train()
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, labels.float())
        loss.backward()
        optimizer.step()
        
        if epoch % 10 == 0:
            model.eval()
            with torch.no_grad():
                pred = (torch.sigmoid(outputs) > 0.5).long()
                
                # Calculate metrics for both classes
                acc_class0 = ((pred == labels) & (labels == 0)).float().sum() / (labels == 0).float().sum()
                acc_class1 = ((pred == labels) & (labels == 1)).float().sum() / (labels == 1).float().sum()
                
                # Calculate F1 score
                tp = ((pred == 1) & (labels == 1)).float().sum()
                fp = ((pred == 1) & (labels == 0)).float().sum()
                fn = ((pred == 0) & (labels == 1)).float().sum()
                precision = tp / (tp + fp + 1e-10)
                recall = tp / (tp + fn + 1e-10)
                f1 = 2 * (precision * recall) / (precision + recall + 1e-10)
                
                print(f'Epoch {epoch:3d}, Loss: {loss:.4f}')
                print(f'Class 0 Accuracy: {acc_class0:.4f}, Class 1 Accuracy: {acc_class1:.4f}')
                print(f'F1 Score: {f1:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}\n')
            model.train()
    
    return model

def evaluate_model(model, data, labels):
    model.eval()
    with torch.no_grad():
        outputs = model(data)
        pred = (torch.sigmoid(outputs) > 0.5).long()
        
        # Calculate confusion matrix
        tp = ((pred == 1) & (labels == 1)).float().sum()
        tn = ((pred == 0) & (labels == 0)).float().sum()
        fp = ((pred == 1) & (labels == 0)).float().sum()
        fn = ((pred == 0) & (labels == 1)).float().sum()
        
        # Calculate metrics
        accuracy = (tp + tn) / len(labels)
        precision = tp / (tp + fp + 1e-10)
        recall = tp / (tp + fn + 1e-10)
        f1 = 2 * (precision * recall) / (precision + recall + 1e-10)
        
        print("\nFinal Evaluation:")
        print(f"Accuracy: {accuracy:.4f}")
        print(f"Precision: {precision:.4f}")
        print(f"Recall: {recall:.4f}")
        print(f"F1 Score: {f1:.4f}")
        print("\nConfusion Matrix:")
        print(f"TN: {tn:.0f}, FP: {fp:.0f}")
        print(f"FN: {fn:.0f}, TP: {tp:.0f}")

In [66]:
weighted_model = train_balanced_model(data, labels)

Epoch   0, Loss: 1.3171
Class 0 Accuracy: 0.5834, Class 1 Accuracy: 0.4361
F1 Score: 0.1504, Precision: 0.0909, Recall: 0.4361

Epoch  10, Loss: 1.0653
Class 0 Accuracy: 0.5584, Class 1 Accuracy: 0.7808
F1 Score: 0.2437, Precision: 0.1444, Recall: 0.7808

Epoch  20, Loss: 0.9441
Class 0 Accuracy: 0.6784, Class 1 Accuracy: 0.7286
F1 Score: 0.2858, Precision: 0.1778, Recall: 0.7286

Epoch  30, Loss: 0.8801
Class 0 Accuracy: 0.7011, Class 1 Accuracy: 0.7353
F1 Score: 0.3022, Precision: 0.1902, Recall: 0.7353

Epoch  40, Loss: 0.8492
Class 0 Accuracy: 0.6888, Class 1 Accuracy: 0.7629
F1 Score: 0.3038, Precision: 0.1896, Recall: 0.7629

Epoch  50, Loss: 0.8339
Class 0 Accuracy: 0.7677, Class 1 Accuracy: 0.6925
F1 Score: 0.3357, Precision: 0.2215, Recall: 0.6925

Epoch  60, Loss: 0.8204
Class 0 Accuracy: 0.7203, Class 1 Accuracy: 0.7530
F1 Score: 0.3216, Precision: 0.2044, Recall: 0.7530

Epoch  70, Loss: 0.8127
Class 0 Accuracy: 0.7622, Class 1 Accuracy: 0.7187
F1 Score: 0.3414, Precision: 

In [69]:
evaluate_model(weighted_model, data, labels)


Final Evaluation:
Accuracy: 0.7799
Precision: 0.2565
Recall: 0.8040
F1 Score: 0.3889

Confusion Matrix:
TN: 134221, FP: 38394
FN: 3230, TP: 13246
