In [20]:
import numpy as np
import pandas as pd
import torch
from torch_geometric.data import Data
from GNN import Data_processing, Edge_features, Edges, GCN, GAT

## Test on real data

In [21]:
# Run the saved GAT checkpoint on the accounts in users.csv and count the
# predicted classes (class 0 is reported as 'Real', class 1 as 'Fake').
real_users = pd.read_csv("./dataset_test/users.csv")
data_real = Data_processing(real_users)
edges_real = Edges(Edge_features(data_real), 10)

# Node features: every processed column except the identifier/name columns.
x_real = data_real.drop(['id', 'name', 'screen_name'], axis=1).values
x_real = torch.tensor(x_real, dtype=torch.float)
graph_real = Data(x=x_real, edge_index=edges_real)

# NOTE(review): 32 and 2 are presumably the hidden width and the number of
# output classes -- confirm against GNN.GAT.
model = GAT(graph_real.num_node_features, 32, 2)
# map_location='cpu' lets the checkpoint deserialize on machines that lack the
# device it was saved from; this cell never moves the model or inputs elsewhere.
model.load_state_dict(torch.load('GAT.pth', map_location='cpu'))
model.eval()
with torch.no_grad():
    out = model(graph_real.x, graph_real.edge_index)
    predictions = out.argmax(dim=1).cpu().numpy()

result_df = pd.DataFrame({'Prediction': predictions})
result_df['Prediction'] = result_df['Prediction'].map({0: 'Real', 1: 'Fake'})
print(result_df['Prediction'].value_counts())

Prediction
Real    3474
Name: count, dtype: int64


## Test on fake data

In [22]:
# Run the saved GAT checkpoint on the accounts in fusers.csv and count the
# predicted classes (class 0 is reported as 'Real', class 1 as 'Fake').
fake_users = pd.read_csv("./dataset_test/fusers.csv")
data_fake = Data_processing(fake_users)
edges_fake = Edges(Edge_features(data_fake), 10)

# Node features: every processed column except the identifier/name columns.
x_fake = data_fake.drop(['id', 'name', 'screen_name'], axis=1).values
x_fake = torch.tensor(x_fake, dtype=torch.float)
graph_fake = Data(x=x_fake, edge_index=edges_fake)

# NOTE(review): 32 and 2 are presumably the hidden width and the number of
# output classes -- confirm against GNN.GAT.
model = GAT(graph_fake.num_node_features, 32, 2)
# map_location='cpu' lets the checkpoint deserialize on machines that lack the
# device it was saved from; this cell never moves the model or inputs elsewhere.
model.load_state_dict(torch.load('GAT.pth', map_location='cpu'))
model.eval()
with torch.no_grad():
    out = model(graph_fake.x, graph_fake.edge_index)
    # .cpu() is a no-op for CPU tensors; kept so this cell matches the other
    # evaluation cells and still works if the model is later moved to a GPU.
    predictions = out.argmax(dim=1).cpu().numpy()

result_df = pd.DataFrame({'Prediction': predictions})
result_df['Prediction'] = result_df['Prediction'].map({0: 'Real', 1: 'Fake'})
result_df['Prediction'].value_counts()

Prediction
Fake    3189
Real     162
Name: count, dtype: int64

In [23]:
# Evaluate the saved GAT checkpoint on users.csv and fusers.csv combined and
# report the overall accuracy against labels derived from the 'dataset' column.
test_users = pd.read_csv("./dataset_test/users.csv")
test_fusers = pd.read_csv("./dataset_test/fusers.csv")
test = pd.concat([test_users, test_fusers], ignore_index=True)

# Integer-encode 'dataset' in order of first appearance. Because the users.csv
# rows come first in the concat, the value seen there first becomes class 0.
# NOTE(review): this only lines up with the model's classes if each file
# carries a single 'dataset' value -- confirm against the CSVs.
label_codes = {name: code for code, name in enumerate(test['dataset'].unique())}
if len(label_codes) > 2:
    raise ValueError(
        f"Expected at most 2 distinct 'dataset' values, found {len(label_codes)}: "
        f"{list(label_codes)}"
    )
labels = test['dataset'].map(label_codes).values

data_new = Data_processing(test)
edges_new = Edges(Edge_features(data_new), k=10)
# Node features: every processed column except the identifier/name columns.
x_new = data_new.drop(['id', 'name', 'screen_name'], axis=1).values
x_new = torch.tensor(x_new, dtype=torch.float)
y = torch.tensor(labels, dtype=torch.long)
graph_new = Data(x=x_new, edge_index=edges_new, y=y)

# GAT.pth is loaded into GAT(..., 32, 2) by the other evaluation cells, so the
# output size is fixed at 2 here instead of being derived from the test labels;
# a different size would make load_state_dict fail with a shape mismatch.
model = GAT(graph_new.num_node_features, 32, 2)
# map_location='cpu' lets the checkpoint deserialize on machines that lack the
# device it was saved from; this cell never moves the model or inputs elsewhere.
model.load_state_dict(torch.load('GAT.pth', map_location='cpu'))
model.eval()
with torch.no_grad():
    out = model(graph_new.x, graph_new.edge_index)
    predictions = out.argmax(dim=1).cpu().numpy()
    # Fraction of nodes whose predicted class equals the label.
    acc = (predictions == graph_new.y.cpu().numpy()).mean()

acc = round(acc * 100, 2)
print(f'Accuracy on new dataset: {acc}%')

Accuracy on new dataset: 93.07%
