In [1]:
import csv
import math
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy.optimize as opt
import seaborn as sns
from scipy import stats
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.decomposition import IncrementalPCA
from tabulate import tabulate
import tensorflow as tf
import deepchem as dc
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Descriptors
from tensorflow import keras
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.loader import DataLoader
from torch_geometric.data import Data
import warnings
warnings.filterwarnings('ignore')

Instructions for updating:
experimental_relax_shapes is deprecated, use reduce_retracing instead


## Initial Loading

In [3]:

ddi_fp = "Data Files\\drugbank\\drugbank.tab"
ddi = pd.read_csv(ddi_fp, sep='\t')

kaggle_fp = "Data Files\\SMILES-Kaggle\\chembl_22_clean_1576904_sorted_std_final.smi"
smiles = pd.read_csv(kaggle_fp, sep='\t')

ddi["Y"] = ddi["Y"].astype("category")
ddi["Map"] = ddi["Map"].astype("category")

#counting interaction types for potential later weighting
interaction_counts = pd.DataFrame(ddi['Y'].value_counts().rename_axis('value').reset_index(name='count')).sort_values(by='count', ascending=False)
interaction_counts['row_num'] = interaction_counts.index + 1
interaction_counts['log_count'] = np.log(interaction_counts['count'])

#listing longer explanations of interaction types for later use
interaction_types = ddi[['Y','Map']].drop_duplicates(subset=['Y'])

#remove longer name of interaction type from main dataset
ddi = ddi.drop("Map",axis=1)

ddi.head(10)

Unnamed: 0,ID1,ID2,Y,X1,X2
0,DB04571,DB00460,1,CC1=CC2=CC3=C(OC(=O)C=C3C)C(C)=C2O1,COC(=O)CCC1=C2NC(\C=C3/N=C(/C=C4\N\C(=C/C5=N/C...
1,DB00855,DB00460,1,NCC(=O)CCC(O)=O,COC(=O)CCC1=C2NC(\C=C3/N=C(/C=C4\N\C(=C/C5=N/C...
2,DB09536,DB00460,1,O=[Ti]=O,COC(=O)CCC1=C2NC(\C=C3/N=C(/C=C4\N\C(=C/C5=N/C...
3,DB01600,DB00460,1,CC(C(O)=O)C1=CC=C(S1)C(=O)C1=CC=CC=C1,COC(=O)CCC1=C2NC(\C=C3/N=C(/C=C4\N\C(=C/C5=N/C...
4,DB09000,DB00460,1,CC(CN(C)C)CN1C2=CC=CC=C2SC2=C1C=C(C=C2)C#N,COC(=O)CCC1=C2NC(\C=C3/N=C(/C=C4\N\C(=C/C5=N/C...
5,DB11630,DB00460,1,OC1=CC=CC(=C1)C-1=C2\CCC(=N2)\C(=C2/N\C(\C=C2)...,COC(=O)CCC1=C2NC(\C=C3/N=C(/C=C4\N\C(=C/C5=N/C...
6,DB00553,DB00460,1,COC1=C2OC(=O)C=CC2=CC2=C1OC=C2,COC(=O)CCC1=C2NC(\C=C3/N=C(/C=C4\N\C(=C/C5=N/C...
7,DB06261,DB00460,1,[H]N([H])CC(=O)CCC(=O)OCCCCCC,COC(=O)CCC1=C2NC(\C=C3/N=C(/C=C4\N\C(=C/C5=N/C...
8,DB01878,DB00460,1,O=C(C1=CC=CC=C1)C1=CC=CC=C1,COC(=O)CCC1=C2NC(\C=C3/N=C(/C=C4\N\C(=C/C5=N/C...
9,DB00140,DB00460,1,CC1=C(C)C=C2N(C[C@H](O)[C@H](O)[C@H](O)CO)C3=N...,COC(=O)CCC1=C2NC(\C=C3/N=C(/C=C4\N\C(=C/C5=N/C...


In [4]:
ddi.shape

(191808, 5)

In [5]:
#quick function to turn a list of size 1 lists of strings into a list of strings, for later use
def delist(list_of_lists):
    list_of_strings = []
    for inner_list in list_of_lists:
        string = inner_list[0]
        list_of_strings.append(string)
    return list_of_strings

## Preprocessing

In [7]:

# counting drugs by number of mentions in database
old = pd.DataFrame()
old["total"] = ddi['ID1'].value_counts()
old = old.reset_index()
old.columns = ['ID', 'count'] 
new = pd.DataFrame()
new["total"] = ddi['ID2'].value_counts()
new = new.reset_index()
new.columns = ['ID', 'count'] 
drug_counts = pd.merge(old,new,how='outer',on='ID').fillna(0)
drug_counts['total'] = drug_counts['count_x'] + drug_counts['count_y']

drug_counts = drug_counts.sort_values(by='total')
drug_counts_one = pd.DataFrame(drug_counts[drug_counts['total']==1]['ID'])

#removing drugs only in database once
ddi_proc = ddi[ ~ddi['ID1'].isin(drug_counts_one['ID'])]
ddi_proc = ddi_proc[ ~ddi_proc['ID2'].isin(drug_counts_one['ID'])]

#removing one particular drug with a problematic SMILES code
ddi_proc = ddi_proc[ddi_proc['X1']!="OC1=CC=CC(=C1)C-1=C2\CCC(=N2)\C(=C2/N\C(\C=C2)=C(/C2=N/C(/C=C2)=C(\C2=CC=C\-1N2)C1=CC(O)=CC=C1)C1=CC(O)=CC=C1)\C1=CC(O)=CC=C1"]


In [8]:
#create main datasets

data = dc.data.NumpyDataset(X=ddi_proc[['X1','X2']], y=ddi[['Y']])
df = data.to_dataframe()
df = df.sample(frac=1).reset_index(drop=True)

X_one = delist(df[["X1"]].values.tolist())
X_two = delist(df[["X2"]].values.tolist())


search_string = "nan"

count = X_one.count(search_string)
if count > 0:
    print(f"'{search_string}' found {count} times in X_one.")
count = X_two.count(search_string)
if count > 0:
    print(f"'{search_string}' found {count} times in X_two.")

## Featurization and Dimensionality Reduction

In [10]:
#featurize using circular fingerprint

cf_featurizer = dc.feat.CircularFingerprint()

def circular_fingerprint(smiles):
    try:
        mol = cf_featurizer(smiles)
        return mol
    except Exception as e:
        print(f"Error fingerprinting {smiles}: {e}")
        return None  # Skipping invalid SMILES


#other way to featurize a molecule
#cm_featurizer=dc.feat.ConvMolFeaturizer(per_atom_fragmentation=False)

#example process with just 10000 rows
df_example = df.head(10000)
df_example = df_example.reset_index()
df_example.rename(columns={'index':'col'},  inplace=True)

df_example_cf = pd.DataFrame()
df_example_cf['x1_cf'] = df_example['X1'].apply(circular_fingerprint)
df_example_cf['x2_cf'] = df_example['X2'].apply(circular_fingerprint)
df_example_cf['col'] = df_example_cf.index
df_example_cf.dropna(subset=['x1_cf', 'x2_cf'], inplace=True)


df_example_x1_cf = pd.DataFrame(delist(df_example_cf['x1_cf']))
df_example_x1_cf.rename(columns=lambda x: "x1_cf_"+str(x+1), inplace=True)
df_example_x1_cf['col'] = df_example_cf['col']

df_example_x2_cf = pd.DataFrame(delist(df_example_cf['x2_cf']))
df_example_x2_cf.rename(columns=lambda x: "x2_cf_"+str(x+1), inplace=True)
df_example_x2_cf['col'] = df_example_cf['col']


df_example = df_example.merge(df_example_x1_cf, on="col")
df_example = df_example.merge(df_example_x2_cf, on="col")
df_example.dropna(inplace=True)

df_example



Error fingerprinting nan: 'float' object is not iterable




Error fingerprinting nan: 'float' object is not iterable




Error fingerprinting nan: 'float' object is not iterable




Error fingerprinting nan: 'float' object is not iterable




Error fingerprinting nan: 'float' object is not iterable




Error fingerprinting nan: 'float' object is not iterable




Unnamed: 0,col,X1,X2,y,w,ids,x1_cf_1,x1_cf_2,x1_cf_3,x1_cf_4,...,x2_cf_2039,x2_cf_2040,x2_cf_2041,x2_cf_2042,x2_cf_2043,x2_cf_2044,x2_cf_2045,x2_cf_2046,x2_cf_2047,x2_cf_2048
0,0,CC1=CC(=NO1)C(=O)NCCC1=CC=C(C=C1)S(=O)(=O)NC(=...,COCCC1=CC=C(OCC(O)CNC(C)C)C=C1,9,1.0,10045,0.0,0.0,1.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,COC1=CC2=C(C=CC=C2CCNC(C)=O)C=C1,[H]C(CCN(C)C)=C1C2=CC=CC=C2COC2=CC=CC=C12,49,1.0,127978,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,NC(=O)C1=CC=CC=C1O,[H][C@@]12C[C@@H](O)[C@](O)(C(=O)CO)[C@@]1(C)C...,49,1.0,110312,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,OC1N=C(C2=CC=CC=C2)C2=C(NC1=O)C=CC(Cl)=C2,[H][C@@]12[C@@H](C)C(=O)[C@H](C)C[C@@](C)(OC)[...,47,1.0,55452,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,COC1=C(OC)C=C2C(=O)C(CC3CCN(CC4=CC=CC=C4)CC3)C...,CC(C)C[C@H](NC(=O)[C@H](CC1=CC=CC=C1)NC(=O)C1=...,47,1.0,65666,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9989,9992,COC1=CC=C(CC(C)(C)NC[C@H](O)C2=C3OCC(=O)NC3=CC...,CCCCC(=O)N(CC1=CC=C(C=C1)C1=CC=CC=C1C1=NNN=N1)...,47,1.0,38875,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9990,9993,[H][C@@]12C[C@@H](C)[C@](OC(=O)C3=CC=CO3)(C(=O...,[H][C@@]12CC[C@@](O)(C#CC)[C@@]1(C)C[C@H](C1=C...,73,1.0,167932,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9991,9994,CN1CCCCC1C(=O)NC1=C(C)C=CC=C1C,FC(F)(F)[C@]1(OC(=O)NC2=C1C=C(Cl)C=C2)C#CC1CC1,49,1.0,94613,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9992,9995,CC1(C)CCC(CN2CCN(CC2)C2=CC=C(C(=O)NS(=O)(=O)C3...,CC1=NC(NC2=NC=C(S2)C(=O)NC2=C(C)C=CC=C2Cl)=CC(...,73,1.0,157778,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
#Basic dimensionality reduction down to 100 components from 4096

df_X = df_example.iloc[:,6:]
df_Y = to_categorical(df_example["y"])

n_components = 100
batch_size = 200

ipca = IncrementalPCA(n_components=n_components)

for i in range(0, df_X.shape[0], batch_size):
    X_batch = df_X[i:i + batch_size]
    ipca.partial_fit(X_batch)

X_transformed = ipca.transform(df_X)

#prove that X's shape has changed
#print("Original shape:", df_X.shape)
#print("Transformed shape:", X_transformed.shape)

df_X_proc = pd.DataFrame(X_transformed)


## Initial basic model implementation

In [13]:
#basic/rough neural network implementation

def calc_layers(X_size, Y_size):
    layers = [X_size+1]
    layer = 2
    while layer <= X_size:
        layer = int(layer * 2)
    layers.append(layer)
    if X_size > Y_size:
        while layer / 2 > Y_size and layer > 2:
            layer = layer / 2
            layers.append(int(layer))
    layers.append(Y_size)
    return layers
        
def DNN(X, Y, Epochs, batchsize, layernum):

    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=27)
    
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    #define layers and nodes in each layer
    input = len(X.columns)
    output = Y.shape[1]
    if layernum=='many':
        layers = calc_layers(input,output)
    elif type(layernum)==int:
        layers = [input]
        for i in range(layernum, 1, -1):
            layer = int(round((i * (input + output) / (layernum+1)), 0))
            if layer > output:
                layers.append(layer)
        layers.append(output)
    else:
        print(f"incorrect layernum {layernum}")
        return None

    model = keras.models.Sequential()

    model.add(Dense(layers[0], activation='relu'))
    model.add(keras.layers.Dropout(0.2))

    for layer_size in layers[1:-1]:
        model.add(Dense(layer_size, activation='relu'))
        model.add(keras.layers.Dropout(0.2))
        
    model.add(Dense(layers[-1], activation='softmax'))
    
    model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy','AUC','precision','recall'])
    
    model.fit(X_train, y_train, epochs=Epochs, batch_size=batchsize, validation_split=0.1)
    
    full_loss, full_accuracy, full_AUC, full_precision, full_recall = model.evaluate(X_test, y_test, verbose=0)
    print(f"Test Accuracy: {full_accuracy}")
    print(f"Test Loss: {full_loss}")
    return model

In [14]:
dr_model = DNN(df_X_proc, df_Y, 5, 1000, 1)

Epoch 1/5
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 109ms/step - AUC: 0.5188 - accuracy: 0.0187 - loss: 4.7468 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_AUC: 0.6538 - val_accuracy: 0.0637 - val_loss: 4.2790 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 2/5
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - AUC: 0.6818 - accuracy: 0.0652 - loss: 4.2223 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_AUC: 0.7853 - val_accuracy: 0.1250 - val_loss: 3.8646 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 3/5
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - AUC: 0.7985 - accuracy: 0.1233 - loss: 3.7903 - precision: 0.0730 - recall: 1.5278e-04 - val_AUC: 0.8558 - val_accuracy: 0.1550 - val_loss: 3.5327 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 4/5
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - AUC: 0.8652 - accuracy: 0.1586 - loss: 3.4536 - precisi

In [15]:
#comparison to without Dimensionality reduction

full_model = DNN(df_X, df_Y, 5, 1000, 1)

#calc_layers(len(df_X.iloc[:,1:].columns), df_Y.shape[1])

Epoch 1/5
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 574ms/step - AUC: 0.7386 - accuracy: 0.1490 - loss: 4.3716 - precision: 0.2121 - recall: 0.1012 - val_AUC: 0.7989 - val_accuracy: 0.1963 - val_loss: 4.6688 - val_precision: 0.2200 - val_recall: 0.1488
Epoch 2/5
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 506ms/step - AUC: 0.9519 - accuracy: 0.5749 - loss: 1.6689 - precision: 0.6542 - recall: 0.5009 - val_AUC: 0.7903 - val_accuracy: 0.2212 - val_loss: 4.8117 - val_precision: 0.2428 - val_recall: 0.1800
Epoch 3/5
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 501ms/step - AUC: 0.9849 - accuracy: 0.7396 - loss: 0.8653 - precision: 0.7995 - recall: 0.6789 - val_AUC: 0.7903 - val_accuracy: 0.2050 - val_loss: 5.1079 - val_precision: 0.2217 - val_recall: 0.1762
Epoch 4/5
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 506ms/step - AUC: 0.9948 - accuracy: 0.8589 - loss: 0.4738 - precision: 0.8912 - recall: 0.8093 - val_AUC: 0.

This demonstrates that dimensionality reduction is counterproductive to a neural network in this case.

## Adding in other variables

In [18]:
#code from Beth Farr
def compute_features(smiles):
    try:
        mol = Chem.MolFromSmiles(smiles)
        if not mol:
            return None

        # Computing Molecular Descriptors
        mol_wt = Descriptors.MolWt(mol)
        logp = Descriptors.MolLogP(mol)
        h_donors = Descriptors.NumHDonors(mol)
        h_acceptors = Descriptors.NumHAcceptors(mol)
        tpsa = Descriptors.TPSA(mol)
        return [mol_wt, logp, h_donors, h_acceptors, tpsa]

    except Exception as e:
        print(f"Error computing features for {smiles}: {e}")
        return None  # Skipping invalid SMILES

# Using function to X1 and X2 to extract features
df_feat = df_example
df_feat['features_X1'] = df_example['X1'].apply(compute_features)
df_feat['features_X2'] = df_example['X2'].apply(compute_features)
df_feat = df_feat.dropna(subset=['features_X1', 'features_X2'])
features_X1_df = pd.DataFrame(df_feat['features_X1'].tolist(), columns=['MolWt_X1', 'LogP_X1', 'NumHDonors_X1', 'NumHAcceptors_X1', 'TPSA_X1'])
features_X1_df['col'] = df_feat['col']
features_X2_df = pd.DataFrame(df_feat['features_X2'].tolist(), columns=['MolWt_X2', 'LogP_X2', 'NumHDonors_X2', 'NumHAcceptors_X2', 'TPSA_X2'])
features_X2_df['col'] = df_feat['col']

df_feat = pd.merge(df_feat, pd.merge(features_X1_df, features_X2_df,on='col'), on='col')
df_feat = df_feat.drop(['X1', 'X2', 'w', 'features_X1', 'features_X2'], axis=1)

df_feat_X = df_feat.iloc[:,3:]
df_feat_Y = to_categorical(df_feat["y"])

feat_model = DNN(df_feat_X, df_feat_Y, 5, 1000, 1)


Epoch 1/5
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 440ms/step - AUC: 0.7251 - accuracy: 0.1518 - loss: 4.4439 - precision: 0.2201 - recall: 0.1032 - val_AUC: 0.8084 - val_accuracy: 0.2163 - val_loss: 4.3957 - val_precision: 0.2432 - val_recall: 0.1675
Epoch 2/5
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 375ms/step - AUC: 0.9531 - accuracy: 0.5751 - loss: 1.6504 - precision: 0.6511 - recall: 0.5116 - val_AUC: 0.7755 - val_accuracy: 0.2113 - val_loss: 4.9936 - val_precision: 0.2318 - val_recall: 0.1787
Epoch 3/5
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 447ms/step - AUC: 0.9861 - accuracy: 0.7597 - loss: 0.8025 - precision: 0.8196 - recall: 0.7044 - val_AUC: 0.7838 - val_accuracy: 0.2225 - val_loss: 5.1533 - val_precision: 0.2348 - val_recall: 0.1887
Epoch 4/5
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 384ms/step - AUC: 0.9946 - accuracy: 0.8604 - loss: 0.4577 - precision: 0.8930 - recall: 0.8300 - val_AUC: 0.

## Adding In GNN

In [20]:
#Creating GNN Model
class GNN(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GNN, self).__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

#Using PyTorch Geometric Graph Format
def create_graph_features(features, labels):
    num_nodes = len(features)

    # creating the nodes
    edge_index = torch.tensor(
        np.array([[i, i] for i in range(num_nodes)]).T, dtype=torch.long
    )

    graphs = []
    for i in range(len(features)):
        x = torch.tensor(features[i], dtype=torch.float).unsqueeze(0)
        y = torch.tensor([labels[i]], dtype=torch.long)
        graph = Data(x=x, edge_index=edge_index, y=y)
        graphs.append(graph)
    return graphs

#Training the GNN Model
def GNN_train(model, train_loader, optimizer, criterion):
    model.train()
    total_loss = 0
    for data in train_loader:
        data = data.to(torch.device("cpu"))  # Ensure correct device
        optimizer.zero_grad()
        out = model(data)
        loss = criterion(out.squeeze(1), data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(train_loader)

#Evaluating the Model
def GNN_test(model, loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in loader:
            data = data.to(torch.device("cpu"))  # Ensure correct device
            out = model(data)
            pred = out.argmax(dim=1)
            correct += (pred == data.y).sum().item()
            total += data.y.size(0)
    return correct / total

def perform_GNN(X, Y, epochs, batchsize):

    #Train-Test Split
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
                                                    
    train_graphs = create_graph_features(X_train, y_train)
    test_graphs = create_graph_features(X_test, y_test)

    # Creating data loaders
    train_loader = DataLoader(train_graphs, batch_size=batchsize, shuffle=True)
    test_loader = DataLoader(test_graphs, batch_size=batchsize, shuffle=False)

    #Initialising the Model
    input_dim = X.shape[1]
    output_dim = len(np.unique(Y))
    hidden_dim = int(round((input_dim + output_dim)/2,0))
    
    model = GNN(input_dim, hidden_dim, output_dim)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    criterion = torch.nn.CrossEntropyLoss()

    #Running Training
    for epoch in range(epochs):
        loss = GNN_train(model, train_loader, optimizer, criterion)
        acc = GNN_test(model, test_loader)
        print(f"Epoch {epoch+1}: Loss={loss:.4f}, Test Accuracy={acc:.4f}")
    
    #Saving the model for future use
    torch.save(model.state_dict(), "gnn_model.pth")
    

In [21]:
label_encoder = LabelEncoder()
graph_y = label_encoder.fit_transform(df_feat['y'])

perform_GNN(df_feat_X.values, graph_y, 5, 1000)

Epoch 1: Loss=178.0380, Test Accuracy=0.0160
Epoch 2: Loss=61.2041, Test Accuracy=0.2806
Epoch 3: Loss=4.8429, Test Accuracy=0.2446
Epoch 4: Loss=3.1408, Test Accuracy=0.1891
Epoch 5: Loss=2.7708, Test Accuracy=0.3202
