In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report 
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

# HIP_CER_df = pd.read_pickle('data/HIP_CER.pkl')
# ICC_df = pd.read_pickle('data/ICC_rms.pkl')
# SIMS_df = pd.read_pickle('data/SIMS.pkl')
# BIG_df = pd.read_pickle('data/30k_data.pkl')

In [7]:
class Object:
    def __init__(self, file_path):
        self.data = pd.read_pickle(file_path)
        if 'type' in self.data.columns:
            self.X = self.data.drop('type', axis=1).copy()
            self.y = self.data['type'].copy()
            self.labels = None
            
            self._create_class_labels()
            
            self.X_train = None
            self.y_train = None
            self.X_test = None
            self.y_test = None
        
        else:
            self.X = self.data.copy()
            self.y = None

    def _create_class_labels(self):
        self.labels = LabelEncoder().fit_transform(self.y)

    def split_dataset(self, test_size=0.20):
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(self.X, self.labels, test_size=test_size, random_state=42)
        print(self.X_train.shape, self.X_test.shape)
        
    def __str__(self) -> str:
        return f'Shape of dataset: {self.data.shape}'

MyData = Object('data/ICC_rms.pkl')     
MyData.split_dataset()

(1235, 511) (309, 511)


In [8]:
model = XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=0.6, gamma=1,
              learning_rate=0.1, max_delta_step=0, max_depth=7,
              min_child_weight=1, missing=np.nan, n_estimators=600, n_jobs=1,
              nthread=None, objective='binary:logistic', random_state=0,
              reg_alpha=0.01, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=None, subsample=1, verbosity=1)

model.fit(MyData.X_train, MyData.y_train)
y_pred = model.predict(MyData.X_test)

In [11]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve

print(accuracy_score(MyData.y_test, y_pred))
print(roc_auc_score(MyData.y_test, y_pred))

0.7411003236245954
0.7307442102524069


## ANN 

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the neural network model
class BinaryClassifier(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(BinaryClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.sigmoid(out)
        return out

In [None]:
# # Convert the training and testing sets to tensors
# X_train = torch.tensor(X_train.values, dtype=torch.float32)
# y_train = torch.tensor(y_train, dtype=torch.float32)
# X_test = torch.tensor(X_test.values, dtype=torch.float32)
# y_test = torch.tensor(y_test, dtype=torch.float32)

# # Reshape the labels
# y_train = y_train.reshape(-1, 1)
# y_test = y_test.reshape(-1, 1)

In [None]:
# Create an instance of the model
input_size = X_train.shape[1]  # Number of features in the input data
hidden_size = 50  # Number of units in the hidden layer
model = BinaryClassifier(input_size, hidden_size)

# Define the loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Training loop
num_epochs = 1500
for epoch in range(num_epochs):
    # Forward pass
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 100 == 0:
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}')

# Test the model
predictions = model(X_test)
y_pred = predictions.round().detach().numpy()

In [None]:
print(accuracy_score(y_test, y_pred))