In [1]:
#import statements
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
import tensorflow as tf

In [2]:
#labeling the coloumns
cols = ["fLength", "fWidth", "fSize", "fConc", "fConc1", "fAsym", "fM3long", "fM3trans", "fAplha", "fDist", "class"]
file_name = "magic04.data"

df = pd.read_csv(file_name, names = cols)
df["class"] = df["class"].apply(lambda x: 1 if x == 'g' else 0)

In [3]:
#split datasets
train, valid, test = np.split(df.sample(frac = 1), [int(0.6*len(df)), int(0.8*len(df))])

  return bound(*args, **kwds)


In [4]:
#scaling function
#importing random over sampler
def scale_dataset(dataframe, oversample = False):
  x = dataframe[dataframe.columns[:-1]].values
  y = dataframe[dataframe.columns[-1]].values

  scaler = StandardScaler()
  x = scaler.fit_transform(x)

  if oversample:
    ros = RandomOverSampler()
    x, y = ros.fit_resample(x, y)

  data = np.hstack((x, np.reshape(y, (-1, 1))))

  return data, x, y

train, x_train, y_train = scale_dataset(train, oversample = True)
valid, x_valid, y_valid = scale_dataset(valid, oversample = False)
test, x_test, y_test = scale_dataset(test, oversample = False)

In [5]:
x_train

array([[ 0.34469104,  0.58017729,  1.34097828, ...,  1.13441378,
        -0.9940973 ,  0.35152164],
       [ 0.09589706,  0.08802105,  0.65219427, ..., -0.96869791,
        -0.93411692,  1.50173808],
       [-0.40737777,  0.09223026,  0.28356415, ...,  0.84363308,
         2.04101837, -1.98155521],
       ...,
       [-0.75142503, -0.43368827, -0.7093555 , ..., -0.21255878,
         0.41346978,  0.50121816],
       [ 0.1248688 , -0.933986  ,  0.22412808, ...,  0.4060517 ,
         0.54124162, -1.0564446 ],
       [ 1.35228205,  1.9300208 ,  1.6771504 , ...,  2.7106837 ,
         1.12934408, -0.89048107]])

In [6]:
#KNN Implementation
#importing kneigbour classifier
#importing classification report

knn_model = KNeighborsClassifier(n_neighbors = 5)
knn_model.fit(x_train, y_train)

In [7]:
y_pred = knn_model.predict(x_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.75      0.72      0.74      1333
           1       0.85      0.87      0.86      2471

    accuracy                           0.82      3804
   macro avg       0.80      0.80      0.80      3804
weighted avg       0.82      0.82      0.82      3804



In [8]:
#Naive Bayes Implementation
#importing GaussianNB

nb_model = GaussianNB()
nb_model = nb_model.fit(x_train, y_train)

In [9]:
y_pred = nb_model.predict(x_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.71      0.40      0.51      1333
           1       0.74      0.91      0.82      2471

    accuracy                           0.73      3804
   macro avg       0.72      0.65      0.66      3804
weighted avg       0.73      0.73      0.71      3804



In [10]:
#Logistic Regression
#importing logistic regression

logreg_model  = LogisticRegression()
logreg_model = logreg_model.fit(x_train, y_train)

In [11]:
y_pred = logreg_model.predict(x_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.69      0.71      0.70      1333
           1       0.84      0.82      0.83      2471

    accuracy                           0.78      3804
   macro avg       0.76      0.77      0.77      3804
weighted avg       0.79      0.78      0.79      3804



In [12]:
#SVM
#import SVC

svm_model = SVC()
svm_model = svm_model.fit(x_train, y_train)

In [13]:
y_pred = svm_model.predict(x_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.80      0.78      0.79      1333
           1       0.88      0.90      0.89      2471

    accuracy                           0.86      3804
   macro avg       0.84      0.84      0.84      3804
weighted avg       0.85      0.86      0.86      3804



In [14]:
#Neural Network
#import tensorflow

def train_model(x_train, y_train, num_nodes, dropout_prob, lr, batch_size, epochs):
    nn_model = tf.keras.Sequential([
        tf.keras.layers.Dense(num_nodes, activation="relu", input_shape=(10,)),
        tf.keras.layers.Dropout(dropout_prob),
        tf.keras.layers.Dense(num_nodes, activation="relu"),
        tf.keras.layers.Dropout(dropout_prob),
        tf.keras.layers.Dense(1, activation="sigmoid")
    ])

    nn_model.compile(optimizer=tf.keras.optimizers.Adam(lr), loss='binary_crossentropy', metrics=['accuracy'])
    history = nn_model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.2, verbose=0)
    
    return nn_model, history

In [15]:
least_val_loss = float('inf')
least_loss_model = None
epochs = 100
for num_nodes in [16, 32, 64]:
    for dropout_prob in [0, 0.2]:
        for lr in [0.1, 0.005, 0.001]:
            for batch_size in [32, 64, 128]:
                model, history = train_model(x_train, y_train, num_nodes, dropout_prob, lr, batch_size, epochs)
                val_loss = model.evaluate(x_valid, y_valid)[0]
                if val_loss < least_val_loss:
                    least_val_loss = val_loss
                    least_loss_model = model

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 528us/step - accuracy: 0.8607 - loss: 0.3447
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 441us/step - accuracy: 0.8665 - loss: 0.3341
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 455us/step - accuracy: 0.8694 - loss: 0.3081
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 459us/step - accuracy: 0.8726 - loss: 0.2964
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 424us/step - accuracy: 0.8664 - loss: 0.3039
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 564us/step - accuracy: 0.8675 - loss: 0.3130
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 468us/step - accuracy: 0.8737 - loss: 0.2998
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 425us/step - accuracy: 0.8684 - loss: 0.2984
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 420us/step - accuracy: 0.8710 - loss: 0.3043


In [16]:
y_pred = least_loss_model.predict(x_test)
y_pred = (y_pred > 0.5).astype(int).reshape(-1,)

[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 924us/step


In [17]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.89      0.74      0.81      1333
           1       0.87      0.95      0.91      2471

    accuracy                           0.88      3804
   macro avg       0.88      0.84      0.86      3804
weighted avg       0.88      0.88      0.87      3804

