In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the input directory tree and print the full path of every file in it.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subdirs, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the telescope CSV; the first column of the file is used as the row index.
df = pd.read_csv("/kaggle/input/magic-gamma-telescope-dataset/telescope_data.csv", index_col = 0)

In [None]:
# Preview the first rows to check that the columns were parsed as expected.
df.head()

In [None]:
# Print the column dtypes and non-null counts of the frame.
df.info()

In [None]:
# Number of rows that are exact duplicates of an earlier row.
df.duplicated().sum()

In [None]:
# Encode the label column as integers: 1 where the label is 'g', 0 otherwise.
# The dtype guard makes the cell safe to re-run: comparing the already-encoded
# 0/1 column against 'g' again would silently turn every label into 0.
if not pd.api.types.is_integer_dtype(df['class']):
    df['class'] = (df['class'] == 'g').astype(int)

In [None]:
# Percentage of rows per class, drawn as a pie chart with the second slice pulled out slightly.
class_share_pct = df['class'].value_counts() / len(df) * 100
class_share_pct.plot.pie(autopct = '%1.1f%%', explode = [0.0,0.02], shadow = True)

In [None]:
# Absolute number of rows for each class label.
df['class'].value_counts()

In [None]:
# All column labels of df. The cells below use features[:-1] as the feature columns,
# i.e. they assume the label column ('class') is the last one.
features = df.columns

In [None]:
# Overlay the per-class distribution of every feature (all columns except the last one).
# The class subsets do not depend on the feature, so they are selected once up front.
gamma_rows = df[df['class'] == 1]
hadron_rows = df[df['class'] == 0]

for feature in features[:-1]:
    plt.hist(gamma_rows[feature], color = 'red', label = 'Gamma', alpha = 0.5, density = True)
    plt.hist(hadron_rows[feature], color = 'purple', label = 'Hadron', alpha = 0.5, density = True)
    plt.title(feature)
    # density = True normalizes each histogram to unit area, so the y axis is a density.
    plt.ylabel("Probability density")
    plt.xlabel(feature)
    plt.legend()
    plt.show()
    

# Training and Test Set
> The features are also standardized in this section, and the classes are balanced by random oversampling.

In [None]:

# Feature columns (every column except the last) and the label column, kept as pandas objects.
X = df[features[:-1]]
y = df['class']

In [None]:
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler

def scaleData(dataframe, overSample = False, random_state = None):
    """Standardize the feature columns of ``dataframe`` and optionally oversample it.

    The last column of ``dataframe`` is treated as the label; every other
    column is treated as a feature.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame whose last column holds the class label.
    overSample : bool, default False
        When True, the rows are resampled with ``RandomOverSampler`` before
        the features are scaled.
    random_state : int or None, default None
        Seed forwarded to ``RandomOverSampler`` so the resampling can be
        reproduced. ``None`` keeps the unseeded behaviour.

    Returns
    -------
    X : numpy.ndarray
        Scaled feature matrix.
    y : numpy.ndarray
        Labels aligned with the rows of ``X``.
    data : numpy.ndarray
        ``X`` with ``y`` appended as the last column.
    """
    X = dataframe[dataframe.columns[:-1]].values
    y = dataframe[dataframe.columns[-1]].values

    if overSample:
        sampler = RandomOverSampler(random_state = random_state)
        X, y = sampler.fit_resample(X, y)

    # The scaler is fitted on whatever rows are left after the optional resampling.
    X = StandardScaler().fit_transform(X)

    # Turn y into a column vector so it can be stacked next to the features.
    data = np.hstack((X, np.reshape(y, (-1, 1))))

    return X, y, data

In [None]:
# Standardize the features of the whole frame with random oversampling enabled;
# `data` is the scaled feature matrix with the labels appended as the last column.
X, y, data = scaleData(df, overSample = True)

In [None]:
from sklearn.model_selection import train_test_split

# Split the raw rows BEFORE any scaling or oversampling. Resampling first would let
# copies of the same row end up in both the training and the test set, and fitting
# the scaler on all rows would leak test-set statistics into training; both make the
# reported scores look better than they are.
train_df, test_df = train_test_split(df, test_size = 0.2, random_state = 0)

feature_cols = df.columns[:-1]

# Learn the scaling parameters from the training rows only, then apply them to both parts.
scaler = StandardScaler().fit(train_df[feature_cols].values)
X_train = scaler.transform(train_df[feature_cols].values)
y_train = train_df['class'].values
X_test = scaler.transform(test_df[feature_cols].values)
y_test = test_df['class'].values

# Balance the classes in the training set only; the test set keeps its original class mix.
X_train, y_train = RandomOverSampler(random_state = 0).fit_resample(X_train, y_train)

# KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

In [None]:
# k-nearest-neighbours classifier that votes among the 3 closest training points.
knnModel = KNeighborsClassifier(n_neighbors = 3)
knnModel.fit(X_train, y_train)

In [None]:
# Class predictions of the fitted KNN model for the test features.
y_pred = knnModel.predict(X_test)

In [None]:
# Per-class precision / recall / F1 of the KNN predictions against the test labels.
print(classification_report(y_test, y_pred))

# Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB

In [None]:
# Gaussian naive Bayes with default settings; fit() returns the fitted estimator itself.
nb_model = GaussianNB().fit(X_train, y_train)

In [None]:
# Class predictions of the fitted naive Bayes model for the test features.
NB_y_pred = nb_model.predict(X_test)

In [None]:
# Per-class precision / recall / F1 of the naive Bayes predictions against the test labels.
print(classification_report(y_test, NB_y_pred))

# Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic regression with default settings; fit() returns the fitted estimator itself.
lgModel = LogisticRegression().fit(X_train, y_train)

In [None]:
# Predict the test labels with the fitted logistic regression and report per-class metrics.
lg_y_pred = lgModel.predict(X_test)
print(classification_report(y_test, lg_y_pred))

# Support Vector Machine (SVM)

In [None]:
from sklearn.svm import SVC

In [None]:
# Support vector classifier with default settings; fit() returns the fitted estimator itself.
svmModel = SVC().fit(X_train, y_train)

In [None]:
# Predict the test labels with the fitted SVM and report per-class metrics.
svm_y_pred = svmModel.predict(X_test)
print(classification_report(y_test, svm_y_pred))

# **Neural Networks**

In [None]:
import tensorflow as tf

In [None]:
def train_NN_Model(X_train, y_train, num_nodes, droput_prob, learnRate, batch_size, epochs):
    """Build, compile and fit a small fully connected binary classifier.

    The network has two hidden ``Dense`` layers with ReLU activation, each
    followed by a ``Dropout`` layer, and a single sigmoid output unit. It is
    trained with Adam on binary cross-entropy, with 20% of the training data
    held out for validation (``validation_split = 0.2``).

    Parameters
    ----------
    X_train : 2-D array-like
        Training features, one row per sample. The input width of the
        network is taken from its second dimension.
    y_train : array-like
        Binary training labels.
    num_nodes : int
        Number of units in each hidden layer.
    droput_prob : float
        Dropout rate applied after each hidden layer. The misspelled
        parameter name is kept so existing callers keep working.
    learnRate : float
        Learning rate passed to the Adam optimizer.
    batch_size : int
        Mini-batch size used by ``fit``.
    epochs : int
        Number of training epochs.

    Returns
    -------
    nn_model, history
        The fitted Keras model and the ``History`` object returned by ``fit``.
    """
    # Derive the input width from the data instead of hard-coding 10 features.
    n_features = np.shape(X_train)[1]

    nn_model = tf.keras.Sequential([
        tf.keras.layers.Dense(num_nodes, activation = 'relu', input_shape = (n_features,)),
        # Use the function argument here; the body previously read an undefined
        # (or accidental global) `dropout_prob` because of the parameter's spelling.
        tf.keras.layers.Dropout(droput_prob),
        tf.keras.layers.Dense(num_nodes, activation = 'relu'),
        tf.keras.layers.Dropout(droput_prob),
        tf.keras.layers.Dense(1, activation = 'sigmoid')
        ])

    nn_model.compile(
        optimizer = tf.keras.optimizers.Adam(learnRate),
        loss = 'binary_crossentropy',
        metrics = ['accuracy'])

    history = nn_model.fit(
        X_train, y_train,
        epochs = epochs, batch_size = batch_size, validation_split = 0.2, verbose = 0)

    return nn_model, history

In [None]:
# Baseline training run whose curves are plotted in the next cell.
# The hyperparameters are bound to module-level names (the same names the grid
# search uses) so the run is easy to tweak and re-execute.
num_nodes, dropout_prob, learnRate, batch_size, epochs = 32, 0.0, 0.001, 32, 100
nn_model, history = train_NN_Model(X_train, y_train, num_nodes, dropout_prob, learnRate, batch_size, epochs)

In [None]:
def plotLoss(history):
    """Plot training/validation loss and accuracy per epoch in two side-by-side panels.

    ``history`` must expose a ``history`` mapping with the keys ``'loss'``,
    ``'val_loss'``, ``'accuracy'`` and ``'val_accuracy'``. The left panel
    shows the loss curves, the right panel the accuracy curves.
    """
    figure, axes = plt.subplots(1, 2,figsize = (10,4))

    # (training key, validation key, training label, validation label, y-axis label)
    panels = (
        ('loss', 'val_loss', 'Loss', 'Val_Loss', "Binary Crossentropy"),
        ('accuracy', 'val_accuracy', 'Accuracy', 'Val_Accuracy', "Accuracy"),
    )

    for axis, (train_key, val_key, train_label, val_label, y_label) in zip(axes, panels):
        axis.plot(history.history[train_key], label = train_label)
        axis.plot(history.history[val_key], label = val_label)
        axis.set_xlabel('Epoch')
        axis.set_ylabel(y_label)
        axis.legend()
        axis.grid(True)

    plt.show()

In [None]:
# Draw the loss and accuracy curves of the training run stored in `history`.
plotLoss(history)

In [None]:
# Exhaustive search over layer width, dropout rate, learning rate and batch size.
# The model with the lowest evaluation loss is kept in `least_loss_model`.
# NOTE(review): candidates are scored on X_test, so the final report on the same
# data is optimistic; a separate validation split would be cleaner.
least_val_loss = float('inf')
least_loss_model = None
epochs = 100
for num_nodes in [16,32,64]:
    for dropout_prob in [0, 0.2]:
        for learnRate in [0.01, 0.005, 0.001]:
            for batch_size in [32, 64, 128]:
                print(f" nodes: {num_nodes} , dropout Probability: {dropout_prob}, learning rate: {learnRate}, and Batch Size: {batch_size}")
                model, history = train_NN_Model(X_train, y_train, num_nodes, dropout_prob, learnRate, batch_size, epochs)
                plotLoss(history)
                # The model is compiled with metrics = ['accuracy'], so evaluate()
                # returns [loss, accuracy]; only the loss is used for the comparison.
                val_loss, val_accuracy = model.evaluate(X_test, y_test)
                print("Val Loss:", val_loss, "Val Accuracy:", val_accuracy)
                if val_loss < least_val_loss:
                    least_val_loss = val_loss
                    least_loss_model = model

In [None]:
# Predicted probabilities of the best network, thresholded at 0.5 into 0/1 labels.
# The threshold must be applied to the network's own output (`nn_y_pred`); using
# `y_pred` would report the KNN predictions again.
nn_y_pred = least_loss_model.predict(X_test)
nn_y_pred = (nn_y_pred > 0.5).astype(int).reshape(-1,)
print(classification_report(y_test, nn_y_pred))