# Pulsar Classification using Deep Neural Networks

## -- Neccessary Modules

In [1]:
import tensorflow as tf 
from tensorflow import keras
from sklearn import preprocessing, model_selection
import numpy as np 
import pandas as pd

## -- Loading data of stars from CSV

In [2]:
df = pd.read_csv('C:\\Users\\User1\\Desktop\\Pulsar Classification\\Pulsar Classification (Notebook version)\\pulsar_stars.csv')

In [3]:
df.head()

Unnamed: 0,Mean of the integrated profile,Standard deviation of the integrated profile,Excess kurtosis of the integrated profile,Skewness of the integrated profile,Mean of the DM-SNR curve,Standard deviation of the DM-SNR curve,Excess kurtosis of the DM-SNR curve,Skewness of the DM-SNR curve,target_class
0,140.5625,55.683782,-0.234571,-0.699648,3.199833,19.110426,7.975532,74.242225,0
1,102.507812,58.88243,0.465318,-0.515088,1.677258,14.860146,10.576487,127.39358,0
2,103.015625,39.341649,0.323328,1.051164,3.121237,21.744669,7.735822,63.171909,0
3,136.75,57.178449,-0.068415,-0.636238,3.642977,20.95928,6.896499,53.593661,0
4,88.726562,40.672225,0.600866,1.123492,1.17893,11.46872,14.269573,252.567306,0


## -- Converting Data into Numpy Arrays ( For Sake of Convenience)

In [4]:
x = np.array(df.drop(['target_class'],1),dtype = 'float')        # Dropping Target Class
y = np.array(df['target_class'])                 
y.shape = (len(x),1)               # makes it a column vector

## Splitting the data into test set and train set 

In [5]:
x_train ,x_test , y_train, y_test = model_selection.train_test_split(x,y, test_size = 0.3)

## -- Scaling Data for Better Modelling (Only x values)

In [6]:
x_f_train = preprocessing.scale(x_train)
x_f_test = preprocessing.scale(x_test)
y_f_train = y_train
y_f_test = y_test

## -- Using DNN Model for Training Data 

In [7]:
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(200,activation = tf.nn.relu))
model.add(tf.keras.layers.Dense(300,activation = tf.nn.relu))
model.add(tf.keras.layers.Dense(2,activation = tf.nn.softmax))


model.compile(optimizer = tf.train.AdamOptimizer(),
       loss = 'sparse_categorical_crossentropy',
       metrics=['accuracy'])

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


## Training Data using the model

In [8]:
model.fit(x_f_train,y_f_train, epochs = 30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x20e690d1c88>

## -- Checking Trained Data For Overfitting and Underfitting over tested data

In [9]:
val_loss,val_acc = model.evaluate(x_f_test,y_f_test)
print("Loss % = {} , Accuracy % = {} ".format(val_loss*100,val_acc*100))

Loss % = 6.720539210104076 , Accuracy % = 98.04469347000122 


## -- Predicting Star Type of Test Data from Trained Data (Pulsar or Non-Pulsar)

In [10]:
# [1,0] = Normal Star
# [0,1] = Pulsar

arr = np.array([[1,0],[0,1]])

In [11]:
z = np.round(model.predict(x_f_test))

print('_____________________________________________________')

count1=count2=count3=count4=c5=0

for i in range(0,len(z)):
    if np.array_equal(z[i],arr[0]):
        #print("{} : Normal star".format(z[i]))
        count1+=1
    elif np.array_equal(z[i],arr[1]):
        #print("{} : Pulsar".format(z[i]))
        count2+=1  
    else:
        print("NOT A STAR")
        c5+=1

print('_____________________________________________________')
print("Predicted No of Normal stars  = {}".format(count1))
print("Predicted NO. of Pulsars = {}".format(count2))
print("Not A Star: {}".format(c5))
print("Total tested stars = {}".format(len(z)))


m = y_f_test


print('_____________________________________________________')

for i in range(0,len(m)):
    if m[i] == 0:
        count3+=1
    elif m[i] == 1 :
        count4+=1  
    else:
        print("NOT A STAR")      



print("Original No of Normal stars  = {}".format(count3))
print("Original No. of Pulsars = {}".format(count4))
print("Total tested stars = {}".format(len(x_f_test)))

print('_____________________________________________________')
print("Accuracy = {} %".format((val_acc)*100))

_____________________________________________________
_____________________________________________________
Predicted No of Normal stars  = 4919
Predicted NO. of Pulsars = 451
Not A Star: 0
Total tested stars = 5370
_____________________________________________________
Original No of Normal stars  = 4872
Original No. of Pulsars = 498
Total tested stars = 5370
_____________________________________________________
Accuracy = 98.04469347000122 %


## Saving trained data using pickle

In [12]:
import pickle

c = np.hstack((x_train,y_train))    # Merging the label column (y_test) with the X_test  i.e the total training set

with open("pulsar_model.pickle","wb") as f:
    pickle.dump( c , f)