# Classification

In [2]:
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

plt.style.use('default')
# Suppress every Python warning for the rest of the session; note that this
# also hides library deprecation notices.
warnings.filterwarnings("ignore")
# Seed value, presumably for reproducibility.
# NOTE(review): no cell visible here passes `seed` to anything — confirm it is
# actually used further down, otherwise it has no effect.
seed = 0


# Support Vector Machine

Pulsars are a rare type of neutron star that produce radio emission detectable here on Earth. They are of considerable scientific interest as probes of space-time, the interstellar medium, and states of matter. Machine learning tools are now being used to automatically label pulsar candidates to facilitate rapid analysis.

Each candidate is described by 8 continuous variables and a single class variable. The first four are simple statistics obtained from the integrated pulse profile (folded profile); the remaining four are the same statistics obtained from the DM-SNR curve.

1. Mean of the integrated profile.
2. Standard deviation of the integrated profile.
3. Excess kurtosis of the integrated profile.
4. Skewness of the integrated profile.
5. Mean of the DM-SNR curve.
6. Standard deviation of the DM-SNR curve.
7. Excess kurtosis of the DM-SNR curve.
8. Skewness of the DM-SNR curve.
9. Class

In [4]:
# Read the pulsar candidates and discard every row that has a missing value.
dt_filename = os.path.join("data/classification", "pulsar_data.csv")
df_pulsar = pd.read_csv(dt_filename).dropna()

# Show which labels occur in the class column.
print(df_pulsar['target_class'].unique())

# Standardize all predictor columns (everything except the label). The scaler
# returns a plain array, so the resulting frame carries positional column
# labels and a fresh 0..n-1 index rather than those of `df_pulsar`.
predictors = df_pulsar.drop(columns=['target_class'])
features = pd.DataFrame(StandardScaler().fit_transform(predictors))

features.head()


[0. 1.]


Unnamed: 0,0,1,2,3,4,5,6,7
0,0.390208,0.274138,-0.097154,-0.286951,-0.321398,-0.405724,-0.192994,-0.375589
1,-1.330153,-1.524637,0.217862,0.254214,-0.347241,-0.448114,0.238857,-0.028295
2,0.757336,0.990345,-0.323162,-0.332141,-0.335696,-0.202916,0.039658,-0.293563
3,-1.025075,-1.533176,0.322535,0.235985,-0.334123,-0.29219,0.016941,-0.27267
4,-0.067257,1.38601,0.07988,-0.275909,-0.33387,-0.349579,0.245149,-0.077743


In [3]:
# Dimensions of the loaded frame as (rows, columns); rows containing missing
# values were already dropped when the CSV was read.
df_pulsar.shape

(9273, 9)

In [4]:
# Base estimator. Its constructor defaults (kernel='rbf', C=1.0, and
# gamma='scale' on scikit-learn >= 0.22; older releases defaulted to 'auto')
# are only placeholders: every candidate in the grid below sets kernel and C,
# and gamma where the grid lists it.
svc = svm.SVC()

# Hyperparameter grid: one dict per kernel, so kernel-specific settings
# (gamma, degree) are only combined with the kernels they are listed for.
parameters = [
    {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
    {'C': [1, 10, 100, 1000], 'kernel': ['rbf'],
     'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]},
    {'C': [1, 10, 100, 1000], 'kernel': ['poly'], 'degree': [2, 3, 4],
     'gamma': [0.01, 0.02, 0.03, 0.04, 0.05]},
]

# 4 + 36 + 60 = 100 candidates x 3 folds = 300 cross-validation fits (plus the
# final refit), so spread them over all available cores with n_jobs=-1.
# Parallel execution does not change the resulting scores.
grid_search = GridSearchCV(estimator=svc,
                           param_grid=parameters,
                           scoring='accuracy',
                           cv=3,
                           n_jobs=-1,
                           verbose=0)

# NOTE(review): `features` was standardized on the full dataset before
# cross-validation, so each validation fold contributed to the scaling
# statistics and the reported score may be slightly optimistic. Wrapping the
# scaler and the SVC in a Pipeline would avoid this, at the cost of prefixing
# the parameter names (e.g. 'svc__C').
grid_result = grid_search.fit(features, df_pulsar['target_class'])

# Only the last expression of a cell is displayed, so the winning estimator
# has to be printed explicitly; the best mean CV accuracy is the cell output.
print(grid_result.best_estimator_)
grid_result.best_score_
                              

0.9797260864876524