
# Support Vector Machine


## Import Libraries
Let's import some libraries to get started!

In [37]:
import pandas as pd
import numpy as np

## The Data



In [38]:
df = pd.read_csv('preprocessed_dataset.csv')

In [39]:
df.head()

Unnamed: 0.1,Unnamed: 0,PayloadMass,Flights,GridFins,Reused,Legs,Block,ReusedCount,Class,Orbit_ES-L1,...,Serial_B1048,Serial_B1049,Serial_B1050,Serial_B1051,Serial_B1054,Serial_B1056,Serial_B1058,Serial_B1059,Serial_B1060,Serial_B1062
0,0,6104.959412,1,0,0,0,1.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,525.0,1,0,0,0,1.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2,677.0,1,0,0,0,1.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,3,500.0,1,0,0,0,1.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,3170.0,1,0,0,0,1.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Exploratory Data Analysis

Let's begin some exploratory data analysis! We'll start by checking out missing data!

## Missing Data



In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90 entries, 0 to 89
Data columns (total 89 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   Unnamed: 0                           90 non-null     int64  
 1   PayloadMass                          90 non-null     float64
 2   Flights                              90 non-null     int64  
 3   GridFins                             90 non-null     int64  
 4   Reused                               90 non-null     int64  
 5   Legs                                 90 non-null     int64  
 6   Block                                90 non-null     float64
 7   ReusedCount                          90 non-null     int64  
 8   Class                                90 non-null     int64  
 9   Orbit_ES-L1                          90 non-null     int64  
 10  Orbit_GEO                            90 non-null     int64  
 11  Orbit_GTO                         

# Define X , y

In [40]:
X=df.drop('Class',axis=1)
y=df['Class']

Great! Our data is ready for our model!

# Building a Decision tree Model

Let's start by splitting our data into a training set and test set

## Train Test Split

In [6]:
from sklearn.model_selection import train_test_split

In [41]:
X_train, X_test, y_train, y_test = train_test_split(X, 
                                                    y, test_size=0.2, 
                                                    random_state=101)

## Training and Predicting

In [42]:
# Support Vector Machine classification algorithm
from sklearn.svm import SVC

In [43]:
svm = SVC()

In [44]:
svm.fit(X_train,y_train)

In [45]:
y

0     0
1     0
2     0
3     0
4     0
     ..
85    1
86    1
87    1
88    1
89    1
Name: Class, Length: 90, dtype: int64

In [46]:
predictions = svm.predict(X_test)

In [47]:
predictions

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int64)

In [48]:
y_test

50    0
6     1
51    0
54    1
53    1
69    1
32    1
31    1
21    1
88    1
43    1
47    0
3     0
1     0
74    0
16    1
45    0
25    1
Name: Class, dtype: int64

Let's move on to evaluate our model!

## Evaluation

Let's bring Confusion Matrix!

In [49]:
from sklearn.metrics import confusion_matrix

In [16]:
confusion_matrix(y_test,predictions)

array([[ 0,  7],
       [ 0, 11]], dtype=int64)

We can check precision,recall,f1-score using classification report!

In [17]:
from sklearn.metrics import accuracy_score

In [18]:
accuracy_score(y_test,predictions, normalize=False)

11.0

In [19]:
accuracy_score(y_test,predictions, normalize=True)

0.6111111111111112

In [20]:
from sklearn.metrics import classification_report

In [21]:
print(classification_report(y_test,predictions))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         7
           1       0.61      1.00      0.76        11

    accuracy                           0.61        18
   macro avg       0.31      0.50      0.38        18
weighted avg       0.37      0.61      0.46        18



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



## Grid Search !

In [22]:
# Allows us to test parameters of classification algorithms and find the best one
from sklearn.model_selection import GridSearchCV

In [23]:
svm_1 = SVC()

In [24]:
parameters = {'C': [0.5, 1, 1.5],
            'kernel':['linear', 'rbf','sigmoid']}

In [25]:
svm_cv = GridSearchCV(svm_1, parameters)
svm_cv.fit(X_train, y_train)

In [26]:
print(svm_cv.best_params_)

tuned hpyerparameters :(best parameters)  {'C': 0.5, 'kernel': 'linear'}


In [27]:
svm_1 = SVC(C= 0.5, kernel= 'linear')

In [28]:
svm_1.fit(X_train,y_train)

In [30]:
predictions_1 = svm_1.predict(X_test)

In [31]:
confusion_matrix(y_test,predictions_1)

array([[ 5,  2],
       [ 1, 10]], dtype=int64)

In [32]:
accuracy_score(y_test,predictions_1, normalize=False)

15.0

In [33]:
accuracy_score(y_test,predictions_1, normalize=True)

0.8333333333333334

In [34]:
print(classification_report(y_test,predictions))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         7
           1       0.61      1.00      0.76        11

    accuracy                           0.61        18
   macro avg       0.31      0.50      0.38        18
weighted avg       0.37      0.61      0.46        18



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# More parameters in GridSearch

In [35]:
svm_cv = GridSearchCV(svm_1, parameters, cv=4)
svm_cv.fit(X_train, y_train)

In [36]:
svm_cv = GridSearchCV(svm_1, parameters, cv=4, verbose=3 )
svm_cv.fit(X_train, y_train)

Fitting 4 folds for each of 9 candidates, totalling 36 fits
[CV 1/4] END ..............C=0.5, kernel=linear;, score=0.889 total time= 2.1min
[CV 2/4] END ..............C=0.5, kernel=linear;, score=0.833 total time=  12.6s
[CV 3/4] END ..............C=0.5, kernel=linear;, score=0.833 total time=  11.1s
[CV 4/4] END ..............C=0.5, kernel=linear;, score=1.000 total time=   8.5s
[CV 1/4] END .................C=0.5, kernel=rbf;, score=0.667 total time=   0.0s
[CV 2/4] END .................C=0.5, kernel=rbf;, score=0.667 total time=   0.0s
[CV 3/4] END .................C=0.5, kernel=rbf;, score=0.667 total time=   0.0s
[CV 4/4] END .................C=0.5, kernel=rbf;, score=0.722 total time=   0.0s
[CV 1/4] END .............C=0.5, kernel=sigmoid;, score=0.556 total time=   0.0s
[CV 2/4] END .............C=0.5, kernel=sigmoid;, score=0.444 total time=   0.0s
[CV 3/4] END .............C=0.5, kernel=sigmoid;, score=0.611 total time=   0.0s
[CV 4/4] END .............C=0.5, kernel=sigmoid;,