In [1]:
#importing libraries
import pandas as pd
import numpy as np
from sklearn.metrics import recall_score,accuracy_score,precision_score,f1_score,confusion_matrix

#reading the file and droppping the stab columns
data=pd.read_csv('Data_for_UCI_named.csv')
data=data.drop(columns=['stab'])

#creating the features and target variable
x=data.drop(columns=['stabf'])
y=data['stabf']

#splitting the data into train and test
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=1)

#standard scaling the x_train data
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
standard_train_df=sc.fit_transform(x_train)
standard_train_df=pd.DataFrame(standard_train_df,columns=x_train.columns)

#standard scaling the x_test data
standard_test_df=sc.transform(x_test)
standard_test_df=pd.DataFrame(standard_test_df,columns=x_test.columns)

## QUESTION 14

In [2]:
#RANDOM FOREST CLASSIFIER
from sklearn.ensemble import RandomForestClassifier
rfc=RandomForestClassifier(random_state=1)
rfc.fit(standard_train_df,y_train)

new_predictions=rfc.predict(standard_test_df)

#accuracy
accuracy=accuracy_score(y_true=y_test,y_pred=new_predictions)
print(round(accuracy,4))



0.929


##   QUESTION 15

In [3]:
# XGB CLASSIFIER
from xgboost import XGBClassifier 
xgb=XGBClassifier(random_state=1,max_depth=3,learning_rate=0.1)
xgb.fit(standard_train_df,y_train)


new_predictions=xgb.predict(standard_test_df)

#ACCURACY
accuracy=accuracy_score(y_true=y_test,y_pred=new_predictions)
print(round(accuracy,4))


0.9195


## QUESTION 16

In [4]:
# LGBM CLASSIFIER

from lightgbm import LGBMClassifier
lgbm=LGBMClassifier(random_state=1)
lgbm.fit(standard_train_df,y_train)

new_predictions=lgbm.predict(standard_test_df)

#accuracy
accuracy=accuracy_score(y_true=y_test,y_pred=new_predictions)
print(round(accuracy,4))



0.9375


## QUESTION 17

In [6]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import ExtraTreesClassifier

n_estimators = [50, 100, 300, 500, 1000]
min_samples_split = [2, 3, 5, 7, 9]
min_samples_leaf = [1, 2, 4, 6, 8]
max_features = ['auto', 'sqrt', 'log2', None]
hyperparameter_grid = {'n_estimators': n_estimators,'min_samples_leaf': min_samples_leaf,
                       'min_samples_split': min_samples_split,'max_features': max_features}


clf=RandomizedSearchCV(ExtraTreesClassifier(random_state=1),hyperparameter_grid,cv=5,
                       n_iter=10,scoring='accuracy',n_jobs = -1,verbose = 1)

clf.fit(standard_train_df,y_train)


#confusion matrix
new_predictions=clf.predict(standard_test_df)
cnf_mat=confusion_matrix(y_true=y_test,y_pred=new_predictions,labels=['stable','unstable'])
print(cnf_mat)

#accuracy
accuracy=accuracy_score(y_true=y_test,y_pred=new_predictions)
print('Accuracy: {}'.format(round(accuracy*100),2))

#precision
precision=precision_score(y_true=y_test,y_pred=new_predictions, pos_label='unstable')
print('precision:{}'.format(round(precision*100),2))

#recall
recall=recall_score(y_true=y_test,y_pred=new_predictions,pos_label='unstable')
print('Recall:{}'.format(round(recall*100),2))

#
f1=f1_score(y_true=y_test,y_pred=new_predictions,pos_label='unstable')
print('F1:{}'.format(round(f1*100),2))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed:  3.8min
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:  4.1min finished


[[ 598  114]
 [  15 1273]]
Accuracy: 94
precision:92
Recall:99
F1:95


## QUESTION 18

In [7]:
#EXTRA TREES CLASSIFIER

etc=ExtraTreesClassifier(random_state=1)
etc.fit(standard_train_df,y_train)


#confusion matrix
new_predictions=etc.predict(standard_test_df)
cnf_mat=confusion_matrix(y_true=y_test,y_pred=new_predictions,labels=['stable','unstable'])
print(cnf_mat)

#accuracy
accuracy=accuracy_score(y_true=y_test,y_pred=new_predictions)
print('Accuracy: {}'.format(round(accuracy*100),2))

#precision
precision=precision_score(y_true=y_test,y_pred=new_predictions, pos_label='unstable')
print('precision:{}'.format(round(precision*100),2))

#recall
recall=recall_score(y_true=y_test,y_pred=new_predictions,pos_label='unstable')
print('Recall:{}'.format(round(recall*100),2))

#
f1=f1_score(y_true=y_test,y_pred=new_predictions,pos_label='unstable')
print('F1:{}'.format(round(f1*100),2))

[[ 606  106]
 [  38 1250]]
Accuracy: 93
precision:92
Recall:97
F1:95


QUESTION 20

In [8]:
#FEATURE IMPOTANCES 
pd.Series(etc.feature_importances_,index=standard_train_df.columns).sort_values()

p1      0.039507
p2      0.040371
p4      0.040579
p3      0.040706
g1      0.089783
g2      0.093676
g4      0.094019
g3      0.096883
tau3    0.113169
tau4    0.115466
tau1    0.117397
tau2    0.118445
dtype: float64