In [1]:
import pickle
from sklearn.model_selection import StratifiedKFold,cross_val_predict,train_test_split
from sklearn.metrics import accuracy_score,classification_report,precision_score,recall_score,f1_score,confusion_matrix

In [2]:
#Loading processed data from preprocessing.ipynb
with open('processed_data.pkl','rb') as file:
    data=pickle.load(file)

In [3]:
data.head()

Unnamed: 0,Prefix_Suffix,having_Sub_Domain,SSLfinal_State,Domain_registeration_length,Request_URL,URL_of_Anchor,Links_in_tags,SFH,web_traffic,Google_Index,Links_pointing_to_page,Result
0,0,-1,-1,0,1,-1,1,-1,-1,1,1,0
1,0,0,1,0,1,0,-1,-1,0,1,1,0
2,0,-1,-1,0,1,0,-1,-1,1,1,0,0
3,0,-1,-1,1,0,0,0,-1,1,1,-1,0
4,0,1,1,0,1,0,0,-1,0,1,1,1


In [4]:
X=data.drop(columns=['Result']) #Store only target variable
y=data['Result']    #Store all input features

In [None]:
#It is spliting the data 70% for training ad 30% for testing
#X_train-> Input features for training
#y_train-> Target values for training
#X_test-> Input features for testing
#y_test-> Target values for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [6]:
X_train.head()

Unnamed: 0,Prefix_Suffix,having_Sub_Domain,SSLfinal_State,Domain_registeration_length,Request_URL,URL_of_Anchor,Links_in_tags,SFH,web_traffic,Google_Index,Links_pointing_to_page
10168,0,0,-1,0,0,-1,1,-1,0,1,1
10167,0,1,-1,0,0,0,-1,-1,1,1,-1
2115,0,0,-1,1,0,-1,0,-1,-1,1,0
1699,0,-1,-1,0,1,-1,-1,-1,0,0,0
1379,0,1,1,0,1,0,-1,-1,1,1,0


In [7]:
X_test.head()

Unnamed: 0,Prefix_Suffix,having_Sub_Domain,SSLfinal_State,Domain_registeration_length,Request_URL,URL_of_Anchor,Links_in_tags,SFH,web_traffic,Google_Index,Links_pointing_to_page
10582,0,-1,0,0,0,-1,-1,-1,0,1,1
7533,0,0,-1,0,1,0,-1,-1,1,1,-1
10806,0,1,-1,1,1,0,0,-1,0,0,1
2992,0,0,1,0,1,0,0,-1,1,0,1
8940,0,1,1,1,0,0,-1,1,1,0,0


In [6]:
#Importing ELM class from elmModel.py
from elmModel import ELM

Hello World 4


In [None]:
#Initializing the model with 200 hidden neurons, sigmoid activation function & random state of 42
elm=ELM(n_hidden=200,activation='sigmoid',random_state=42)

In [8]:
#Doing Cross Validation to avoid overfitiing 
#It will make 5 splits of data and then trained 4 splits and test the 5th one step by step
cv=StratifiedKFold(n_splits=5,shuffle=True,random_state=42)

In [9]:
#Prediction of Cross Validation of ELM model
y_pred_cv=cross_val_predict(elm,X_train,y_train,cv=cv)

In [10]:
print("Cross-Validation Accuracy:", accuracy_score(y_train, y_pred_cv))
print("Classification Report:\n", classification_report(y_train, y_pred_cv))

Cross-Validation Accuracy: 0.6316877746187646
Classification Report:
               precision    recall  f1-score   support

           0       0.55      1.00      0.71      3470
           1       1.00      0.33      0.50      4268

    accuracy                           0.63      7738
   macro avg       0.77      0.67      0.60      7738
weighted avg       0.80      0.63      0.59      7738



In [11]:
#Fitting Elm model with training data
elm.fit(X_train,y_train)

In [12]:
#Predicting the test data
pred=elm.predict(X_test)
pred

array([0, 0, 0, ..., 0, 1, 0])

Evaluation Metrices

In [13]:
print("Accuracy: ",accuracy_score(y_test,pred))
print("Classification Report: \n",classification_report(y_test,pred))
print("Precision: ",precision_score(y_test,pred))
print("Recall: ",recall_score(y_test,pred))
print("F1 Score: ",f1_score(y_test,pred))
print("Confusion Matrix: ",confusion_matrix(y_test,pred))

Accuracy:  0.619837202291227
Classification Report: 
               precision    recall  f1-score   support

           0       0.53      1.00      0.69      1428
           1       1.00      0.33      0.50      1889

    accuracy                           0.62      3317
   macro avg       0.76      0.67      0.60      3317
weighted avg       0.80      0.62      0.58      3317

Precision:  0.9968354430379747
Recall:  0.33350979354155635
F1 Score:  0.49980166600555337
Confusion Matrix:  [[1426    2]
 [1259  630]]


In [14]:
#It is a object of evaluation metrices score
elm_evaluation={
    'precision':precision_score(y_test,pred),
    'recall':recall_score(y_test,pred),
    'f1_score':f1_score(y_test,pred),
    'accuracy':accuracy_score(y_test,pred),
    'classification':classification_report(y_test,pred,output_dict=True),
    'confusion_matrix':confusion_matrix(y_test,pred).tolist()
}

In [15]:
#Make a pickle file to use in other file
with open('elm_evaluation.pkl','wb') as file:
    pickle.dump(elm_evaluation,file)