# Import Required Libraries

In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score

In [15]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier

In [16]:
TRAIN = pd.read_csv('../input/asteroid-train-and-test-dataset-clean/TRAIN_CLEANED.csv')
TEST = pd.read_csv('../input/asteroid-train-and-test-dataset-clean/TEST_CLEANED.csv')
TRAIN.head()

Unnamed: 0,near_earth,potentially_hazardous,absolute_magnitude_parameter,epoch,eccentricity,semi_major_axis,perihelion_distance,inclination,om,w,...,ad,n,tp,per,per_y,moid_ld,sigma_i,sigma_w,class,rms
0,0,0,19.487,2459000.5,0.302488,2.317476,1.616467,8.616829,120.269614,151.286378,...,3.018484,0.279371,2458461.0,1288.609117,3.528019,235.966224,0.000715,0.002956,7,0.44852
1,0,0,16.1,2459000.5,0.019156,3.10636,3.046854,7.599659,224.086389,316.605492,...,3.165865,0.180022,2459633.0,1999.75152,5.475021,797.708991,7e-06,0.000163,6,0.57734
2,0,0,15.6,2459000.5,0.16768,2.608938,2.171471,13.473446,9.697766,67.936834,...,3.046404,0.233888,2459715.0,1539.196089,4.214089,476.818867,9e-06,3.5e-05,6,0.56499
3,0,0,19.1,2459000.5,0.288892,2.197469,1.562638,8.658255,185.816818,186.820179,...,2.832301,0.302566,2459338.0,1189.823817,3.25756,219.160307,4.3e-05,0.000876,7,0.66681
4,0,0,15.5,2459000.5,0.092806,2.866441,2.600417,1.076426,268.833672,67.47611,...,3.132465,0.20309,2459492.0,1772.608946,4.853139,619.161687,5e-06,0.000266,6,0.58503


In [17]:
temp_df = TRAIN.copy()
X = temp_df.drop('potentially_hazardous', axis = 1)
y = temp_df['potentially_hazardous']

In [18]:
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size = 0.2, stratify = y, random_state = 42)

In [19]:
X_test = TEST.drop('potentially_hazardous', axis = 1)
y_test = TEST['potentially_hazardous']

In [20]:
models = [LogisticRegression(), GaussianNB(), KNeighborsClassifier(), DecisionTreeClassifier(), LGBMClassifier()]

In [21]:
def get_model_accuracy(model_class):
    model = model_class
    model.fit(X_train, y_train)
    predictions = model.predict(X_valid)
    print('\nModel: ' + str(model_class))
    print(f'Accuracy Score: {accuracy_score(predictions, y_valid)}')
    print('Classification Report:')
    print(classification_report(predictions, y_valid))
    print('Confusion Matrix:')
    print(confusion_matrix(predictions, y_valid))
    print('')
    return recall_score(predictions, y_valid)

# Get Accuracy of Models

In [22]:
evaluation_table = {}

In [23]:
get_model_accuracy(CatBoostClassifier())
evaluation_table['CatBoostClassifier()'] = get_model_accuracy(CatBoostClassifier())

Learning rate set to 0.044852
0:	learn: 0.5627700	total: 15.2ms	remaining: 15.2s
1:	learn: 0.4459117	total: 31.1ms	remaining: 15.5s
2:	learn: 0.3330756	total: 45.1ms	remaining: 15s
3:	learn: 0.2664113	total: 61.3ms	remaining: 15.3s
4:	learn: 0.2114884	total: 76.8ms	remaining: 15.3s
5:	learn: 0.1719329	total: 91ms	remaining: 15.1s
6:	learn: 0.1389269	total: 106ms	remaining: 15.1s
7:	learn: 0.1095565	total: 120ms	remaining: 14.9s
8:	learn: 0.0864265	total: 136ms	remaining: 15s
9:	learn: 0.0666199	total: 150ms	remaining: 14.9s
10:	learn: 0.0536540	total: 164ms	remaining: 14.8s
11:	learn: 0.0427806	total: 178ms	remaining: 14.6s
12:	learn: 0.0337512	total: 192ms	remaining: 14.6s
13:	learn: 0.0266293	total: 206ms	remaining: 14.5s
14:	learn: 0.0207874	total: 219ms	remaining: 14.4s
15:	learn: 0.0167246	total: 232ms	remaining: 14.3s
16:	learn: 0.0135053	total: 245ms	remaining: 14.2s
17:	learn: 0.0111986	total: 259ms	remaining: 14.1s
18:	learn: 0.0092353	total: 272ms	remaining: 14s
19:	learn: 0.

In [24]:
get_model_accuracy(XGBClassifier())
evaluation_table['XGBoostClassifier()'] = get_model_accuracy(XGBClassifier())






Model: XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=6,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=4, num_parallel_tree=1, random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)
Accuracy Score: 1.0
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      7815
           1       1.00      1.00      1.00        21

    accuracy                           1.00      7836
   macro avg       1.00      1.00      1.00      7836
weighted avg       1.00      1.00      1.00      7836

Confusion Matrix:
[[7815    0]
 [   0   

In [25]:
get_model_accuracy(RandomForestClassifier())
evaluation_table['RandomForestClassifier()'] = get_model_accuracy(RandomForestClassifier())


Model: RandomForestClassifier()
Accuracy Score: 0.9997447677386422
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      7817
           1       0.90      1.00      0.95        19

    accuracy                           1.00      7836
   macro avg       0.95      1.00      0.97      7836
weighted avg       1.00      1.00      1.00      7836

Confusion Matrix:
[[7815    2]
 [   0   19]]


Model: RandomForestClassifier()
Accuracy Score: 0.9998723838693211
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      7816
           1       0.95      1.00      0.98        20

    accuracy                           1.00      7836
   macro avg       0.98      1.00      0.99      7836
weighted avg       1.00      1.00      1.00      7836

Confusion Matrix:
[[7815    1]
 [   0   20]]



In [26]:
for i in range(len(models)):
    evaluation_table[str(models[i])] = get_model_accuracy(models[i])

AttributeError: 'str' object has no attribute 'decode'

In [None]:
print(evaluation_table)

In [None]:
type(evaluation_table)

In [None]:
for e in evaluation_table:
    print(e)

In [None]:
model_performance_table = pd.DataFrame(evaluation_table.values(), evaluation_table.keys())
model_performance_table.rename(columns = {0: 'Recall Score'}, inplace = True)
model_performance_table.sort_values('Recall Score', inplace = True, ascending = False)
model_performance_table