In [1]:
# Lockdown Mania

In [2]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Algorithm
from sklearn.ensemble import RandomForestClassifier

In [3]:
# Importing training dataset
df_train = pd.read_csv('coronaTrain.csv')
X_train = df_train.iloc[:,1:1001]
Y_train = df_train.iloc[:, -1]

#importing test dataset
df_test = pd.read_csv('coronaTest.csv')
X_test = df_test.iloc[:,1:1001]

In [4]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

In [5]:
# Random Forest on training set
random_forest = RandomForestClassifier(n_estimators=100)
random_forest.fit(X_train, Y_train)
acc_random_forest = round(random_forest.score(X_train, Y_train) * 100, 2)
print(acc_random_forest)

100.0


In [6]:
# Applying k-Fold Cross Validation
from sklearn.model_selection import cross_val_score
accuracies_rf = cross_val_score(random_forest, X = X_train, y = Y_train, cv = 10,scoring='accuracy')
print(accuracies_rf.mean())
print(accuracies_rf.std())

0.8648342464221825
0.014480089306378369


In [7]:
# Hyper-parameter Tuning
parameters = { "criterion" : ["gini", "entropy"], 
               "min_samples_leaf" : [1, 5, 10], 
               "min_samples_split" : [2, 8, 12, 18], 
               "n_estimators": [400, 700, 1000]}
from sklearn.model_selection import GridSearchCV, cross_val_score
random_forest = RandomForestClassifier(n_estimators=100, 
                            random_state=1, 
                            n_jobs=-1)
clf = GridSearchCV(estimator=random_forest, 
                   param_grid=parameters, 
                   scoring='accuracy',
                   n_jobs=-1)
clf.fit(X_train, Y_train)
print(clf.best_params_)
print(clf.best_score_)



{'criterion': 'entropy', 'min_samples_leaf': 1, 'min_samples_split': 8, 'n_estimators': 1000}
0.9153069153069153


In [10]:
## Random Forest with new parameters
random_forest = RandomForestClassifier(criterion = "entropy", 
                                       min_samples_leaf = 1, 
                                       min_samples_split = 8,   
                                       n_estimators=1000, 
                                       random_state=1, 
                                       n_jobs=-1)

random_forest.fit(X_train, Y_train)
Y_pred = random_forest.predict(X_test)
print(Y_pred)

[0 2 2 ... 0 1 0]


In [18]:
# Exporting Submission File
Y_pred = pd.DataFrame(Y_pred)
Y_pred.column = ['Status']
print(Y_pred)
Y_pred.to_csv('LockdownManiaFinal.csv')

  This is separate from the ipykernel package so we can avoid doing imports until


      Index  Status
0         0       0
1         1       2
2         2       2
3         3       0
4         4       0
5         5       2
6         6       2
7         7       2
8         8       2
9         9       0
10       10       0
11       11       0
12       12       1
13       13       1
14       14       1
15       15       2
16       16       0
17       17       1
18       18       1
19       19       2
20       20       2
21       21       1
22       22       1
23       23       1
24       24       1
25       25       2
26       26       1
27       27       0
28       28       2
29       29       2
...     ...     ...
1302   1302       1
1303   1303       2
1304   1304       1
1305   1305       1
1306   1306       2
1307   1307       0
1308   1308       2
1309   1309       0
1310   1310       2
1311   1311       2
1312   1312       1
1313   1313       2
1314   1314       1
1315   1315       1
1316   1316       2
1317   1317       0
1318   1318       2
1319   1319       2


In [None]:
## The End