In [42]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import PowerTransformer
from sklearn.model_selection import train_test_split

from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [2]:
df=pd.read_csv("./weather_norm.csv")

In [3]:
df.head()

Unnamed: 0,temperature,humidity,pressure,Temperature Index,Humidity Index,Pressure Index,Misery Index
0,0.277778,1.679303,0.465116,-0.940573,0.94,-0.077549,0.646201
1,0.296296,1.490335,0.44186,-0.923042,0.92,-0.124052,0.649161
2,0.259259,1.3402,0.44186,-0.956408,0.9,-0.124052,0.653572
3,0.333333,0.81091,0.44186,-0.882891,0.8,-0.124052,0.59631
4,0.388889,0.739737,0.395349,-0.809944,0.78,-0.217058,0.59633


In [4]:
X=df[['temperature','humidity','pressure']]
y=df[['Misery Index']]

In [5]:
X.head()

Unnamed: 0,temperature,humidity,pressure
0,0.277778,1.679303,0.465116
1,0.296296,1.490335,0.44186
2,0.259259,1.3402,0.44186
3,0.333333,0.81091,0.44186
4,0.388889,0.739737,0.395349


In [6]:
X.describe()

Unnamed: 0,temperature,humidity,pressure
count,30248.0,30248.0,30248.0
mean,0.53545,0.005203,0.497042
std,0.183767,1.025275,0.1658
min,0.0,-5.199338,0.0
25%,0.388889,-0.687939,0.395349
50%,0.537037,0.003764,0.488372
75%,0.685185,0.667419,0.604651
max,1.0,5.199338,1.0


In [7]:
y.head()

Unnamed: 0,Misery Index
0,0.646201
1,0.649161
2,0.653572
3,0.59631
4,0.59633


### Reweight outputs to scale between 0-3

In [8]:
for x in range(0,len(y['Misery Index'])):
    if y['Misery Index'][x]<=0.25:
        y['Misery Index'][x]=int(0)
    elif y['Misery Index'][x]<=0.5:
        y['Misery Index'][x]=int(1)
    if y['Misery Index'][x]<=0.75:
        y['Misery Index'][x]=int(2)
    else:
        y['Misery Index'][x]=int(3)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exec(code_obj, self.user_global_ns, self.user_ns)


### 70/30 Train-Test Split

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y['Misery Index'], test_size=0.3, random_state=1)

### K-Nearest Neighbors Classifier

In [10]:
# Create copy of training and test values for kNN classifier
knn_Xtrain=X_train
knn_ytrain=y_train
knn_Xtest=X_test
knn_ytest=y_test

In [11]:
neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(knn_Xtrain, knn_ytrain)
knn_predict=neigh.predict(knn_Xtest)

In [64]:
knn_score=accuracy_score(knn_ytest, knn_predict)
print("Accuracy score:",knn_score)

Accuracy score: 0.9544903581267218


### Create results dataframe to compare our results across classifiers

In [76]:
dataframe={'Actual': y_test,'KNN-Prediction': knn_predict}
results=pd.DataFrame(data=dataframe)
results.head()

Unnamed: 0,Actual,KNN-Prediction
3203,3.0,2.0
1953,2.0,2.0
1405,3.0,3.0
24476,3.0,3.0
17627,2.0,2.0


### Decision Tree Classifier

In [18]:
# # Create copy of training and test values for kNN classifier
dt_Xtrain=X_train
dt_ytrain=y_train
dt_Xtest=X_test
dt_ytest=y_test

In [19]:
#Create Decision Tree Classifier Object
dt_classifier = DecisionTreeClassifier(random_state=0)
dt_classifier = dt_classifier.fit(dt_Xtrain, dt_ytrain)
dt_predict=dt_classifier.predict(dt_Xtest)

In [63]:
dt_score=accuracy_score(dt_ytest, dt_predict)
print("Accuracy score:",dt_score)

Accuracy score: 0.9672727272727273


### Add DT results dataframe

In [71]:
results['DT Prediction']=dt_predict
results.head()

Unnamed: 0,Actual,KNN-Prediction,DT Prediction,NB Prediction,MLP Prediction,DT Score
3203,3.0,2.0,3.0,2.0,2.0,0.967273
1953,2.0,2.0,2.0,3.0,3.0,0.967273
1405,3.0,3.0,3.0,3.0,3.0,0.967273
24476,3.0,3.0,3.0,3.0,3.0,0.967273
17627,2.0,2.0,2.0,2.0,2.0,0.967273


### Gaussian Naieve Bayes Classifier

In [46]:
# Create copy of training and test values for Naive Bayes classifier
nb_Xtrain=X_train
nb_ytrain=y_train
nb_Xtest=X_test
nb_ytest=y_test

In [47]:
gnb = GaussianNB()
nb_predict = gnb.fit(nb_Xtrain, nb_ytrain).predict(nb_Xtest)

In [62]:
nb_score=accuracy_score(nb_ytest, nb_predict)
print("Accuracy score:",nb_score)

Accuracy score: 0.628099173553719


### Add NB results dataframe

In [49]:
results['NB Prediction']=nb_predict
results.head()

Unnamed: 0,Actual,KNN-Prediction,DT Prediction,NB Prediction
3203,3.0,2.0,3.0,2.0
1953,2.0,2.0,2.0,3.0
1405,3.0,3.0,3.0,3.0
24476,3.0,3.0,3.0,3.0
17627,2.0,2.0,2.0,2.0


### Multi-Layer Preceptron Classifier

In [50]:
# Create copy of training and test values for Naive Bayes classifier
mlp_Xtrain=X_train
mlp_ytrain=y_train
mlp_Xtest=X_test
mlp_ytest=y_test

In [51]:
mlp = MLPClassifier(solver='adam', alpha=1e-5,hidden_layer_sizes=(5, 2), random_state=1)
mlp.fit(mlp_Xtrain,mlp_ytrain)



MLPClassifier(alpha=1e-05, hidden_layer_sizes=(5, 2), random_state=1)

In [52]:
mlp_predict=mlp.predict(mlp_Xtest)

In [61]:
mlp_score=accuracy_score(y_test,mlp_predict)
print("Accuracy score:",mlp_score)

Accuracy score: 0.8260055096418732


### Add MLP results to dataframe

In [53]:
results['MLP Prediction']=mlp_predict
results.head()

Unnamed: 0,Actual,KNN-Prediction,DT Prediction,NB Prediction,MLP Prediction
3203,3.0,2.0,3.0,2.0,2.0
1953,2.0,2.0,2.0,3.0,3.0
1405,3.0,3.0,3.0,3.0,3.0
24476,3.0,3.0,3.0,3.0,3.0
17627,2.0,2.0,2.0,2.0,2.0


In [57]:
results.head()

Unnamed: 0,Actual,KNN-Prediction,DT Prediction,NB Prediction,MLP Prediction
3203,3.0,2.0,3.0,2.0,2.0
1953,2.0,2.0,2.0,3.0,3.0
1405,3.0,3.0,3.0,3.0,3.0
24476,3.0,3.0,3.0,3.0,3.0
17627,2.0,2.0,2.0,2.0,2.0


In [95]:
#score_values=(knn_score, dt_score, nb_score, mlp_score)
scores=pd.DataFrame()
scores['KNN']=''
scores['DT']=''
scores['NB']=''
scores['MLP']=''
scores.loc[0,'KNN']=knn_score
scores.loc[0,'DT']=dt_score
scores.loc[0,'NB']=nb_score
scores.loc[0,'MLP']=mlp_score
scores.rename(index={0: 'Accuracy Score'},inplace=True)
scores.head()

Unnamed: 0,KNN,DT,NB,MLP
Accuracy Score,0.95449,0.967273,0.628099,0.826006
