In [151]:
# load class
import numpy as np
import pandas as pd
import os

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [152]:
# load data
df = pd.read_csv('data_modified.csv')
df.head()

Unnamed: 0,Temp,Humd,Label
0,25.5,38.0,1
1,25.5,38.0,1
2,25.5,38.0,1
3,25.5,38.0,1
4,25.5,38.0,1


In [153]:
# checking for missing values
missing_values = df.isnull().sum()
print('Missing Values:\n',missing_values)

Missing Values:
 Temp     0
Humd     0
Label    0
dtype: int64


In [154]:
# removing duplicates
df = df.drop_duplicates()

In [155]:
# preparing features
x = df.drop(['Label'], axis=1)
y = df['Label']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.35) #testing 35% of data

In [156]:
# scale the data using standardization
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [157]:
# K-Nearest Neighbors

from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=3)
knn = knn.fit(x_train, y_train)
predict = knn.predict(x_test)

print('Accuracy:', metrics.accuracy_score(y_test, predict))

Accuracy: 1.0


In [158]:
# Logistic Regression

from sklearn.linear_model import LogisticRegression

lreg = LogisticRegression()
lreg = lreg.fit(x_train, y_train)
lpred = lreg.predict(x_test)

print('Accuracy: {:.2f}'.format(metrics.accuracy_score(y_test, lpred)))

Accuracy: 0.86


In [161]:
# Decision Tree

from sklearn.tree import DecisionTreeClassifier

tree = DecisionTreeClassifier()
tree = tree.fit(x_train, y_train) 
treepred = tree.predict(x_test)

print('Accuracy: {:.2f}'.format(metrics.accuracy_score(y_test, treepred)))

Accuracy: 0.97


In [162]:
# Naive Bayes

from sklearn.naive_bayes import GaussianNB

nb = GaussianNB()
nb.fit(x_train, y_train)
nb_pred = nb.predict(x_test)

print('Accuracy: {:.2f}'.format(metrics.accuracy_score(y_test, nb_pred)))

Accuracy: 0.50


In [163]:
algorithms = {
    'K-Nearest Neighbors':knn,
    'Logistic Regression': lreg,
    'Decision Tree': tree,
    'Naive Bayes': nb
}
    
results = []

for name, model in algorithms.items():
    y_pred = model.predict(x_test)
    results.append({
        'Model': name,
        'Accuracy': round(metrics.accuracy_score(y_test, y_pred),2),
        'Precision': round(metrics.precision_score(y_test, y_pred, average='weighted'),2),
        'Recall': round(metrics.recall_score(y_test, y_pred, average='weighted'),2),
        'F1-Score':round(metrics.f1_score(y_test, y_pred, average='weighted'),2)
    })
    
results_df = pd.DataFrame(results)
print(results_df)

                 Model  Accuracy  Precision  Recall  F1-Score
0  K-Nearest Neighbors      1.00       1.00    1.00      1.00
1  Logistic Regression      0.86       0.88    0.86      0.84
2        Decision Tree      0.97       0.97    0.97      0.97
3          Naive Bayes      0.50       0.82    0.50      0.49


In [164]:
# save and load scikit-learn models

import pickle

filename = 'model.pickle'

pickle.dump(algorithms, open(filename, 'wb'))