In [None]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn import preprocessing
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm
from sklearn.metrics import jaccard_score, mean_absolute_error, mean_squared_error
from sklearn.metrics import f1_score
from sklearn.metrics import log_loss, r2_score
from sklearn.metrics import confusion_matrix, accuracy_score
import sklearn.metrics as metrics


# Remote location of Weather_Data.csv, the only input this script reads.
url = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-ML0101EN-SkillUp/labs/ML-FinalAssignment/Weather_Data.csv'

# Download and parse the CSV into a DataFrame (requires network access).
df = pd.read_csv(filepath_or_buffer=url)

# Preview of the first five rows; the result is not assigned or printed, so
# it is only visible when this is the last expression of a notebook cell.
df.head(n=5)

# Build the fully numeric modelling table in one chained pipeline:
#   1. one-hot encode the listed categorical columns,
#   2. turn any remaining 'No'/'Yes' strings into 0/1
#      (presumably only the RainTomorrow column still holds them - confirm),
#   3. discard the Date column,
#   4. cast every column to float so the estimators receive numeric input.
df_sydney_processed = (
    pd.get_dummies(data=df, columns=['RainToday', 'WindGustDir', 'WindDir9am', 'WindDir3pm'])
    .replace(['No', 'Yes'], [0, 1])
    .drop('Date', axis=1)
    .astype(float)
)

# Predictors are every column except the target; the target is RainTomorrow.
features = df_sydney_processed.drop(columns=['RainTomorrow'])
Y = df_sydney_processed['RainTomorrow']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable.
x_train, x_test, y_train, y_test = train_test_split(features, Y, test_size=0.2, random_state=10)

# Ordinary least-squares fit of the numeric RainTomorrow target.
LinearReg = LinearRegression().fit(x_train, y_train)

LinearRpredictions = LinearReg.predict(x_test)

# Use the scikit-learn metric helpers (already imported at the top of the
# file) instead of re-deriving the formulas by hand: they validate the input
# shapes and remove the risk of a transcription error in the arithmetic.
LinearRegression_MAE = mean_absolute_error(y_test, LinearRpredictions)
LinearRegression_MSE = mean_squared_error(y_test, LinearRpredictions)
LinearRegression_R2 = r2_score(y_test, LinearRpredictions)

# Collect the three regression metrics in a small table for printing.
LinearReg_metrics = pd.DataFrame({
    'Metric': ['Mean Squared Error (MSE)', 'Mean Absolute Error (MAE)', 'R-squared (R2)'],
    'Value': [LinearRegression_MSE, LinearRegression_MAE, LinearRegression_R2]
})
print('Linear Regression Model Stats:\n', LinearReg_metrics, '\n\n')

# k-nearest-neighbours classifier with k=4, trained on the current
# x_train / y_train split and evaluated on x_test / y_test.
KNN = KNeighborsClassifier(n_neighbors=4)
KNN.fit(x_train, y_train)
KNNpredictions = KNN.predict(x_test)

# Classification quality on the held-out rows.
KNN_Accuracy_Score = accuracy_score(y_true=y_test, y_pred=KNNpredictions)
KNN_JaccardIndex = jaccard_score(y_true=y_test, y_pred=KNNpredictions)
KNN_F1_Score = f1_score(y_true=y_test, y_pred=KNNpredictions)

# One-row summary; 'Log Loss' is left empty because it is not computed
# for this model.
KNN_metrics = pd.DataFrame(
    {
        'Model': ['KNN'],
        'Accuracy': [KNN_Accuracy_Score],
        'Jaccard Index': [KNN_JaccardIndex],
        'F1 Score': [KNN_F1_Score],
        'Log Loss': [None],
    }
)


# Decision tree with default hyper-parameters, trained on the current
# x_train / y_train split and evaluated on x_test / y_test.
# NOTE(review): no random_state is passed, so scores may differ slightly
# between runs - confirm whether that is acceptable.
Tree = DecisionTreeClassifier()
Tree.fit(x_train, y_train)
treepredictions = Tree.predict(x_test)

# Classification quality on the held-out rows.
Tree_Accuracy_Score = accuracy_score(y_true=y_test, y_pred=treepredictions)
Tree_JaccardIndex = jaccard_score(y_true=y_test, y_pred=treepredictions)
Tree_F1_Score = f1_score(y_true=y_test, y_pred=treepredictions)

# One-row summary; 'Log Loss' is left empty because it is not computed
# for this model.
Tree_metrics = pd.DataFrame(
    {
        'Model': ['Decision Tree'],
        'Accuracy': [Tree_Accuracy_Score],
        'Jaccard Index': [Tree_JaccardIndex],
        'F1 Score': [Tree_F1_Score],
        'Log Loss': [None],
    }
)



# Re-split the data with a different seed (random_state=1). Everything from
# here on is trained and scored on this split, not on the random_state=10
# split used earlier in the script.
x_train, x_test, y_train, y_test = train_test_split(features, Y, test_size=0.2, random_state=1)

# Logistic regression using the liblinear solver.
LR = LogisticRegression(solver='liblinear').fit(x_train, y_train)

# Hard class labels feed accuracy / Jaccard / F1.
LogisticRegPredictions = LR.predict(x_test)
# Per-class probabilities (one column per class) feed log loss. This used to
# call predict(), which returns labels rather than probabilities.
LogRpredict_proba = LR.predict_proba(x_test)

LR_Accuracy_Score = accuracy_score(y_test, LogisticRegPredictions)
LR_JaccardIndex = jaccard_score(y_test, LogisticRegPredictions)
LR_F1_Score = f1_score(y_test, LogisticRegPredictions)
# log_loss expects probability estimates; passing hard 0/1 labels yields a
# clipped, meaningless value.
LR_Log_Loss = log_loss(y_test, LogRpredict_proba)

# One-row summary for the final report. The model is a LogisticRegression,
# so the row is labelled accordingly.
LR_metrics = pd.DataFrame({
    'Model': ['Logistic Regression'],
    'Accuracy': [LR_Accuracy_Score],
    'Jaccard Index': [LR_JaccardIndex],
    'F1 Score': [LR_F1_Score],
    'Log Loss': [LR_Log_Loss]
})


# Support-vector classifier with default hyper-parameters, trained on the
# current x_train / y_train split and evaluated on x_test / y_test.
SVM = svm.SVC()
SVM.fit(x_train, y_train)
SVMpredictions = SVM.predict(x_test)

# Classification quality on the held-out rows.
SVM_Accuracy_Score = accuracy_score(y_true=y_test, y_pred=SVMpredictions)
SVM_JaccardIndex = jaccard_score(y_true=y_test, y_pred=SVMpredictions)
SVM_F1_Score = f1_score(y_true=y_test, y_pred=SVMpredictions)

# One-row summary; 'Log Loss' is left empty because it is not computed
# for this model.
SVM_metrics = pd.DataFrame(
    {
        'Model': ['SVM'],
        'Accuracy': [SVM_Accuracy_Score],
        'Jaccard Index': [SVM_JaccardIndex],
        'F1 Score': [SVM_F1_Score],
        'Log Loss': [None],
    }
)


# Stack the per-model one-row summaries (KNN, decision tree, logistic
# regression, SVM) into a single comparison table with a fresh 0..n-1 index.
per_model_summaries = [KNN_metrics, Tree_metrics, LR_metrics, SVM_metrics]
Report = pd.concat(per_model_summaries, ignore_index=True)

print(Report)


Linear Regression Model Stats:
                       Metric     Value
0   Mean Squared Error (MSE)  0.115719
1  Mean Absolute Error (MAE)  0.256309
2             R-squared (R2)  0.427138 


               Model  Accuracy  Jaccard Index  F1 Score  Log Loss
0                KNN  0.818321       0.425121  0.596610      None
1      Decision Tree  0.752672       0.395522  0.566845      None
2  Linear Regression  0.836641       0.509174  0.674772  5.888047
3                SVM  0.722137       0.000000  0.000000      None
