In [None]:
This dataset contains observations of weather metrics for each day from 2008 to 2017. The **weatherAUS.csv** dataset includes the following fields:

| Field         | Description                                           | Unit            | Type   |
| ------------- | ----------------------------------------------------- | --------------- | ------ |
| Date          | Date of the Observation in YYYY-MM-DD                 | Date            | object |
| Location      | Location of the Observation                           | Location        | object |
| MinTemp       | Minimum temperature                                   | Celsius         | float  |
| MaxTemp       | Maximum temperature                                   | Celsius         | float  |
| Rainfall      | Amount of rainfall                                    | Millimeters     | float  |
| Evaporation   | Amount of evaporation                                 | Millimeters     | float  |
| Sunshine      | Amount of bright sunshine                             | hours           | float  |
| WindGustDir   | Direction of the strongest gust                       | Compass Points  | object |
| WindGustSpeed | Speed of the strongest gust                           | Kilometers/Hour | object |
| WindDir9am    | Wind direction averaged over 10 minutes prior to 9am  | Compass Points  | object |
| WindDir3pm    | Wind direction averaged over 10 minutes prior to 3pm  | Compass Points  | object |
| WindSpeed9am  | Wind speed averaged over 10 minutes prior to 9am      | Kilometers/Hour | float  |
| WindSpeed3pm  | Wind speed averaged over 10 minutes prior to 3pm      | Kilometers/Hour | float  |
| Humidity9am   | Humidity at 9am                                       | Percent         | float  |
| Humidity3pm   | Humidity at 3pm                                       | Percent         | float  |
| Pressure9am   | Atmospheric pressure reduced to mean sea level at 9am | Hectopascal     | float  |
| Pressure3pm   | Atmospheric pressure reduced to mean sea level at 3pm | Hectopascal     | float  |
| Cloud9am      | Fraction of the sky obscured by cloud at 9am          | Eighths         | float  |
| Cloud3pm      | Fraction of the sky obscured by cloud at 3pm          | Eighths         | float  |
| Temp9am       | Temperature at 9am                                    | Celsius         | float  |
| Temp3pm       | Temperature at 3pm                                    | Celsius         | float  |
| RainToday     | If there was rain today                               | Yes/No          | object |
| RISK_MM       | Amount of rain tomorrow                               | Millimeters     | float  |
| RainTomorrow  | If there is rain tomorrow                             | Yes/No          | object |

Column definitions were gathered from [http://www.bom.gov.au/climate/dwo/IDCJDW0000.shtml](http://www.bom.gov.au/climate/dwo/IDCJDW0000.shtml?utm_medium=Exinfluencer&utm_source=Exinfluencer&utm_content=000026UJ&utm_term=10006555&utm_id=NA-SkillsNetwork-Channel-SkillsNetworkCoursesIBMDeveloperSkillsNetworkML0101ENSkillsNetwork20718538-2022-01-01)



: 

## **Import the required libraries**


In [None]:
# Suppress warnings: replace warnings.warn with a function that ignores every
# call, so anything invoked as warnings.warn(...) emits nothing.
import warnings


def warn(*_args, **_kwargs):
    """Accept any arguments and do nothing."""
    return None


warnings.warn = warn

In [None]:
import piplite
await piplite.install(['pandas'])
await piplite.install(['numpy'])


In [None]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn import preprocessing
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm
from sklearn.metrics import jaccard_score
from sklearn.metrics import f1_score
from sklearn.metrics import log_loss
from sklearn.metrics import confusion_matrix, accuracy_score
import sklearn.metrics as metrics

### Importing the Dataset


In [None]:
from pyodide.http import pyfetch

async def download(url, filename):
    """Fetch ``url`` with ``pyfetch`` and save the response body to ``filename``.

    Parameters
    ----------
    url : str
        Address handed to ``pyfetch``.
    filename : str
        Path of the local file to write; it is opened in binary write mode,
        so an existing file is overwritten.

    Raises
    ------
    OSError
        If the response status is not 200. Failing here gives a clear error
        at the download step rather than a missing-file error when the data
        is read later.
    """
    response = await pyfetch(url)
    if response.status != 200:
        raise OSError(
            f"Download of {url} failed with HTTP status {response.status}"
        )
    # Read the whole body before opening the file so that a failed read does
    # not leave an empty file behind.
    payload = await response.bytes()
    with open(filename, "wb") as f:
        f.write(payload)

In [None]:
# URL of the weather CSV that the download cell fetches.
path = (
    'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-ML0101EN-SkillUp/labs/ML-FinalAssignment/Weather_Data.csv'
)

In [None]:
await download(path, "Weather_Data.csv")
filename ="Weather_Data.csv"

In [None]:
# Load the file written by the download cell. `filename` is the name that cell
# assigned, so the path is not repeated here as a second literal.
df = pd.read_csv(filename)
df.head()

### Data Preprocessing


In [None]:
# One-hot encode the four categorical columns; pd.get_dummies passes the
# remaining columns through unchanged.
categorical_columns = ['RainToday', 'WindGustDir', 'WindDir9am', 'WindDir3pm']
df_sydney_processed = pd.get_dummies(df, columns=categorical_columns)

In [None]:
# Map the remaining 'No' / 'Yes' text values to 0 / 1.
df_sydney_processed = df_sydney_processed.replace(['No', 'Yes'], [0, 1])

### Training Data and Test Data


In [None]:
# Remove the Date column before the frame is cast to float in the next cell.
df_sydney_processed = df_sydney_processed.drop(columns='Date')

In [None]:
# Cast every remaining column to 64-bit float.
df_sydney_processed = df_sydney_processed.astype("float64")

In [None]:
# Split the frame into the prediction target and the feature columns.
target_column = 'RainTomorrow'
Y = df_sydney_processed[target_column]
features = df_sydney_processed.drop(columns=target_column)

### Linear Regression


In [None]:
# 80/20 train/test split with a fixed seed. The linear regression, KNN and
# decision tree cells below all fit and score on this split.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    Y,
    test_size=0.2,
    random_state=10,
)


In [None]:
# Build and fit in one expression (fit returns the estimator itself); the bare
# name on the last line keeps the estimator as the cell's displayed output.
LinearReg = LinearRegression().fit(x_train, y_train)
LinearReg

In [None]:
# Predict on the held-out features. The name `predictions` is reassigned by
# every later model's predict cell, so the metrics cell that follows must be
# run directly after this one.
predictions = LinearReg.predict(x_test)
predictions

In [None]:
# scikit-learn metrics are declared as (y_true, y_pred). MAE and MSE give the
# same value either way round, but R^2 does not: it measures error relative to
# the variance of the first argument, so the held-out labels must come first.
LinearRegression_MAE = metrics.mean_absolute_error(y_test, predictions)
LinearRegression_MSE = metrics.mean_squared_error(y_test, predictions)
LinearRegression_R2 = metrics.r2_score(y_test, predictions)

print("MAE: ", LinearRegression_MAE)
print("MSE: ", LinearRegression_MSE)
print("R2: ", LinearRegression_R2)

In [None]:
# One-row summary of the linear regression metrics, labelled by model name.
Report = pd.DataFrame(
    data={
        "MAE": [LinearRegression_MAE],
        "MSE": [LinearRegression_MSE],
        "R2": [LinearRegression_R2],
    },
    index=["Linear Regression"],
)
Report

### KNN


In [None]:
# Number of neighbours used by the classifier.
k = 4
# Build and fit in one expression; the bare name keeps the estimator as the
# cell's displayed output.
KNN = KNeighborsClassifier(n_neighbors=k).fit(x_train, y_train)
KNN

In [None]:
# Class predictions of the KNN model on the held-out features. This overwrites
# the `predictions` produced by the previous model; the KNN metrics cell below
# reads this value.
predictions = KNN.predict(x_test)
predictions

In [None]:
# scikit-learn metrics are declared as (y_true, y_pred): pass the held-out
# labels first and the model output second. For binary labels these three
# scores are symmetric, so the reported values do not change.
# NOTE(review): the Jaccard index is computed for class 0 (pos_label=0) while
# f1_score is left on its default pos_label — confirm this is intended.
KNN_Accuracy_Score = accuracy_score(y_test, predictions)
KNN_JaccardIndex = jaccard_score(y_test, predictions, pos_label=0)
KNN_F1_Score = f1_score(y_test, predictions)
print("Accuracy Score: ", KNN_Accuracy_Score)
print("Jaccard Index: ", KNN_JaccardIndex)
print("F1 Score: ", KNN_F1_Score)

### Decision Tree


In [None]:
# Entropy-based decision tree. random_state is fixed because the estimator
# permutes features at each split, so ties between equally good splits can
# otherwise resolve differently from run to run and change the scores below.
Tree = DecisionTreeClassifier(criterion="entropy", random_state=0)
Tree.fit(x_train, y_train)

In [None]:
# Class predictions of the decision tree on the held-out features. This
# overwrites the `predictions` produced by the previous model; the tree
# metrics cell below reads this value.
predictions = Tree.predict(x_test)
predictions

In [None]:
# scikit-learn metrics are declared as (y_true, y_pred): pass the held-out
# labels first and the model output second. For binary labels these three
# scores are symmetric, so the reported values do not change.
# NOTE(review): the Jaccard index is computed for class 0 (pos_label=0) while
# f1_score is left on its default pos_label — confirm this is intended.
Tree_Accuracy_Score = accuracy_score(y_test, predictions)
Tree_JaccardIndex = jaccard_score(y_test, predictions, pos_label=0)
Tree_F1_Score = f1_score(y_test, predictions)
print("Accuracy Score: ", Tree_Accuracy_Score)
print("Jaccard Index: ", Tree_JaccardIndex)
print("F1 Score: ", Tree_F1_Score)

### Logistic Regression


In [None]:
# NOTE(review): this re-split uses random_state=1 while the earlier split used
# random_state=10, so the models fitted from here on are scored on a different
# test set than KNN and the decision tree — confirm this is intended.
x_train, x_test, y_train, y_test = train_test_split(features, Y, test_size=0.2, random_state=1)
# Report only the sizes of the four pieces instead of printing their contents.
print("Train:", x_train.shape, y_train.shape)
print("Test:", x_test.shape, y_test.shape)

In [None]:
# Logistic regression with the liblinear solver. Build and fit in one
# expression; the bare name keeps the estimator as the cell's displayed output.
LR = LogisticRegression(solver='liblinear').fit(x_train, y_train)
LR

In [None]:
# Hard class predictions (used for the accuracy / Jaccard / F1 scores in the
# next cell) and the per-class probabilities (used there for the log loss).
predictions = LR.predict(x_test)
predict_proba = LR.predict_proba(x_test)
print(predictions, predict_proba)

In [None]:
# scikit-learn metrics are declared as (y_true, y_pred): pass the held-out
# labels first and the model output second, matching the log_loss call. For
# binary labels the first three scores are symmetric, so their values do not
# change.
# NOTE(review): the Jaccard index is computed for class 0 (pos_label=0) while
# f1_score is left on its default pos_label — confirm this is intended.
LR_Accuracy_Score = accuracy_score(y_test, predictions)
LR_JaccardIndex = jaccard_score(y_test, predictions, pos_label=0)
LR_F1_Score = f1_score(y_test, predictions)
LR_Log_Loss = log_loss(y_test, predict_proba)
print("accuracy score: ", LR_Accuracy_Score)
print("Jaccard Index: ", LR_JaccardIndex)
print("F1 score: ", LR_F1_Score)
print("Log Loss: ", LR_Log_Loss)


### SVM


In [None]:
# Support vector classifier with a linear kernel. Build and fit in one
# expression; the bare name keeps the estimator as the cell's displayed output.
SVM = svm.SVC(kernel='linear').fit(x_train, y_train)
SVM

In [None]:
# Class predictions of the SVM on the held-out features. This overwrites the
# `predictions` produced by the previous model; the SVM metrics cell below
# reads this value.
predictions = SVM.predict(x_test)
predictions

In [None]:
# scikit-learn metrics are declared as (y_true, y_pred): pass the held-out
# labels first and the model output second. For binary labels these three
# scores are symmetric, so the reported values do not change.
# NOTE(review): the Jaccard index is computed for class 0 (pos_label=0) while
# f1_score is left on its default pos_label — confirm this is intended.
SVM_Accuracy_Score = accuracy_score(y_test, predictions)
SVM_JaccardIndex = jaccard_score(y_test, predictions, pos_label=0)
SVM_F1_Score = f1_score(y_test, predictions)
print("accuracy score: ", SVM_Accuracy_Score)
print("Jaccard Index: ", SVM_JaccardIndex)
print("F1 score: ", SVM_F1_Score)

### Report


In [None]:
# Side-by-side summary of the four classifiers. Log loss is filled in only for
# logistic regression, the one model whose class probabilities were computed;
# the other rows carry a "--" placeholder.
# Every row gets an empty-string label in place of the default integer index.
blank_index = [""] * 4
Report = pd.DataFrame(
    {
        "Classification Method": [
            "KNN with k=4",
            "Decision Tree",
            "Logistic Regression(liblinear)",
            "SVM(linear)",
        ],
        "Accuracy Score": [
            KNN_Accuracy_Score,
            Tree_Accuracy_Score,
            LR_Accuracy_Score,
            SVM_Accuracy_Score,
        ],
        "Jaccard Index": [
            KNN_JaccardIndex,
            Tree_JaccardIndex,
            LR_JaccardIndex,
            SVM_JaccardIndex,
        ],
        "F1 Score": [
            KNN_F1_Score,
            Tree_F1_Score,
            LR_F1_Score,
            SVM_F1_Score,
        ],
        "Log Loss": ["--", "--", LR_Log_Loss, "--"],
    },
    index=blank_index,
)
Report