In [None]:
from keras.models import Sequential
from keras.utils import np_utils 
from keras.layers.core import Dense, Activation, Dropout 
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import StandardScaler
from keras import callbacks
from sklearn.metrics import precision_score, recall_score, confusion_matrix, classification_report, accuracy_score, f1_score

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np 

# Read the weatherAUS CSV from the Kaggle input directory and preview the
# first rows as the cell output.
DATA_PATH = '/kaggle/input/weather-dataset-rattle-package/weatherAUS.csv'
data = pd.read_csv(DATA_PATH)
data.head()

In [None]:
# Bar chart of how many rows fall in each RainTomorrow class, drawn with a
# two-colour pastel palette.
cols = ["#B2F0E2", "#EEA1E5"]
sns.countplot(data=data, x="RainTomorrow", palette=cols)

In [None]:
# Print a summary of the raw frame: column names, dtypes and non-null counts.
data.info()

In [None]:
# No malformed dates were spotted, so the Date column is parsed into real
# datetime values.
parsed_dates = pd.to_datetime(data["Date"])
data['Date'] = parsed_dates
# Keep the calendar year as a separate column.
data['year'] = parsed_dates.dt.year

# Helper that encodes a periodic column as cyclic parameters.
# The data feeds a neural network, so months and days are represented as a
# continuous sine/cosine pair instead of raw integers.

def encode(data, col, max_val):
    """Append sine and cosine encodings of ``data[col]`` to ``data``.

    The column is mapped to the angle ``2 * pi * value / max_val`` and two new
    columns, ``<col>_sin`` and ``<col>_cos``, are written into ``data``.

    Args:
        data: DataFrame that contains ``col``; it is modified in place.
        col: Name of the column to encode.
        max_val: Period used to scale the column onto one full turn.

    Returns:
        The same ``data`` object that was passed in, with the two new columns.
    """
    angle = 2 * np.pi * data[col]/max_val
    for suffix, trig in (('_sin', np.sin), ('_cos', np.cos)):
        data[col + suffix] = trig(angle)
    return data

# Pull the month and the day-of-month out of Date, then add the sine/cosine
# encoding of each (period 12 for months, 31 for days).
for part, period in (('month', 12), ('day', 31)):
    data[part] = getattr(data.Date.dt, part)
    data = encode(data, part, period)

data.head()


In [None]:
# Gather the names of all columns whose dtype is "object"; these are treated
# as the categorical variables.
object_cols = [name for name, dtype in data.dtypes.items() if dtype == "object"]

print("Categorical variables:")
print(object_cols)

In [None]:
# Number of missing values in each column.
data.isnull().sum()

In [None]:
# Discard every row that has at least one missing value, then show the
# per-column null counts again to confirm none remain.
data = data.dropna(axis=0, how='any')
data.isna().sum()

In [None]:
# Summarise the frame again now that rows with missing values have been dropped.
data.info()

In [None]:
from sklearn.model_selection import train_test_split

# RainTomorrow is the target. The features are every other column except the
# raw Date and the integer month/day columns (their sin/cos encodings stay).
y = data['RainTomorrow'].copy()
X = data.drop(columns=['RainTomorrow', 'Date', 'month', 'day'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [None]:
# Names of the object-dtype (categorical) feature columns.
object_cols = X.dtypes[X.dtypes == "object"].index.tolist()

print("Categorical variables:")
print(object_cols)

# Replace each category with an ordinal code. The category lists are learned
# from the training split only and then reused unchanged on the test split.
ordinal_encoder = OrdinalEncoder().fit(X_train[object_cols])
X_train[object_cols] = ordinal_encoder.transform(X_train[object_cols])
X_test[object_cols] = ordinal_encoder.transform(X_test[object_cols])



In [None]:
# Standardise every feature with statistics fitted on the training split only,
# and apply the same transform to the test split. The scaler returns plain
# arrays, so both splits are rebuilt as DataFrames with their column names
# (and therefore get a fresh default row index).
feature_names = X_train.columns
scaler = StandardScaler().fit(X_train)
X_train = pd.DataFrame(scaler.transform(X_train), columns = feature_names)
X_test = pd.DataFrame(scaler.transform(X_test), columns = X_test.columns)

In [None]:
# Cast both feature matrices to float32 before they are handed to the network.
# The values are converted directly; no intermediate conversion to pandas'
# nullable dtypes is needed because the result is a plain float32 array anyway.
X_train = pd.DataFrame(X_train.to_numpy(dtype='float32'), columns = X_train.columns)
X_test = pd.DataFrame(X_test.to_numpy(dtype='float32'), columns = X_test.columns)

In [None]:
# Preview the first rows of the prepared training features.
X_train.head()

In [None]:
# Print the dtype / non-null summary of the training features.
X_train.info()
# Cell output: the number of feature columns in X (the frame X_train was split from).
len(X.columns)

In [None]:
# Summary statistics of the training target before it is converted to 0/1.
y_train.describe()

In [None]:
# Turn the target labels into integers for the binary classifier:
# 'Yes' -> 1 and 'No' -> 0.
# The explicit astype() fixes the result as int64 instead of relying on
# replace() to downcast the object column implicitly, which newer pandas
# releases deprecate. Re-running the cell on already-encoded labels is a no-op.
target_codes = {'Yes': 1, 'No': 0}
y_train = y_train.replace(target_codes).astype('int64')
y_test = y_test.replace(target_codes).astype('int64')

In [None]:
# Stop training when the validation loss has not improved by at least
# `min_delta` for `patience` epochs in a row, and restore the best weights seen.
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss', # the Keras default, written out so the criterion is visible
    min_delta=0.001, # minimum amount of change to count as an improvement
    patience=20, # how many epochs to wait before stopping
    restore_best_weights=True,
)

# Take the input width from the training matrix instead of hard-coding it, so
# the network keeps matching the data if the feature set changes.
n_features = X_train.shape[1]

# Two hidden ReLU layers of 32 units and a single sigmoid output unit that
# yields a probability for the positive class.
model = Sequential(
    [
        Dense(32, input_dim = n_features, activation ='relu'),
        Dense(32, activation ='relu'),
        Dense(1, activation ='sigmoid')
    ]
)


model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
# 20% of the training rows are held out as the validation set that early
# stopping monitors. The returned History object is kept for later inspection.
history = model.fit(X_train, y_train, callbacks=[early_stopping], batch_size = 32, epochs = 150,  validation_split=0.2)


In [None]:
# Score the held-out test set: predicted probabilities above 0.5 are treated
# as class 1, then precision, recall and F1 are printed per class.
y_pred = model.predict(X_test) > 0.5
print(classification_report(y_test, y_pred))

In [None]:
# Confusion matrix drawn as an annotated heatmap; each cell is shown as a
# fraction of all test samples.
cmap1 = sns.diverging_palette(260, -10, s=50, l=75, n=5, as_cmap=True)
fig, ax = plt.subplots(figsize=(12, 8))
cf_matrix = confusion_matrix(y_test, y_pred)
cf_fractions = cf_matrix / cf_matrix.sum()
sns.heatmap(cf_fractions, cmap=cmap1, annot=True, annot_kws={'size': 15}, ax=ax)