In [1]:
import numpy as np 
import pandas as pd 
df = pd.read_csv('data.csv')

df.head()

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RISK_MM,RainTomorrow
0,2008-12-01,Albury,13.4,22.9,0.6,,,W,44.0,W,...,22.0,1007.7,1007.1,8.0,,16.9,21.8,No,0.0,No
1,2008-12-02,Albury,7.4,25.1,0.0,,,WNW,44.0,NNW,...,25.0,1010.6,1007.8,,,17.2,24.3,No,0.0,No
2,2008-12-03,Albury,12.9,25.7,0.0,,,WSW,46.0,W,...,30.0,1007.6,1008.7,,2.0,21.0,23.2,No,0.0,No
3,2008-12-04,Albury,9.2,28.0,0.0,,,NE,24.0,SE,...,16.0,1017.6,1012.8,,,18.1,26.5,No,1.0,No
4,2008-12-05,Albury,17.5,32.3,1.0,,,W,41.0,ENE,...,33.0,1010.8,1006.0,7.0,8.0,17.8,29.7,No,0.2,No


# Dropping unwanted columns

In [2]:
df = df.drop(['Sunshine','Evaporation','Cloud3pm','Cloud9am','Location','RISK_MM','Date', 'RISK_MM'],axis=1)
from sklearn.model_selection import train_test_split
train_set, test_set = train_test_split(df, test_size = 0.2, random_state = 123)


In [3]:
train_set = train_set.drop("RainToday", axis = 1)

In [4]:
#converting cateogrical to numerical
from sklearn.base import BaseEstimator, TransformerMixin

class DataFrameSelector(BaseEstimator, TransformerMixin):
    def __init__(self, attribute_names):
        self.attribute_names=attribute_names
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        return X[self.attribute_names].values

In [5]:
X = train_set.drop("RainTomorrow", axis = 1)
y = train_set["RainTomorrow"].copy()

# Importing sklearn Properties

In [6]:
from sklearn.pipeline import Pipeline
from sklearn.pipeline import FeatureUnion
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

cat_pipeline = Pipeline([
        ("select_cat", DataFrameSelector(["WindGustDir", "WindDir9am", "WindDir3pm"])),
        ("imp", SimpleImputer(missing_values=np.nan, strategy='most_frequent')),
        ("cat_encoder", OneHotEncoder(sparse=False)),
    ])

In [7]:
num_pipeline = Pipeline([
        ("select_numeric", DataFrameSelector(["MinTemp", "MaxTemp", "Rainfall", "WindGustSpeed", "WindSpeed9am", "WindSpeed3pm", "Humidity9am", "Humidity3pm",
                                              "Pressure9am", "Pressure3pm", "Temp9am", "Temp3pm"])),
        ("imp", SimpleImputer(missing_values=np.nan, strategy='mean')),
        ('scaler', StandardScaler()),
        ])
preprocess_pipeline = FeatureUnion(transformer_list=[
        ("num_pipeline", num_pipeline),
        ("cat_pipeline", cat_pipeline),
    ])

In [8]:
#converting categorical data into numerical data using dictonary method
X_train_prepared = preprocess_pipeline.fit_transform(X)
y_train_prepared = y.map({'Yes':1, 'No':0})

In [9]:
X_test = test_set.drop("RainTomorrow", axis = 1)
y_test = test_set["RainTomorrow"].copy()

X_test_prepared = preprocess_pipeline.fit_transform(X_test)
y_test_prepared = y_test.map({'Yes':1, 'No':0})

## Training

In [10]:
#ann model that we learnt in lecture 5 using keras
from keras.utils import to_categorical
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import SGD

n_cols =  X_train_prepared.shape[1]
target =  to_categorical(y_train_prepared)

def get_new_model():
    model = Sequential()
    model.add(Dense(100, activation='relu', input_shape = (n_cols,)))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(2, activation='softmax'))
    return model
print("Testing model with adam optimizer")
model = get_new_model()
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy')
model.fit(X_train_prepared, target)

Using TensorFlow backend.


Testing model with adam optimizer
Epoch 1/1


<keras.callbacks.History at 0x7f568a672cc0>

In [11]:
from keras.callbacks import EarlyStopping

early_stopping_monitor = EarlyStopping(patience=2) 

# Without adding any nodes or layers
model = get_new_model()
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train_prepared, target, validation_split=0.3, epochs=20, callbacks = [early_stopping_monitor])


Train on 79627 samples, validate on 34127 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20


<keras.callbacks.History at 0x7f56888efa58>

In [12]:
#we are increasing the no of layers to decrease the loss 
model = Sequential()
model.add(Dense(120, activation='relu', input_shape = (n_cols,)))
model.add(Dense(120, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train_prepared, target, validation_split=0.3, epochs=20, callbacks = [early_stopping_monitor])

Train on 79627 samples, validate on 34127 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20


<keras.callbacks.History at 0x7f56811e6710>

In [13]:
#same as above
model = Sequential()
model.add(Dense(120, activation='relu', input_shape = (n_cols,)))
model.add(Dense(120, activation='relu'))
model.add(Dense(120, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train_prepared, target, validation_split=0.3, epochs=20, callbacks = [early_stopping_monitor])

Train on 79627 samples, validate on 34127 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20


<keras.callbacks.History at 0x7f5680ea3828>

In [15]:
#training
model = Sequential()
model.add(Dense(120, activation='relu', input_shape = (n_cols,)))
model.add(Dense(120, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train_prepared, target, validation_split=0.3, epochs=20, callbacks = [early_stopping_monitor])
#testing the data
test_loss, test_acc = model.evaluate(X_train_prepared, target)
print("Accuracy= ",test_acc)

Train on 79627 samples, validate on 34127 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Accuracy=  0.8574643529039719
