# Data Pre-processing

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Conv1D, Flatten, MaxPool1D, Dropout, Reshape
from sklearn.metrics import classification_report

In [None]:
# Load the first 100,000 rows of the weather CSV, reading every column as
# float64. `sep` is passed by keyword: pandas 2.x accepts only the path
# positionally.
weather_raw_df = pd.read_csv(
    'the_ultimate_complete_data_delet.csv',
    sep=',',
    header=0,
    dtype='float64',
    nrows=100000,
)

# Remove the columns that are not used as model inputs.
weather_raw_df.drop(
    ['Belfort Rain Gauge', 'Logger Battery Voltage', 'Year', 'Julian Day', 'Time'],
    axis=1,
    inplace=True,
)

# Keep an independent copy of the label column, then remove it from the
# feature frame so it cannot be used as an input.
weather_labels = weather_raw_df['Precipitation (Tipping Bucket) 3H'].copy()
weather_raw_df.drop(['Precipitation (Tipping Bucket) 3H'], axis=1, inplace=True)

# Last expression of the cell: shows the first ten feature rows in a notebook.
weather_raw_df.head(10)

In [None]:
# Turn the tipping-bucket reading into a 0/1 flag: strictly positive
# readings become 1, zero or negative readings become 0. Both masks are
# computed before any write, so values matching neither (e.g. NaN) are
# left untouched.
mask2 = weather_raw_df['Precipitation (Tipping Bucket)'].le(0)
mask = weather_raw_df['Precipitation (Tipping Bucket)'].gt(0)
weather_raw_df.loc[mask2, 'Precipitation (Tipping Bucket)'] = 0
weather_raw_df.loc[mask, 'Precipitation (Tipping Bucket)'] = 1

In [None]:
# Min-max scale the first nine feature columns and then centre each on zero.
# Columns from the tenth onward are left as they are.
# NOTE(review): presumably the tenth column is the already-binarised
# precipitation flag — confirm the column order.
for column in weather_raw_df.columns[:9]:
    values = weather_raw_df[column]
    # Subtract the minimum so the column starts at zero whatever its sign;
    # adding abs(min) only does that when the minimum is negative.
    values = values - values.min()
    peak = values.max()
    # A constant column is all zeros after the shift; skip the division
    # so it does not turn into NaN.
    if peak != 0:
        values = values / peak
    # Centre on the column mean. No division by the standard deviation is done.
    weather_raw_df[column] = values - values.mean()

In [None]:
# Reduce the labels to two classes: strictly positive values become 1,
# zero or negative values become 0. The masks are built before writing,
# so a value matching neither comparison keeps its original value.
mask2 = weather_labels.le(0)
mask = weather_labels.gt(0)
weather_labels.loc[mask2] = 0
weather_labels.loc[mask] = 1

In [None]:
# One-hot encode the labels: one indicator column per distinct label value.
weather_labels_ohe = pd.get_dummies(data=weather_labels)

# K-Fold Cross Validation

In [None]:
from sklearn.model_selection import train_test_split,StratifiedKFold,cross_validate

In [None]:
# Five-fold stratified splitter; shuffling is seeded so the folds are
# reproducible between runs.
kfold = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

In [None]:
# Hold out a stratified 20% test set. The evaluation cells read x_test and
# y_test, which were never defined when x_train / y_train were plain aliases
# of the full data set.
x_train, x_test, y_train, y_test = train_test_split(
    weather_raw_df,
    weather_labels,
    test_size=0.2,
    stratify=weather_labels,
    random_state=42,
)

# One-hot labels for the same rows as the split above, selected by index.
y_train_ohe = weather_labels_ohe.loc[y_train.index]
y_test_ohe = weather_labels_ohe.loc[y_test.index]

# CNN Model

In [None]:
# Two-class CNN over the tabular weather features.
# The feature count comes from the training frame instead of a hard-coded 10.
n_features = x_train.shape[1]

model = Sequential([
    # Present each sample as a length-1 sequence with one channel per feature.
    Reshape((1, n_features), input_shape=(n_features,)),
    # Only the first layer of a Sequential model needs an input shape, so the
    # convolution layers do not repeat it.
    # NOTE(review): with sequence length 1 and kernel size 1 these layers mix
    # features per sample only, and the size-1 pooling leaves the tensor
    # unchanged — confirm that this architecture is intended.
    Conv1D(32, 1, activation='relu'),
    Conv1D(64, 1, activation='relu'),
    MaxPool1D(1),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(128, activation='relu'),
    Dense(128, activation='relu'),
    # Two softmax outputs, one per class.
    Dense(2, activation='softmax'),
])

# The sparse loss takes class indices as targets, so y_train is passed
# directly rather than the one-hot y_train_ohe.
model.compile(
    optimizer='sgd',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for three epochs, holding out 20% of the training data for validation.
info = model.fit(x=x_train, y=y_train, validation_split=0.2, epochs=3)

In [None]:
# Predict per-class scores for x_test and keep the index of the highest
# score in each row as the predicted class.
# NOTE(review): x_test must already be defined by an earlier cell.
predictions = np.argmax(model.predict(x_test), axis=1)

In [None]:
# Per-class precision / recall / F1 summary of the predictions against y_test.
print(classification_report(y_true=y_test, y_pred=predictions))

# SVM Model

In [None]:
from sklearn import svm

In [None]:
# Support-vector classifier, polynomial kernel of degree 5, C = 1.
SVM = svm.SVC(
    kernel='poly',
    degree=5,
    C=1,
    gamma='scale',
    verbose=True,
)

In [None]:
# Cross-validate the SVM on the folds produced by `kfold`, recording
# accuracy and F1 for each fold.
svm_score = cross_validate(
    estimator=SVM,
    X=x_train,
    y=y_train,
    scoring=['accuracy', 'f1'],
    cv=kfold,
)

In [None]:
# Print the mean over the folds of every entry in the SVM results.
for metric_name, fold_values in svm_score.items():
    print(metric_name, np.average(fold_values), sep=": ")

In [None]:
# Support-vector classifier, polynomial kernel of degree 4, C = 2.
SVM = svm.SVC(
    kernel='poly',
    degree=4,
    C=2,
    gamma='scale',
    verbose=True,
)

In [None]:
# Cross-validate the reconfigured SVM on the folds produced by `kfold`,
# recording accuracy and F1 for each fold.
svm_score = cross_validate(
    estimator=SVM,
    X=x_train,
    y=y_train,
    scoring=['accuracy', 'f1'],
    cv=kfold,
)

In [None]:
# Print the mean over the folds of every entry in the SVM results.
for metric_name, fold_values in svm_score.items():
    print(metric_name, np.average(fold_values), sep=": ")

# Random Forest Model

In [None]:
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

In [None]:
# Random forest that considers all n features at every split.
# A classifier is used because the model is scored with accuracy and F1,
# which need discrete class predictions rather than a regressor's
# continuous output. `max_features=None` means "all features"; the old
# 'auto' value is no longer accepted by recent scikit-learn.
RF = RandomForestClassifier(
    n_estimators=1500,
    max_features=None,
    random_state=42,
    verbose=1,
)

In [None]:
# Cross-validate the random forest on the folds produced by `kfold`,
# recording accuracy and F1 for each fold.
rf_score = cross_validate(
    estimator=RF,
    X=x_train,
    y=y_train,
    scoring=['accuracy', 'f1'],
    cv=kfold,
)

In [None]:
# Print the mean over the folds of every entry in the random-forest results.
# This reads rf_score; the loop previously iterated svm_score and so
# re-printed the SVM numbers.
for key in rf_score:
    print(key + ": " + str(np.average(rf_score[key])))

In [None]:
# Random forest that considers sqrt(n) features at every split.
# A classifier is used because the model is scored with accuracy and F1,
# which need discrete class predictions rather than a regressor's
# continuous output.
RF = RandomForestClassifier(
    n_estimators=1500,
    max_features='sqrt',
    random_state=42,
    verbose=1,
)

In [None]:
# Cross-validate the reconfigured random forest on the folds produced by
# `kfold`, recording accuracy and F1 for each fold.
rf_score = cross_validate(
    estimator=RF,
    X=x_train,
    y=y_train,
    scoring=['accuracy', 'f1'],
    cv=kfold,
)

In [None]:
# Print the mean over the folds of every entry in the random-forest results.
# This reads rf_score; the loop previously iterated svm_score and so
# re-printed the SVM numbers.
for key in rf_score:
    print(key + ": " + str(np.average(rf_score[key])))