In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
pip install catboost



In [3]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn_pandas import DataFrameMapper
from sklearn.preprocessing import StandardScaler, LabelBinarizer
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet, HuberRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

from catboost import CatBoostRegressor

import tensorflow as tf
from tensorflow import keras

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input, Dropout
from tensorflow.keras.utils import to_categorical

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [4]:
df = pd.read_csv('gdrive/My Drive/Colab Notebooks/data/clean_data.csv')

In [5]:
del df["Unnamed: 0"]

In [6]:
target = 'water_temperature'
X = df.drop(target, axis=1)
y = df[target]

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [8]:
mapper = DataFrameMapper([
    ("beach_name", LabelBinarizer()),
    (['turbidity'], StandardScaler()),
    (['wave_height'], StandardScaler()),
    (['wave_period'], StandardScaler()),
    (['battery_life'], StandardScaler()),
    ('time_of_day', LabelBinarizer()),
    (['month'], StandardScaler()),
    (['day'], StandardScaler()),
    (['year'], StandardScaler())],df_out=True)

In [9]:
Z_train = mapper.fit_transform(X_train)
Z_test = mapper.transform(X_test)

In [10]:
model = LinearRegression().fit(Z_train,y_train)
print("LinearRegression train score is " + str(model.score(Z_train,y_train)))
print("LinearRegression test score is " + str(model.score(Z_test,y_test)))

print("Mean squared error is " + str(mean_squared_error(y_test, model.predict(Z_test))**(1/2)))

LinearRegression train score is 0.3361214452356903
LinearRegression test score is 0.345253457116571
Mean squared error is 2.693253791542149


In [11]:
model = HuberRegressor().fit(Z_train,y_train)
print("HuberRegressor train score is " + str(model.score(Z_train,y_train)))
print("HuberRegressor test score is " + str(model.score(Z_test,y_test)))

print("Mean squared error is " + str(mean_squared_error(y_test, model.predict(Z_test))**(1/2)))

HuberRegressor train score is 0.3188228629326283
HuberRegressor test score is 0.3228959492025364
Mean squared error is 2.738850827623488


In [12]:
model = Lasso().fit(Z_train,y_train)
print("Lasso train score is " + str(model.score(Z_train,y_train)))
print("Lasso test score is " + str(model.score(Z_test,y_test)))

print("Mean squared error is " + str(mean_squared_error(y_test, model.predict(Z_test))**(1/2)))

Lasso train score is 0.16141263387856197
Lasso test score is 0.1628646923796898
Mean squared error is 3.045359043114319


In [13]:
model = Ridge().fit(Z_train,y_train)
print("Ridge train score is " + str(model.score(Z_train,y_train)))
print("Ridge test score is " + str(model.score(Z_test,y_test)))

print("Mean squared error is " + str(mean_squared_error(y_test, model.predict(Z_test))**(1/2)))

Ridge train score is 0.3313272395028093
Ridge test score is 0.3386464269971815
Mean squared error is 2.7068084589642116


In [14]:
model = ElasticNet().fit(Z_train,y_train)
print("ElasticNet train score is " + str(model.score(Z_train,y_train)))
print("ElasticNet test score is " + str(model.score(Z_test,y_test)))

print("Mean squared error is " + str(mean_squared_error(y_test, model.predict(Z_test))**(1/2)))

ElasticNet train score is 0.18153664758486832
ElasticNet test score is 0.1830049361983803
Mean squared error is 3.0085025865285444


In [15]:
model = RandomForestRegressor().fit(Z_train,y_train)
print("RandomForestRegressor train score is " + str(model.score(Z_train,y_train)))
print("RandomForestRegressor test score is " + str(model.score(Z_test,y_test)))

print("Mean squared error is " + str(mean_squared_error(y_test, model.predict(Z_test))**(1/2)))

RandomForestRegressor train score is 0.9950263245608514
RandomForestRegressor test score is 0.9602573346692361
Mean squared error is 0.6635431462753668


In [16]:
model = DecisionTreeRegressor().fit(Z_train,y_train)
print("DecisionTreeRegressor train score is " + str(model.score(Z_train,y_train)))
print("DecisionTreeRegressor test score is " + str(model.score(Z_test,y_test)))

print("Mean squared error is " + str(mean_squared_error(y_test, model.predict(Z_test))**(1/2)))

DecisionTreeRegressor train score is 0.9999876538821932
DecisionTreeRegressor test score is 0.9352042433614767
Mean squared error is 0.8472546395891352


In [17]:
model = CatBoostRegressor().fit(Z_train,y_train)
print("CatBoostRegressor train score is " + str(model.score(Z_train,y_train)))
print("CatBoostRegressor test score is " + str(model.score(Z_test,y_test)))

print("Mean squared error is " + str(mean_squared_error(y_test, model.predict(Z_test))**(1/2)))

Learning rate set to 0.069713
0:	learn: 3.2502949	total: 62.2ms	remaining: 1m 2s
1:	learn: 3.1309283	total: 68.3ms	remaining: 34.1s
2:	learn: 3.0210214	total: 74.2ms	remaining: 24.7s
3:	learn: 2.9259796	total: 80.2ms	remaining: 20s
4:	learn: 2.8185029	total: 86.1ms	remaining: 17.1s
5:	learn: 2.7394700	total: 91.7ms	remaining: 15.2s
6:	learn: 2.6463604	total: 97.6ms	remaining: 13.8s
7:	learn: 2.5764253	total: 104ms	remaining: 12.9s
8:	learn: 2.5137759	total: 110ms	remaining: 12.1s
9:	learn: 2.4617714	total: 116ms	remaining: 11.5s
10:	learn: 2.3833172	total: 122ms	remaining: 11s
11:	learn: 2.3321820	total: 131ms	remaining: 10.7s
12:	learn: 2.2895695	total: 136ms	remaining: 10.4s
13:	learn: 2.2350300	total: 142ms	remaining: 10s
14:	learn: 2.1967752	total: 148ms	remaining: 9.74s
15:	learn: 2.1559584	total: 154ms	remaining: 9.49s
16:	learn: 2.1015660	total: 160ms	remaining: 9.26s
17:	learn: 2.0664209	total: 166ms	remaining: 9.06s
18:	learn: 2.0363372	total: 172ms	remaining: 8.88s
19:	learn:

In [18]:
model = Sequential([
    Input(shape=(Z_train.shape[1],)),
    Dense(256, activation='sigmoid'),
    Dense(128, activation="relu"),
    Dense(64, activation='selu'),
    Dense(32, activation='swish'),
    Dense(16, activation='relu'),
    Dense(1, activation='elu')
])

In [19]:
model.compile(loss='mae', optimizer='adam')

In [20]:
history = model.fit(Z_train, y_train,
                    validation_data=(Z_test, y_test),
                    epochs=100, batch_size=16,
                    verbose=2)

Epoch 1/100
1627/1627 - 3s - loss: 2.2490 - val_loss: 1.6252
Epoch 2/100
1627/1627 - 2s - loss: 1.6211 - val_loss: 1.4544
Epoch 3/100
1627/1627 - 2s - loss: 1.4224 - val_loss: 1.2785
Epoch 4/100
1627/1627 - 2s - loss: 1.2395 - val_loss: 1.2258
Epoch 5/100
1627/1627 - 2s - loss: 1.1734 - val_loss: 1.1474
Epoch 6/100
1627/1627 - 2s - loss: 1.1071 - val_loss: 1.1111
Epoch 7/100
1627/1627 - 2s - loss: 1.0782 - val_loss: 1.0352
Epoch 8/100
1627/1627 - 2s - loss: 1.0402 - val_loss: 0.9962
Epoch 9/100
1627/1627 - 2s - loss: 1.0301 - val_loss: 1.0454
Epoch 10/100
1627/1627 - 2s - loss: 1.0074 - val_loss: 0.9484
Epoch 11/100
1627/1627 - 2s - loss: 0.9958 - val_loss: 0.9537
Epoch 12/100
1627/1627 - 2s - loss: 0.9662 - val_loss: 1.0898
Epoch 13/100
1627/1627 - 2s - loss: 0.9665 - val_loss: 0.9348
Epoch 14/100
1627/1627 - 2s - loss: 0.9424 - val_loss: 0.9344
Epoch 15/100
1627/1627 - 2s - loss: 0.9252 - val_loss: 0.9581
Epoch 16/100
1627/1627 - 2s - loss: 0.9145 - val_loss: 0.9989
Epoch 17/100
1627

In [21]:
print(str(model.evaluate(Z_train, y_train, verbose=0)))
print(str(model.evaluate(Z_test, y_test, verbose=0)))

0.6161405444145203
0.6599445939064026
