In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the raw records. JSON text is UTF-8 by specification, so the encoding is
# pinned explicitly instead of relying on the platform's locale default.
with open('database.json', encoding='utf-8') as file:
    data = pd.read_json(file)


In [None]:
# Show the frame with a fixed 500px width applied to the weather_forecast column.
data.style.set_properties(subset=['weather_forecast'], width='500px')

In [None]:
# Copy individual fields of each weather_forecast entry into their own columns.
# Maps new column name -> key looked up inside the weather_forecast entry.
forecast_fields = {
    "future_temp": "temp",
    "future_humidity": "humidity",
    "future_uv": "uv_index",
    "future_rainfall": "rainfall",
    "future_wind_speed": "wind_speed",
}
for column_name, forecast_key in forecast_fields.items():
    # Bind the key as a default argument so each lambda reads its own field.
    data[column_name] = data["weather_forecast"].apply(
        lambda forecast, key=forecast_key: forecast[key]
    )

In [None]:
# Re-display the frame (fixed 500px width on the weather_forecast column).
data.style.set_properties(subset=['weather_forecast'], width='500px')

In [None]:
# Positional indices of the columns of `data` that are used as model inputs.
selected = [*range(1, 4), *range(7, 10), *range(11, 16)]

In [None]:
# Keep every row, but only the columns at the positions listed in `selected`.
selected_data = data.iloc[:, selected]

In [None]:
# Standardise each selected column: subtract its mean, then divide by its
# standard deviation (both computed column-wise over all rows).
data_mean, data_sd = np.mean(selected_data, axis=0), np.std(selected_data, axis=0)
adjusted_data = selected_data.sub(data_mean).div(data_sd)

<h1 style="text-align: center">Data Splitting</h1>

In [None]:
def shuffle_data_numpy(X, y, numpy_seed):
    """Shuffle a feature/target pair with one shared random permutation.

    Args:
        X: Array-like of samples; the first axis indexes samples.
        y: Array-like of targets with the same number of rows as ``X``.
        numpy_seed: Seed passed to ``np.random.seed`` so the permutation is
            reproducible. Note that this resets NumPy's global random state.

    Returns:
        ``(X_shuffle, y_shuffle)``: NumPy arrays holding the rows of ``X`` and
        ``y`` reordered by the same permutation, so pairs stay aligned.

    Raises:
        ValueError: If ``X`` and ``y`` do not have the same number of rows.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    # Without this check a longer y would be silently truncated and the
    # feature/target pairing could be wrong without any error being raised.
    if X.shape[0] != y.shape[0]:
        raise ValueError(
            f"X and y must have the same number of rows, "
            f"got {X.shape[0]} and {y.shape[0]}"
        )

    # Fix the random seed so the result is controlled by np.random.seed(numpy_seed).
    np.random.seed(numpy_seed)

    # One permutation of the row indices, applied to both arrays.
    shuffled_array = np.random.permutation(X.shape[0])
    X_shuffle = X[shuffled_array]
    y_shuffle = y[shuffled_array]

    return X_shuffle, y_shuffle

def train_val_split(X_trainval, y_trainval, train_size, numpy_seed):
    """Shuffle the data with the given seed, then cut it into train/validation parts.

    Args:
        X_trainval: Samples to split.
        y_trainval: Targets to split, paired row-by-row with ``X_trainval``.
        train_size: Number of leading shuffled rows that go to the training set.
        numpy_seed: Seed forwarded to ``shuffle_data_numpy``.

    Returns:
        ``(X_train, X_val, y_train, y_val)``: the first ``train_size`` shuffled
        rows form the training set and the remaining rows the validation set.
    """
    shuffled_X, shuffled_y = shuffle_data_numpy(X_trainval, y_trainval, numpy_seed)

    # Two complementary slices: rows before and rows from the cut point.
    head, tail = slice(None, train_size), slice(train_size, None)

    return shuffled_X[head], shuffled_X[tail], shuffled_y[head], shuffled_y[tail]

In [None]:
# Hand-entered target pairs, one row per sample (two target values per row).
truth = np.array([
    [26, 10], [22, 6], [24, 7], [26, 8], [22, 6],
    [27, 8], [19, 0], [18, 2], [26, 5], [32, 8],
    [31, 9], [21, 2], [22, 8], [15, 2], [21, 2],
    [34, 10], [27, 7], [21, 2], [21, 4], [15, 1],
    [24, 3], [31, 3], [34, 4], [11, 0], [24, 5],
    [30, 7], [33, 8], [13, 1], [23, 7], [29, 6],
    [30, 8], [25, 3], [25, 3], [13, 0], [23, 1],
    [32, 2], [34, 1], [11, 0], [33, 10], [26, 4],
])


In [None]:
# Standardise each target column; the mean and standard deviation are kept so
# that predictions can be mapped back to the original scale later.
truth_mean = truth.mean(axis=0)
truth_sd = truth.std(axis=0)
adjusted_truth = (truth - truth_mean) / truth_sd

In [None]:
# Split with seed 1; 70% of the rows (rounded down) go to the training set.
X_train, X_val, y_train, y_val = train_val_split(
    np.array(adjusted_data),
    np.array(adjusted_truth),
    int(len(adjusted_data) * 0.7),
    1,
)

In [None]:
# Dump the four splits, one after another, for a quick visual check.
print(X_train, X_val, y_train, y_val, sep='\n')

In [None]:
# No additional import allowed
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from keras.layers import Dense

def MyModel(input_dim, dropout_ratio):
    """Build the fully connected regression network.

    The network has four hidden Dense layers (64, 32, 16 and 4 units, ReLU),
    each followed by Dropout, and a single-unit output layer.

    Args:
        input_dim: Number of input features per sample.
        dropout_ratio: Dropout rate applied after every hidden layer.

    Returns:
        A built, uncompiled ``Sequential`` model.
    """
    model = Sequential()

    # Hidden stack: every Dense/ReLU layer is followed by a Dropout layer.
    for hidden_units in (64, 32, 16, 4):
        model.add(Dense(units=hidden_units, activation='relu', kernel_initializer="uniform"))
        model.add(Dropout(rate=dropout_ratio))

    # Linear output: the targets in this notebook are z-scored and therefore
    # take negative values, which a sigmoid (range 0..1) can never produce.
    model.add(Dense(units=1, activation='linear', kernel_initializer="uniform"))

    # Build only after the layers exist; building an empty Sequential creates
    # no weights and is rejected outright by newer Keras versions.
    model.build((None, input_dim))

    return model

In [None]:
# Keep them as the default setting for the model you submitted to ZINC!
# One model input per selected column; 10% dropout after each hidden layer.
input_dim, dropout_ratio = len(selected), 0.1

In [None]:
from tensorflow.keras.optimizers import Adam

def MyModel_Training(model, X_train, y_train, X_val, y_val, batchsize, train_epoch, learning_rate=1e-3):
    """Compile the model for regression and fit it on the training data.

    The model is compiled with the Adam optimizer, mean-squared-error loss and
    mean absolute error as the reported metric.

    Args:
        model: Keras model to compile and train (modified in place).
        X_train: Training inputs.
        y_train: Training targets.
        X_val: Validation inputs, evaluated at the end of every epoch.
        y_val: Validation targets.
        batchsize: Number of samples per gradient update.
        train_epoch: Number of passes over the training data.
        learning_rate: Adam learning rate; defaults to the previously
            hard-coded value of 1e-3.

    Returns:
        ``(history, model)``: the object returned by ``model.fit`` and the
        trained model.
    """
    # See https://keras.io/api/models/model_training_apis/ for compile()/fit().
    adam_optimizer = Adam(learning_rate=learning_rate)

    model.compile(
        optimizer=adam_optimizer,
        loss='mse',
        metrics=['mae'],
    )

    history = model.fit(
        x=X_train,
        y=y_train,
        batch_size=batchsize,
        epochs=train_epoch,
        validation_data=(X_val, y_val),
    )

    return history, model

model = MyModel(input_dim, dropout_ratio)

batchsize = 4
train_epoch = 50

# The model has a single output and is trained on the first target column only.
history, model = MyModel_Training(model, X_train, y_train[:,0], X_val, y_val[:,0], batchsize, train_epoch)

# Evaluate against the same column the model was trained on. Passing the full
# two-column y_val would broadcast the single prediction against both columns
# and report an error that includes a target the model never learned.
test_loss, test_mae = model.evaluate(X_val, y_val[:, 0], verbose=1)
print(f'Validation Mean Absolute Error (MAE): {test_mae}')
model.summary()

In [None]:
# One hand-written sample: one value per selected input column.
test = [38.5, 80, 10000, 9, 5, 2, 40.5, 90, 6, 0, 0]

# Standardise it with the feature statistics, then predict on a batch of one.
adjusted_test = (test - data_mean) / data_sd
prediction = model.predict(np.array(adjusted_test)[np.newaxis, :])

# Map the prediction back to the scale of the first target column.
truth_mean[0] + truth_sd[0] * prediction

In [None]:
# Persist the trained model next to the notebook.
model.save(filepath='./smartHome.keras')

In [None]:
import matplotlib.pyplot as plt
# Training vs. validation curves, one panel per tracked quantity.
# Each entry: (training key, validation key, y-axis label, training legend, validation legend).
curve_panels = [
    ('mae', 'val_mae', 'MAE', 'Training mae', 'Validation mae'),
    ('loss', 'val_loss', 'Loss', 'Training Loss', 'Validation Loss'),
]

plt.figure(figsize=(8, 4))
for panel_index, (train_key, val_key, y_label, train_label, val_label) in enumerate(curve_panels, start=1):
    plt.subplot(1, 2, panel_index)
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# Predicted vs. actual values of the first target on the validation split.
test_predictions = model.predict(X_val).flatten()
actual_values = y_val[:, 0]

# Both series are standardised (z-scores), so they include negative values.
# Derive shared axis limits from the data instead of clamping the axes at 0,
# which would hide every below-mean point.
axis_low = min(actual_values.min(), test_predictions.min())
axis_high = max(actual_values.max(), test_predictions.max())
margin = 0.05 * (axis_high - axis_low) or 1.0  # fall back to 1.0 if all values coincide
axis_limits = [axis_low - margin, axis_high + margin]

plt.figure(figsize=(6, 6))
plt.scatter(actual_values, test_predictions)
plt.xlabel('Ground Truth Values for optimal Air conditioning temperature (standardized)')
plt.ylabel('Predictions for Air conditioning temperature (standardized)')
plt.xlim(axis_limits)
plt.ylim(axis_limits)

# Identity line: points on it are perfect predictions.
_ = plt.plot(axis_limits, axis_limits)