<a href="https://colab.research.google.com/github/arifzukri/arifzukri/blob/main/ML_Comp_Arif.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install keras-tuner -q
!pip install optuna

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import seaborn as sns
from keras.preprocessing import sequence 
from keras.models import Sequential 
from keras.layers import Dense, Embedding 
from keras.layers import LSTM
from keras_tuner import RandomSearch
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras_tuner.tuners import RandomSearch
from keras_tuner.engine.hyperparameters import HyperParameters
import keras_tuner as kerastuner
import time
import random as rn
from sklearn import metrics
import optuna


In [None]:
import os
# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts, so
# setting it here probably only affects child processes — confirm if needed.
os.environ['PYTHONHASHSEED'] = '0'
# Hide all CUDA devices. NOTE(review): this runs after `import tensorflow` in the
# import cell; confirm it still takes effect in the target environment.
os.environ['CUDA_VISIBLE_DEVICES'] = ''
# Seed NumPy, Python's `random` (imported as `rn`) and TensorFlow with the same value.
np.random.seed(123)
rn.seed(123)
tf.random.set_seed(123)

In [None]:
# Load the training dataset directly from its raw GitHub URL (requires network access).
df = pd.read_csv('https://raw.githubusercontent.com/mystique1011/ML_competition/main/MEC_dataset/training_dataset.csv')

In [None]:
# Add the engineered columns in a single step: square roots of the three
# distance/length columns and reciprocals of the source/destination areas.
df = df.assign(
    dist_geo_sqrt=np.sqrt(df['dist_geo']),
    dist_pop_sqrt=np.sqrt(df['dist_pop']),
    oneway_length_sqrt=np.sqrt(df['oneway_length']),
    area_reci_src=1 / df['area_src'],
    area_reci_dst=1 / df['area_dst'],
)

In [None]:
# (rows, columns) of the frame after the engineered columns were added.
df.shape

In [None]:
# Drop the two '.1'-suffixed name columns; `drop` returns a new frame, so `df` is unchanged.
df1 = df.drop(columns=['la_name_dst.1', 'la_name_src.1'])

In [None]:
# Sanity check: should report two fewer columns than `df.shape`.
df1.shape

In [None]:
# The 14 model inputs and the regression target.
features = [
    'sourceid', 'dstid', 'dow',
    'area_reci_src', 'area_reci_dst',
    'fastest_travel_time', 'maxspeed_traveltime',
    'dist_geo_sqrt', 'oneway_length_sqrt',
    'la_name_src', 'la_name_dst',
    'population_density_src', 'population_density_dst',
    'direction',
]
X = df1.loc[:, features]
y = df1.loc[:, 'mean_travel_time']

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=123,
)

In [None]:
# Preview the first rows of the selected feature columns.
X.head()

In [None]:
# A cell only displays its last expression, so show both shapes as one tuple:
# (train shape, test shape).
X_train.shape, X_test.shape

In [None]:
# A cell only displays its last expression, so show both shapes as one tuple:
# (train shape, test shape).
y_train.shape, y_test.shape

In [None]:
# Number of input features; the Keras models below hard-code this as shape=(14,).
len(X_train.columns)

# Model (ANN)

# Create Model

# Support Vector Regression

In [None]:
from sklearn.svm import SVR

In [None]:
def run(trial):
  """Optuna objective: fit a polynomial-kernel SVR and return its MSE.

  Samples `degree` (int, 6-10), `coef0` (0-1), `tol` (1e-3 to 1), `C` (0.1-100)
  and `epsilon` (1e-4 to 1) from `trial`. The trial parameter names match the
  `SVR` keyword arguments, so `study.best_params` can be passed directly as
  `SVR(kernel='poly', **study.best_params)`.

  The model is fitted on the notebook-level `X_train`/`y_train` and scored on
  `X_test`/`y_test`.
  NOTE(review): because trials are scored on the test split, the best MSE is
  an optimistic estimate; consider a separate validation split.

  Args:
    trial: an `optuna.trial.Trial` supplying the hyperparameter suggestions.

  Returns:
    Mean squared error of the fitted model's predictions (lower is better).
  """
  svr_params = {
      "degree": trial.suggest_int("degree", 6, 10),
      "coef0": trial.suggest_float("coef0", 0, 1),
      "tol": trial.suggest_float("tol", 1e-3, 1),
      "C": trial.suggest_float("C", 1e-1, 100),
      "epsilon": trial.suggest_float("epsilon", 1e-4, 1),
  }

  trial_model = SVR(
      kernel='poly',
      gamma='scale',
      shrinking=True,
      cache_size=200,
      # Solver logging is off: with many trials it floods the cell output.
      verbose=False,
      max_iter=-1,
      **svr_params)

  trial_model.fit(X_train, y_train)

  predictions = trial_model.predict(X_test)
  mse = metrics.mean_squared_error(y_test, predictions)

  return mse

In [None]:
# Seed the sampler explicitly so repeated runs propose the same sequence of
# trials, in line with the seeds fixed at the top of the notebook.
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=123),
)
study.optimize(run, n_trials=100)

In [None]:
# Hyperparameters of the trial with the lowest objective value (MSE).
study.best_params

In [None]:
#poly kernel, 100 n_trial, mse=69131
#{'degree': 6, 'coef': 0.993171654157324, 'tol': 0.31283963254316527, 'C': 99.54428462839196, 'epsilon': 0.668497756210735}

In [None]:
#poly kernel, 100 n_trial, mse=115926
#{'C': 0.9983607825434719, 'coef': 0.9999527045602298, 'degree': 10, 'epsilon': 0.4931080277268556, 'tol': 0.7254521116988704}

In [None]:
#poly kernel, 100 n_trial, mse=205271
#{'C': 0.9997529324003172, 'coef': 0.9778269172087866, 'degree': 5, 'epsilon': 0.14717422307256925, 'tol': 0.7935926274355507}

In [None]:
#poly kernel, 100 n_trial, mse=119451
#{'C': 0.8687938956124598, 'coef0': 0.9990963737191921, 'degree': 10, 'epsilon': 0.004649773909768923, 'tol': 0.4986492791483438}

In [None]:
# Hyperparameters copied from the last recorded Optuna run (see the comment cell above).
params = dict(
    C=0.8687938956124598,
    coef0=0.9990963737191921,
    degree=10,
    epsilon=0.004649773909768923,
    tol=0.4986492791483438,
)

# Polynomial-kernel SVR: fixed solver settings plus the tuned values.
sv = SVR(
    **params,
    kernel='poly',
    gamma='scale',
    shrinking=True,
    cache_size=200,
    verbose=False,
    max_iter=-1,
)

sv.fit(X_train, y_train)
SVR_predictions = sv.predict(X_test)

print("MSE : ", metrics.mean_squared_error(y_test, SVR_predictions))

# Tuneable Model

In [None]:
def run2(trial):
  """Sample the neural-network search space from an Optuna trial.

  This function only draws the hyperparameters; it does not build or train a
  model yet, so it cannot be used as an Optuna objective as-is.

  Args:
    trial: an object exposing Optuna's `suggest_int` / `suggest_float` API.

  Returns:
    dict with the sampled values under the keys "h_layer", "o_layer"
    (ints in [128, 1024]) and "learning_rate" (float).
  """
  h_layer = trial.suggest_int("h_layer", 128, 1024)
  o_layer = trial.suggest_int("o_layer", 128, 1024)
  # Searched on a log scale; the bounds bracket the fixed 0.01 used by the
  # other Keras models in this notebook and can be adjusted as needed.
  learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-1, log=True)

  return {
      "h_layer": h_layer,
      "o_layer": o_layer,
      "learning_rate": learning_rate,
  }

In [None]:
# Timestamped directory so each tuning session writes to a fresh location.
LOG_DIR = f"{int(time.time())}"
# Seeded weight initializer used by every layer built in `build_model`.
initializer = tf.keras.initializers.GlorotNormal(seed=123)
def build_model(hp):
  """Build and compile a tunable feed-forward regressor for keras-tuner.

  Search space (taken from `hp`):
    * `n_layers`       - number of stacked hidden layers, 1 to 3
    * `SRNN_{i}_units` - width of hidden layer i, 128 to 1024 in steps of 32
    * `output_units`   - width of the final hidden layer, same range

  The network takes 14 input features and ends in a single linear unit, because
  the target is one scalar per row.

  Args:
    hp: the keras-tuner `HyperParameters` object for the current trial.

  Returns:
    A compiled `keras.Model` (Adam, lr=0.01, MSE loss, RMSE metric).
  """
  inputs = keras.Input(shape=(14,), name="inputlayer")

  # Chain the hidden layers: each consumes the previous layer's output. Feeding
  # every layer from `inputs` would leave only the last one connected.
  x = inputs
  for i in range(hp.Int('n_layers', 1, 3)):
    x = layers.Dense(hp.Int(f'SRNN_{i}_units', min_value=128, max_value=1024, step=32), activation="relu", kernel_initializer=initializer)(x)

  x = layers.Dense(hp.Int('output_units', min_value=128, max_value=1024, step=32), activation="relu", kernel_initializer=initializer, name="last_hidden")(x)

  # Scalar regression head: one linear unit so predictions match the target shape.
  outputs = layers.Dense(1, kernel_initializer=initializer, name="predictions")(x)

  # The former bare `tf.keras.regularizers.l2(0.01)` call was removed: it built
  # a regularizer and discarded it. To regularize, pass `kernel_regularizer=`
  # to the Dense layers above.
  model = keras.Model(inputs=inputs, outputs=outputs)

  opt = tf.keras.optimizers.Adam(learning_rate=0.01)
  ls = tf.keras.losses.MeanSquaredError()
  met = 'RootMeanSquaredError'

  model.compile(optimizer=opt, loss=ls,metrics=[met])

  return model

In [None]:
# Random search over `build_model`: 3 hyperparameter sets, each trained 5 times,
# ranked by validation RMSE (lower is better). Results are written to LOG_DIR.
tuner = RandomSearch(
    hypermodel=build_model,
    objective=kerastuner.Objective("val_root_mean_squared_error", direction="min"),
    max_trials=3,
    executions_per_trial=5,
    directory=LOG_DIR,
)

# Remaining keyword arguments are passed through to the model's training call.
tuner.search(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=32,
)

In [None]:
# Print the tuner's summary of the completed trials.
tuner.results_summary()

# First Model

In [None]:
# Instantiate the initializer with a fixed seed (as in the tuning section) so the
# starting weights are reproducible; the bare class gave unseeded initial weights.
initializer = tf.keras.initializers.GlorotNormal(seed=123)

# 14 input features -> three ReLU hidden layers -> one linear output unit.
inputs = keras.Input(shape=(14,), name="digits")
x1 = layers.Dense(992, kernel_initializer=initializer, activation="relu", name="dense_1")(inputs)
x2 = layers.Dense(736, kernel_initializer=initializer, activation="relu", name="dense_2")(x1)
x3 = layers.Dense(160, kernel_initializer=initializer, activation="relu", name="dense_3")(x2)
# Scalar regression head: one linear unit so predictions match the target shape.
outputs = layers.Dense(1, kernel_initializer=initializer, name="predictions")(x3)

# The former bare `tf.keras.regularizers.l2(l2=0.01)` call was removed: it built
# a regularizer and discarded it. To regularize, pass `kernel_regularizer=`
# to the Dense layers above.
model = keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# Print the layer-by-layer summary of the model defined above.
model.summary()

# Compile and Train Model

In [None]:
# Adam optimizer, MSE loss, RMSE reported as an additional metric.
opt = tf.keras.optimizers.Adam(learning_rate=0.01)
ls = tf.keras.losses.MeanSquaredError()
met = 'RootMeanSquaredError'

model.compile(optimizer=opt, loss=ls,metrics=[met])

print("Fit model on training data")
# Only `validation_data` is passed: Keras ignores `validation_split` when both
# are given, so the former `validation_split=0.2` argument had no effect.
# NOTE(review): validating on the test split means the curves below are not an
# independent check; consider a separate validation set.
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    verbose=2,
    validation_data=(X_test, y_test),
)

# Model Evaluation

In [None]:
# Evaluate on the held-out split; `results` is [loss (MSE), RMSE] in compile order.
print('Evaluate on test data')
results = model.evaluate(X_test, y_test)
print('test loss, test rmse: ', results)


In [None]:
# List the per-epoch series recorded during training (the keys used by the plots below).
print(history.history.keys())

In [None]:
# Training vs. validation loss per epoch.
loss_train = history.history['loss']
loss_val = history.history['val_loss']
# Derive the x-axis from the recorded history so the plot stays correct if the
# number of training epochs changes.
epochs = range(1, len(loss_train) + 1)
plt.plot(epochs, loss_train, 'g', label='Training loss')
plt.plot(epochs, loss_val, 'b', label='validation loss')
plt.title('Training and Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Training vs. validation RMSE per epoch.
rmse_train = history.history['root_mean_squared_error']
rmse_val = history.history['val_root_mean_squared_error']
# Derive the x-axis from the recorded history so the plot stays correct if the
# number of training epochs changes.
epochs = range(1, len(rmse_train) + 1)
plt.plot(epochs, rmse_train, 'g', label='Training rmse')
plt.plot(epochs, rmse_val, 'b', label='validation rmse')
plt.title('Training and Validation rmse')
plt.xlabel('Epochs')
plt.ylabel('rmse')
plt.legend()
plt.show()

In [None]:
# Re-evaluate on the held-out split with per-run (verbose=2) logging.
# Only non-default arguments are passed: `max_queue_size`, `workers` and
# `use_multiprocessing` are rejected by newer Keras releases, and the remaining
# arguments were all left at their defaults.
model.evaluate(
    x=X_test,
    y=y_test,
    verbose=2,
)