<a href="https://colab.research.google.com/github/eternityduck/ML_KPI/blob/main/Lab1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<a href="https://colab.research.google.com/github/https-deeplearning-ai/tensorflow-1-public/blob/main/C4/W2/ungraded_labs/C4_W2_Lab_3_deep_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Imports

In [None]:
%pip install tensorflow_decision_forests
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import tensorflow_decision_forests as tfdf
from google.colab import drive
# Mount Google Drive into the Colab runtime at /content/drive.
# NOTE(review): nothing in this cell reads from /content/drive (the CSV below is
# fetched by URL), so this mount may be unnecessary — confirm before removing.
drive.mount('/content/drive')
import pandas as pd

# Replace the file ID with your own
file_id = '1eKw4Z67VTlfML7j1cfx2b3V76mEqEDm5'
# Download URL derived from the file ID above.
link = f'https://drive.google.com/uc?id={file_id}'
# Full table, still including the "Activity" label column at this point.
df = pd.read_csv(link)
# Build the TF-DF dataset first: it needs the "Activity" column, which is
# removed from `df` on the next line.
df_tfdf = tfdf.keras.pd_dataframe_to_tf_dataset(df, label="Activity")
# pop() removes "Activity" from `df` in place: afterwards `target` holds the
# labels and `df` holds only the remaining columns.
target = df.pop("Activity")

## Utilities

In [None]:
from sklearn.metrics import auc
# Tensor version of `df`; the Keras fit cells below use it as model input.
df_tens = tf.convert_to_tensor(value=df)
# Plain-text preview of the first ten rows.
print(df.head(n=10))

def plotilka_precision_recall(history):
  """Draw the recorded precision values against the recorded recall values.

  Args:
    history: Mapping with 'recall' and 'precision' entries, each a non-empty
      sequence of numbers (the callers in this notebook pass the `.history`
      dict returned by `fit`).

  The points are drawn as a post-step line and both axes are cropped to the
  observed min/max of their series. The figure is shown, then closed.
  """
  fig, ax = plt.subplots()
  recall_values = history['recall']
  precision_values = history['precision']

  ax.step(recall_values, precision_values, where='post')
  ax.set_xlabel('Recall')
  ax.set_ylabel('Precision')
  # Crop each axis to exactly the range covered by the data.
  ax.set_xlim(min(recall_values), max(recall_values))
  ax.set_ylim(min(precision_values), max(precision_values))
  ax.set_title('Precision-Recall Curve')

  plt.show()
  plt.close(fig)

def plotilka_ROC(history):
  """Draw 'true_positives' against 'false_positives' with an AUC legend entry.

  Args:
    history: Mapping with 'false_positives', 'true_positives' and 'auc'
      entries, each a non-empty sequence of numbers (the callers in this
      notebook pass the `.history` dict returned by `fit`).

  The legend reports the largest value found in history['auc']. A dashed
  (0, 0)-(1, 1) reference diagonal is added and both axes are cropped to the
  observed min/max of the plotted series. The figure is shown, then closed.

  NOTE(review): the axes are labelled as rates, but the histories passed in
  this notebook presumably hold per-epoch true/false positive counts under
  these keys — confirm. If so, the labels and the 0-1 diagonal do not match
  the plotted data.
  """
  fp_series = history['false_positives']
  tp_series = history['true_positives']
  roc_auc = history['auc']

  fig, ax = plt.subplots(figsize=(8, 6))
  ax.plot(
    fp_series,
    tp_series,
    color='darkorange',
    lw=2,
    label=f'ROC curve (AUC = {max(roc_auc):.2f})',
  )
  # Dashed reference diagonal.
  ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
  # Crop each axis to exactly the range covered by the data.
  ax.set_ylim([min(tp_series), max(tp_series)])
  ax.set_xlim([min(fp_series), max(fp_series)])
  ax.set_ylabel('True Positive Rate')
  ax.set_xlabel('False Positive Rate')
  ax.set_title('Receiver Operating Characteristic (ROC) Curve')
  ax.legend(loc='lower right')
  ax.grid(True)

  plt.show()
  plt.close(fig)

## Shallow Neural Network

In [None]:
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.metrics import *

# One wide ReLU hidden layer feeding a single sigmoid output unit.
shallow_model = tf.keras.Sequential()
shallow_model.add(tf.keras.layers.Dense(1777, activation='relu'))
shallow_model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

# The plotting helpers read the 'precision', 'recall', 'true_positives',
# 'false_positives' and 'auc' history keys; these are expected to come from the
# metrics listed here (presumably via Keras' default metric naming — confirm).
shallow_model.compile(
  optimizer=RMSprop(learning_rate=1e-4),
  loss=tf.keras.losses.BinaryCrossentropy(),
  metrics=[
    'Precision',
    'accuracy',
    Recall(name="recall"),
    F1Score(),
    "TruePositives",
    "FalsePositives",
    "AUC",
  ],
)



## Fit

In [None]:
# Train the shallow network; labels are passed as a float array and the
# validation_split argument holds out 20% of the data for validation.
history = shallow_model.fit(
  df_tens,
  np.asarray(target, dtype=float),
  epochs=15,
  batch_size=128,
  validation_split=0.2,
)


# Precision Recall Shallow Model

In [None]:
# Dump the raw per-epoch metric lists, then render both diagnostic plots.
print(history.history)
for draw in (plotilka_precision_recall, plotilka_ROC):
  draw(history.history)


## Deep Neural Network



In [None]:
# Six ReLU layers of decreasing width followed by a single sigmoid output unit.
deep_model = tf.keras.Sequential(
  [
    tf.keras.layers.Dense(width, activation='relu')
    for width in (1776, 1024, 512, 256, 128, 64)
  ]
  + [tf.keras.layers.Dense(1, activation='sigmoid')]
)

# Same optimizer, loss and metric list as the shallow model's compile call.
# RMSprop, Recall and F1Score are imported in the shallow-model cell, so that
# cell has to run before this one.
deep_model.compile(
  optimizer=RMSprop(learning_rate=1e-4),
  loss=tf.keras.losses.BinaryCrossentropy(),
  metrics=[
    'Precision',
    'accuracy',
    Recall(name="recall"),
    F1Score(),
    "TruePositives",
    "FalsePositives",
    "AUC",
  ],
)

In [None]:
# Train the deep network; labels are passed as a float array and the
# validation_split argument holds out 20% of the data for validation.
deep_history = deep_model.fit(
  df_tens,
  np.asarray(target, dtype=float),
  epochs=15,
  batch_size=256,
  validation_split=0.2,
)

# Precision Recall Deep Model

In [None]:
# Dump the raw per-epoch metric lists, then render both diagnostic plots.
print(deep_history.history)
for draw in (plotilka_precision_recall, plotilka_ROC):
  draw(deep_history.history)

# CNN-LSTM Network

In [None]:
# Causal 1-D convolution, two stacked LSTMs, then a small dense head ending in
# a single sigmoid unit.
# NOTE(review): the fit cell feeds this model tf.expand_dims(df_tens, axis=1),
# which inserts a length-1 axis where input_shape declares the variable-length
# (None) axis — confirm that a one-step sequence with 1776 channels is intended
# rather than treating the 1776 columns as the sequence axis.
cnn_model = tf.keras.Sequential()
cnn_model.add(
  tf.keras.layers.Conv1D(
    filters=64,
    kernel_size=15,
    strides=1,
    activation="relu",
    padding='causal',
    input_shape=[None, 1776],
  )
)
cnn_model.add(tf.keras.layers.LSTM(1024, return_sequences=True))
cnn_model.add(tf.keras.layers.LSTM(1024))
cnn_model.add(tf.keras.layers.Dense(256, activation='relu'))
cnn_model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

# RMSprop, Recall and F1Score are imported in the shallow-model cell, so that
# cell has to run before this one.
cnn_model.compile(
  optimizer=RMSprop(learning_rate=1e-4),
  loss=tf.keras.losses.BinaryCrossentropy(),
  metrics=['Precision', 'accuracy', Recall(), F1Score()],
)

In [None]:
# Inputs get an extra length-1 axis at position 1 to line up with the model's
# input_shape=[None, 1776]; labels are passed as a float32 column of shape (-1, 1).
cnn_model.fit(
  tf.expand_dims(df_tens, axis=1),
  np.asarray(target, dtype='float32').reshape(-1, 1),
  epochs=15,
  batch_size=32,
  validation_split=0.2,
)

## Shallow Decision Forest




In [None]:

# Hyper-parameters for the shallow gradient-boosted-trees model.

# Maximum number of decision trees. Early stopping is disabled below
# (early_stopping="NONE"), so it does not reduce the number of trained trees.
NUM_TREES = 250
# Minimum number of examples in a node.
MIN_EXAMPLES = 4
# Maximum depth of the tree. max_depth=1 means that all trees will be roots.
MAX_DEPTH = 2
# Ratio of the dataset (sampling without replacement) used to train individual
# trees for the random sampling method.
SUBSAMPLE = 0.65
# Control the sampling of the datasets used to train individual trees.
SAMPLING_METHOD = "RANDOM"
# Ratio of the training dataset used to monitor the training.
VALIDATION_RATIO = 0.2

gbt_model = tfdf.keras.GradientBoostedTreesModel(
      num_trees=NUM_TREES,
      max_depth=MAX_DEPTH,
      min_examples=MIN_EXAMPLES,
      subsample=SUBSAMPLE,
      validation_ratio=VALIDATION_RATIO,
      task=tfdf.keras.Task.CLASSIFICATION,
      verbose = 2,
      sampling_method=SAMPLING_METHOD,
      loss = "BINOMIAL_LOG_LIKELIHOOD",
      early_stopping="NONE"
  )

gbt_model.compile(metrics=['Precision', 'accuracy', Recall(name="recall"), F1Score()])

# NOTE(review): this rebinds `history`, the name the shallow network's fit cell
# uses for its training history; re-running the shallow plotting cell after
# this one would read this object instead.
history = gbt_model.fit(df_tfdf, epochs=1, verbose=2)

# `gbt_model.metrics` is a list of Keras metric objects, so printing it shows
# object descriptions rather than scores. Report the model's own evaluation
# (computed during training on the held-out validation_ratio share) instead.
print(gbt_model.make_inspector().evaluation())

## Deep Decision Forest



In [None]:



# Hyper-parameters for the deep gradient-boosted-trees model. Only MAX_DEPTH
# differs from the values set in the shallow decision-forest cell.
NUM_TREES = 250          # upper bound on the number of trees
MIN_EXAMPLES = 4         # minimum number of examples in a node
MAX_DEPTH = 16           # maximum tree depth (1 would mean root-only trees)
SUBSAMPLE = 0.65         # share of the data sampled (without replacement) per tree
SAMPLING_METHOD = "RANDOM"  # how per-tree training data is sampled
VALIDATION_RATIO = 0.2   # share of the training data used to monitor training

gbt_model = tfdf.keras.GradientBoostedTreesModel(
  task=tfdf.keras.Task.CLASSIFICATION,
  loss="BINOMIAL_LOG_LIKELIHOOD",
  num_trees=NUM_TREES,
  max_depth=MAX_DEPTH,
  min_examples=MIN_EXAMPLES,
  sampling_method=SAMPLING_METHOD,
  subsample=SUBSAMPLE,
  validation_ratio=VALIDATION_RATIO,
  early_stopping="NONE",
  verbose=2,
)

gbt_model.compile(
  metrics=['Precision', 'accuracy', Recall(), F1Score()],
)

# NOTE(review): `gbt_model` and `history` are rebound here, replacing the
# shallow forest's model and fit result as well as the name used for the
# shallow network's training history.
history = gbt_model.fit(df_tfdf, epochs=1, verbose=2)
# TODO: evaluate on a held-out test dataset; none is defined in the cells shown here.

# Conclusion
In our experiments, we observed that the model's performance varied depending on the algorithm and hyperparameters. After careful evaluation, we identified a model that achieved an accuracy rate of 79%. This indicates that our best-performing model was able to correctly classify the biological molecule responses with a high degree of accuracy. However, it's essential to consider the specific domain and application to determine if this level of accuracy is sufficient for practical use.

In conclusion, this laboratory work provided valuable insights into the application of machine learning techniques for classification tasks, emphasizing the importance of selecting appropriate algorithms, hyperparameters, and evaluation metrics. We have demonstrated that achieving high accuracy is possible, but further considerations, such as domain-specific constraints and business requirements, are crucial in determining the suitability of the model for real-world applications.