<a href="https://colab.research.google.com/github/ayyucedemirbas/tf_decision_forests_emotion_classification/blob/main/tf_random_forests.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q tensorflow_decision_forests

[K     |████████████████████████████████| 16.0 MB 4.6 MB/s 
[K     |████████████████████████████████| 511.7 MB 6.4 kB/s 
[K     |████████████████████████████████| 5.8 MB 46.8 MB/s 
[K     |████████████████████████████████| 438 kB 56.0 MB/s 
[K     |████████████████████████████████| 1.6 MB 59.6 MB/s 
[?25h

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

import os
import random
import warnings


def seed_everything(seed):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)


warnings.filterwarnings("ignore")
seed_everything(42)

In [None]:
import tensorflow_decision_forests as tfdf
tfdf.keras.get_all_models()

[tensorflow_decision_forests.keras.RandomForestModel,
 tensorflow_decision_forests.keras.GradientBoostedTreesModel,
 tensorflow_decision_forests.keras.CartModel,
 tensorflow_decision_forests.keras.DistributedGradientBoostedTreesModel]

In [None]:
df = pd.read_csv("emotion.csv")

In [None]:
df.head()

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z499,v499,x500,y500,z500,v500,x501,y501,z501,v501
0,Happy,0.611963,0.568824,-2.343653,0.999881,0.668213,0.449927,-2.276455,0.999847,0.702016,...,-0.039163,0.0,0.73965,0.421451,0.002701,0.0,0.748513,0.410192,0.003465,0.0
1,Happy,0.617652,0.566049,-2.323081,0.999881,0.669807,0.449244,-2.256609,0.99985,0.703753,...,-0.039959,0.0,0.741532,0.424179,0.002447,0.0,0.750179,0.41463,0.003384,0.0
2,Happy,0.618621,0.565547,-2.25067,0.999878,0.670322,0.449418,-2.187487,0.99985,0.704503,...,-0.042809,0.0,0.741137,0.438544,-0.001054,0.0,0.749507,0.429632,-0.000409,0.0
3,Happy,0.618629,0.565881,-2.115307,0.999877,0.670325,0.451597,-2.054101,0.999853,0.704558,...,-0.040105,0.0,0.740903,0.440421,0.002107,0.0,0.749409,0.431513,0.00294,0.0
4,Happy,0.618577,0.565814,-2.319589,0.999878,0.670324,0.451619,-2.255415,0.999854,0.704578,...,-0.040787,0.0,0.740981,0.427238,0.00124,0.0,0.749257,0.418693,0.001937,0.0


In [16]:
df=df.replace("Happy", 1)

In [17]:
df=df.replace("Sad", 0)

In [None]:
df.dropna(axis=1, inplace=True)

In [19]:
# Splitting the dataset

train_df, test_df = train_test_split(
    df, test_size=0.3, stratify=df["class"], random_state=42
)

In [20]:
# Convert the dataset into a TensorFlow dataset

train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(train_df, label="class")
test_ds = tfdf.keras.pd_dataframe_to_tf_dataset(test_df, label="class")

In [21]:
model_rf = tfdf.keras.RandomForestModel()
model_rf.fit(train_ds)

Use /tmp/tmpwngoo8ws as temporary training directory
Reading training dataset...
Training dataset read in 0:00:24.130650. Found 308 examples.
Training model...
Model trained in 0:00:00.808146
Compiling model...
Model compiled.


<keras.callbacks.History at 0x7f8eb148be90>

In [None]:
model_rf.summary()

In [23]:

preds_rf = np.where(model_rf.predict(test_ds) < 0.5, 0, 1).ravel()

acc_rf = accuracy_score(test_df["class"].values, preds_rf)

print(f"Test set accuracy of Random Forest model is {acc_rf:.6f}")



Test set accuracy of Random Forest model is 1.000000


In [27]:
model_gbt = tfdf.keras.GradientBoostedTreesModel()
model_gbt.fit(train_ds)

Use /tmp/tmpitw9gtso as temporary training directory
Reading training dataset...
Training dataset read in 0:00:34.685166. Found 308 examples.
Training model...
Model trained in 0:01:37.629507
Compiling model...




Model compiled.


<keras.callbacks.History at 0x7f8ea74dcc10>

In [28]:
preds_gbt = np.where(model_gbt.predict(test_ds) < 0.5, 0, 1).ravel()

acc_gbt = accuracy_score(test_df["class"].values, preds_gbt)

print(f"Test set accuracy of Gradient Boosted Trees model is {acc_gbt:.6f}")

Test set accuracy of Gradient Boosted Trees model is 1.000000


In [29]:
model_cart = tfdf.keras.CartModel()
model_cart.fit(train_ds)

Use /tmp/tmpt4sh81ut as temporary training directory
Reading training dataset...




Training dataset read in 0:00:24.820457. Found 308 examples.
Training model...
Model trained in 0:00:00.279814
Compiling model...
Model compiled.




<keras.callbacks.History at 0x7f8ea10a2b10>

In [30]:
preds_cart = np.where(model_cart.predict(test_ds) < 0.5, 0, 1).ravel()

acc_cart = accuracy_score(test_df["class"].values, preds_cart)

print(f"Test set accuracy of CART model is {acc_cart:.6f}")

Test set accuracy of CART model is 0.894737


In [24]:
model_rf.save("model")



In [26]:
import tensorflow as tf


converter = tf.lite.TFLiteConverter.from_keras_model(model_rf)
tflite_model = converter.convert()

# Save the model to disk
open("model_rf.tflite", "wb").write(tflite_model)
  
import os
basic_model_size = os.path.getsize("model_rf.tflite")
print("Model is %d bytes" % basic_model_size)



ConverterError: ignored