In [1]:
!pip install scikit-learn pandas numpy matplotlib seaborn



In [2]:
from google.colab import files
uploaded = files.upload() # link to dataset https://github.com/AlihamzaMaan/Random-Forest-Classification/blob/main/dataset_1.csv

Saving modeling_dataset.csv to modeling_dataset.csv


In [3]:
import pandas as pd

df = pd.read_csv("modeling_dataset.csv")
print(df.head())  # Display the first few rows

   Unnamed: 0  packet_count  total_bytes  avg_packet_size  \
0           0          4155    6034168.0      1452.266667   
1           1          3388    4850245.0      1431.595336   
2           2          3664    5287877.0      1443.197871   
3           3          4108    5948521.0      1448.033350   
4           4          4412    6419061.0      1454.909565   

   avg_inter_packet_delay  std_inter_packet_delay  chunk_count  avg_ssim  \
0                1.657439                5.760497           14  0.972431   
1                2.218187                9.324953           10  0.971165   
2                1.985804                7.874238           10  0.971165   
3                1.688093                5.893905           11  0.972424   
4                1.531172                4.442955           11  0.980529   

     avg_format  avg_chunk_size  
0  1920x1080-22   401506.214286  
1  1920x1080-22   499983.900000  
2  1920x1080-22   499983.900000  
3  1920x1080-22   509411.000000  
4  192

In [4]:
!pip install trustee

Collecting trustee
  Downloading trustee-1.1.6-py3-none-any.whl.metadata (6.3 kB)
Collecting furo<2023.0.0,>=2022.6.21 (from trustee)
  Downloading furo-2022.12.7-py3-none-any.whl.metadata (6.0 kB)
Collecting pandas<2.0.0,>=1.1.0 (from trustee)
  Downloading pandas-1.5.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting prettytable==3.0.0 (from trustee)
  Downloading prettytable-3.0.0-py3-none-any.whl.metadata (22 kB)
Collecting setuptools<58.0.0,>=57.0.0 (from trustee)
  Downloading setuptools-57.5.0-py3-none-any.whl.metadata (4.9 kB)
Collecting sphinx-gallery<0.12.0,>=0.11.1 (from trustee)
  Downloading sphinx_gallery-0.11.1-py3-none-any.whl.metadata (4.9 kB)
Collecting sphinxemoji<0.3.0,>=0.2.0 (from trustee)
  Downloading sphinxemoji-0.2.0.tar.gz (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting term

In [5]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from trustee import RegressionTrustee
from sklearn.metrics import mean_absolute_percentage_error


# Load dataset
df = pd.read_csv("/content/modeling_dataset.csv")

# Drop rows with missing target
df = df.dropna(subset=['avg_ssim'])

# Convert SSIM to dB
df['avg_ssim_db'] = -10 * np.log10(1 - df['avg_ssim'])

# Optional: Handle SSIM = 1 → inf case
df['avg_ssim_db'] = df['avg_ssim_db'].replace([np.inf, -np.inf], np.nan).dropna()

# Define features and target
features = [
    'packet_count',
    'total_bytes',
    'avg_packet_size',
    'avg_inter_packet_delay',
    'std_inter_packet_delay'
]
X = df[features].fillna(0)
y = df['avg_ssim_db']  # now using decibel values

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Random Forest Regressor
reg = RandomForestRegressor(n_estimators=100, random_state=42)
reg.fit(X_train, y_train)

# Predict and evaluate
y_pred = reg.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mape = mean_absolute_percentage_error(y_test, y_pred) * 100  # Convert to percentage
r2 = r2_score(y_test, y_pred)

# Print results
print("MSE:", mse)
print("RMSE:", rmse)
print("MAPE:", mape, "%")
print("R² Score:", r2)

# Explain model using RegressionTrustee
trustee = RegressionTrustee(expert=reg)
trustee.fit(X_train, y_train, num_iter=50, num_stability_iter=10, samples_size=0.3, verbose=True)
dt, pruned_dt, agreement, reward = trustee.explain()
dt_y_pred = dt.predict(X_test)

# Evaluate fidelity (tree vs expert) and performance (tree vs ground truth)
# Fidelity: Tree vs Forest
print("\nFidelity (Tree vs Forest):")
mse_fidelity = mean_squared_error(y_pred, dt_y_pred)
rmse_fidelity = np.sqrt(mse_fidelity)
mape_fidelity = mean_absolute_percentage_error(y_pred, dt_y_pred) * 100
r2_fidelity = r2_score(y_pred, dt_y_pred)

print("MSE:", mse_fidelity)
print("RMSE:", rmse_fidelity)
print("MAPE:", mape_fidelity, "%")
print("R²:", r2_fidelity)

# Performance: Tree vs Ground Truth
print("\nPerformance (Tree vs Ground Truth):")
mse_perf = mean_squared_error(y_test, dt_y_pred)
rmse_perf = np.sqrt(mse_perf)
mape_perf = mean_absolute_percentage_error(y_test, dt_y_pred) * 100
r2_perf = r2_score(y_test, dt_y_pred)

print("MSE:", mse_perf)
print("RMSE:", rmse_perf)
print("MAPE:", mape_perf, "%")
print("R²:", r2_perf)

MSE: 6.17848338301275
RMSE: 2.4856555238030773
MAPE: 9.028268259247328 %
R² Score: 0.694859986025132
Initializing training dataset using RandomForestRegressor(random_state=42) as expert model
Expert model score: 0.9531257640596111
Initializing Trustee outer-loop with 10 iterations
########## Outer-loop Iteration 0/10 ##########
Initializing Trustee inner-loop with 10 iterations
########## Inner-loop Iteration 0/50 ##########
Sampling 365 points from training dataset with (1218, 1218) entries
Student model 0-0 trained with depth 19 and 255 leaves:
Student model score: 0.6728845033498676
Student model 0-0 fidelity: 0.6728845033498676
########## Inner-loop Iteration 1/50 ##########
Sampling 365 points from training dataset with (1328, 1328) entries
Student model 0-1 trained with depth 21 and 251 leaves:
Student model score: 0.6278819883347663
Student model 0-1 fidelity: 0.6278819883347663
########## Inner-loop Iteration 2/50 ##########
Sampling 365 points from training dataset with (1438,



In [6]:
import os
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from trustee.report.trust import TrustReport
from sklearn.utils.multiclass import check_classification_targets
from trustee import RegressionTrustee
from sklearn.metrics import mean_absolute_percentage_error


# Load dataset
df = pd.read_csv("/content/modeling_dataset.csv")

# Drop rows with missing target
df = df.dropna(subset=['avg_ssim'])

# Convert SSIM to dB
df['avg_ssim_db'] = -10 * np.log10(1 - df['avg_ssim'])

# Optional: Handle SSIM = 1 → inf case
df['avg_ssim_db'] = df['avg_ssim_db'].replace([np.inf, -np.inf], np.nan).dropna()

# Define features and target
features = [
    'packet_count',
    'total_bytes',
    'avg_packet_size',
    'avg_inter_packet_delay',
    'std_inter_packet_delay'
]
X = df[features].fillna(0)
y = df['avg_ssim_db']  # now using decibel values

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Random Forest Regressor
reg = RandomForestRegressor(n_estimators=100, random_state=42)
reg.fit(X_train, y_train)

# Predict and evaluate
y_pred = reg.predict(X_test)
print("MSE:", mean_squared_error(y_test, y_pred))
print("R² Score:", r2_score(y_test, y_pred))

# Explain model using RegressionTrustee
trustee = RegressionTrustee(expert=reg)
trustee.fit(X_train, y_train, num_iter=50, num_stability_iter=10, samples_size=0.3, verbose=True)
dt, pruned_dt, agreement, reward = trustee.explain()
dt_y_pred = dt.predict(X_test)

# === Run TrustReport ===
trust_report = TrustReport(
    reg,
    X=X,
    y=y,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    max_iter=5,
    num_pruning_iter=0,
    trustee_num_iter=10,
    trustee_num_stability_iter=5,
    trustee_sample_size=0.3,
    analyze_branches=True,
    analyze_stability=True,
    top_k=10,
    verbose=True,
    feature_names=features,
    is_classify=False,  #regression mode
)

#Save report
OUTPUT_PATH = "out/"
REPORT_PATH = f"{OUTPUT_PATH}/report/trust_report.obj"
os.makedirs(os.path.dirname(REPORT_PATH), exist_ok=True)
trust_report.save(REPORT_PATH)

print(" Trust report generated and saved.")
print(trust_report)


MSE: 6.17848338301275
R² Score: 0.694859986025132
Initializing training dataset using RandomForestRegressor(random_state=42) as expert model
Expert model score: 0.9589751235526772
Initializing Trustee outer-loop with 10 iterations
########## Outer-loop Iteration 0/10 ##########
Initializing Trustee inner-loop with 10 iterations
########## Inner-loop Iteration 0/50 ##########
Sampling 365 points from training dataset with (1218, 1218) entries
Student model 0-0 trained with depth 22 and 255 leaves:
Student model score: 0.6556653644225946
Student model 0-0 fidelity: 0.6556653644225946
########## Inner-loop Iteration 1/50 ##########
Sampling 365 points from training dataset with (1328, 1328) entries
Student model 0-1 trained with depth 20 and 249 leaves:
Student model score: 0.792892291242224
Student model 0-1 fidelity: 0.792892291242224
########## Inner-loop Iteration 2/50 ##########
Sampling 365 points from training dataset with (1438, 1438) entries
Student model 0-2 trained with depth 2



Running Trust Report...
Preparing data...
Done!
Progress |----------------------------------------------------------------------------------------------------| 0.9% Complete
Collecting blackbox information...
Done!
Progress |█---------------------------------------------------------------------------------------------------| 1.8% Complete
Collecting trustee information...
Fitting blackbox model...
Done!
Blackbox model score report with training data:
R2 Score: 0.694859986025132
Using Classification Trustee algorithm to extract DT...
Initializing training dataset using RandomForestRegressor(random_state=42) as expert model
Expert model score: 0.9513282509561646
Initializing Trustee outer-loop with 5 iterations
########## Outer-loop Iteration 0/5 ##########
Initializing Trustee inner-loop with 5 iterations
########## Inner-loop Iteration 0/10 ##########
Sampling 365 points from training dataset with (1218, 1218) entries
Student model 0-0 trained with depth 17 and 255 leaves:
Student 



Done!
Plotting...


[1;30;43mStreaming output truncated to the last 5000 lines.[0m


Done!
Done!
 Trust report generated and saved.

+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|                                                                                                                    Classification Trust Report                                                                                                                    |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|                                                            +--------------------------------------------------------------------------------------------------------

<Figure size 3000x300 with 0 Axes>

<Figure size 4000x300 with 0 Axes>

<Figure size 5000x300 with 0 Axes>

<Figure size 5000x300 with 0 Axes>

<Figure size 4000x300 with 0 Axes>

<Figure size 4000x300 with 0 Axes>

<Figure size 4000x300 with 0 Axes>

In [7]:
!zip -r /content/out.zip /content/out
from google.colab import files
files.download('/content/out.zip')

  adding: content/out/ (stored 0%)
  adding: content/out/report/ (stored 0%)
  adding: content/out/report/trust_report.obj/ (stored 0%)
  adding: content/out/report/trust_report.obj/report/ (stored 0%)
  adding: content/out/report/trust_report.obj/report/trust_report_pruned_dt (deflated 76%)
  adding: content/out/report/trust_report.obj/report/trust_report_dt (deflated 83%)
  adding: content/out/report/trust_report.obj/report/plots/ (stored 0%)
  adding: content/out/report/trust_report.obj/report/plots/max_dt_stability.pdf (deflated 34%)
  adding: content/out/report/trust_report.obj/report/plots/dts_fidelity_x_depth.pdf (deflated 40%)
  adding: content/out/report/trust_report.obj/report/plots/min_dt_feature_stability.pdf (deflated 36%)
  adding: content/out/report/trust_report.obj/report/plots/top_features_lines.pdf (deflated 33%)
  adding: content/out/report/trust_report.obj/report/plots/min_dt_stability.pdf (deflated 34%)
  adding: content/out/report/trust_report.obj/report/plots/min

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>