In [1]:
# Install Dependencies
!pip install gdown scikit-learn pandas

import gdown
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score



In [2]:
# Download Preprocessed Data
#
# Fetch the preprocessed modeling dataset (a CSV shared via Google Drive)
# into the working directory and load it into `df`.

url = 'https://drive.google.com/uc?id=1nUNgJzVOyKywhGODfwN5TfpSGFJE1OTG'
output = 'modeling_dataset.csv'

# Some gdown releases report a failed download (e.g. a Drive permission or
# quota error) by returning None instead of raising. Stop here in that case
# so that a stale or missing local file is not read by mistake.
downloaded_path = gdown.download(url, output, quiet=False)
if downloaded_path is None:
    raise RuntimeError(f"Failed to download dataset from {url}")

# Read the file that was just written instead of repeating the filename.
df = pd.read_csv(downloaded_path)
print("✅ Loaded:", df.shape)

Downloading...
From: https://drive.google.com/uc?id=1nUNgJzVOyKywhGODfwN5TfpSGFJE1OTG
To: /content/modeling_dataset.csv
100%|██████████| 318k/318k [00:00<00:00, 69.0MB/s]

✅ Loaded: (2176, 10)





In [3]:
# ***Regression Model for avg_chunk_size***
#
# Fit a random forest on the feature columns listed below to predict
# avg_chunk_size, then report the error on a 20% hold-out split.
features = [
    'packet_count', 'total_bytes', 'avg_packet_size',
    'avg_inter_packet_delay', 'std_inter_packet_delay',
]

# Missing feature values are replaced with 0 before modeling.
X = df.loc[:, features].fillna(0)
y_reg = df['avg_chunk_size']

X_train_r, X_test_r, y_train_r, y_test_r = train_test_split(
    X,
    y_reg,
    test_size=0.2,
    random_state=42,
)

# fit() returns the fitted estimator, so construction and training are chained.
regressor = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    X_train_r, y_train_r
)
y_pred_r = regressor.predict(X_test_r)

# byte_error is the square root of the MSE (the RMSE), which is expressed in
# the same units as avg_chunk_size itself.
mse = mean_squared_error(y_test_r, y_pred_r)
byte_error = np.sqrt(mse)

print(f"avg_chunk_size Regressor MSE: {mse:.2f}")
print(f"avg_chunk_size Regressor Byte Error: {byte_error:.2f}")

avg_chunk_size Regressor MSE: 8463596353.75
avg_chunk_size Regressor Byte Error: 91997.81


In [4]:
# ***Classification Model for Rebuffering (chunk count unexpectedly drops)***
#
# Labeling rule: a row counts as rebuffered (1) when its observed chunk_count
# is below 8, and as not rebuffered (0) otherwise. The label is derived from
# the recorded chunk_count, not from a model prediction; the classifier below
# then tries to recover it from the feature matrix X built in the regression
# cell.
df['rebuffered'] = df['chunk_count'].lt(8).astype(int)
y_clf = df['rebuffered']

X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(
    X,
    y_clf,
    test_size=0.2,
    random_state=42,
)

# fit() returns the fitted estimator, so construction and training are chained.
classifier = LogisticRegression(max_iter=500).fit(X_train_c, y_train_c)
y_pred_c = classifier.predict(X_test_c)

# Overall accuracy plus per-class precision/recall/F1 on the hold-out split.
acc = accuracy_score(y_test_c, y_pred_c)
report = classification_report(y_test_c, y_pred_c)

print(f"\nRebuffering Classifier Accuracy: {acc:.2f}")
print("Classification Report:\n", report)


Rebuffering Classifier Accuracy: 0.91
Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.96      0.94       348
           1       0.81      0.69      0.75        88

    accuracy                           0.91       436
   macro avg       0.87      0.83      0.85       436
weighted avg       0.90      0.91      0.90       436

