# Analysis

In [1]:
import json
import pandas as pd

In [2]:
with open('donateacry_scored.json', 'r') as f:
    donateacry_data = json.load(f)

In [3]:
donateacry_df = {}

for i, entry in enumerate(donateacry_data):
    scores = pd.Series(entry['criteria_scores'])
    scores['class'] = entry['class']
    donateacry_df[i] = scores
donateacry_df = pd.DataFrame(donateacry_df).T

In [4]:
donateacry_df['class'].unique()

array(['belly_pain', 'discomfort', 'burping', 'tired', 'hungry'],
      dtype=object)

In [5]:
prior = {'hungry': 0.4, 'tired': 0.25, 'discomfort': 0.15, 'burping': 0.12, 'belly_pain': 0.08}

## Random Forest

In [6]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import (
    classification_report,
    f1_score,
    precision_score,
    recall_score,
    confusion_matrix
)
from imblearn.over_sampling import SMOTE
import numpy as np

# Separate features and target
X = donateacry_df.drop('class', axis=1)
y = donateacry_df['class']

# Encode target labels as integers
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# ----------------------------
# üßÆ Handle class imbalance with SMOTE
# ----------------------------
X_res, y_res = SMOTE(random_state=42).fit_resample(X, y_encoded)

# ----------------------------
# ‚öôÔ∏è Compute class weights from priors (optional but okay to keep)
# ----------------------------
# prior should be a dict like {"hungry":0.4, "tired":0.25, ...}
class_names = le.classes_
inv_prior = np.array([1.0 / prior[cls] for cls in class_names])
class_weights = inv_prior / inv_prior.min()
class_weight_dict = {i: w for i, w in enumerate(class_weights)}

# ----------------------------
# ‚úÇÔ∏è Split after resampling
# ----------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X_res, y_res, test_size=0.2, random_state=42, stratify=y_res
)

# ----------------------------
# üå≤ Random Forest with tuning
# ----------------------------
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 2, 3, 5, 10, 20],
    'min_samples_split': [2, 5, 10]
}
rf = RandomForestClassifier(random_state=42, class_weight=class_weight_dict)
clf = GridSearchCV(rf, param_grid, cv=3, n_jobs=-1)
clf.fit(X_train, y_train)

print("Best parameters found:", clf.best_params_)

# ----------------------------
# üìä Evaluation
# ----------------------------
y_pred = clf.predict(X_test)

print("\nTraining accuracy:", clf.score(X_train, y_train))
print("Test accuracy:", clf.score(X_test, y_test))

# Detailed classification report
print("\nClassification report:")
print(classification_report(y_test, y_pred, target_names=le.classes_))

# Macro-averaged metrics (explicitly)
macro_precision = precision_score(y_test, y_pred, average='macro', zero_division=0)
macro_recall = recall_score(y_test, y_pred, average='macro', zero_division=0)
macro_f1 = f1_score(y_test, y_pred, average='macro', zero_division=0)

print(f"\nMacro Precision: {macro_precision:.3f}")
print(f"Macro Recall:    {macro_recall:.3f}")
print(f"Macro F1 Score:  {macro_f1:.3f}")

# Confusion matrix for visualization / debugging
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))


Best parameters found: {'max_depth': 20, 'min_samples_split': 2, 'n_estimators': 200}

Training accuracy: 1.0
Test accuracy: 0.928125

Classification report:
              precision    recall  f1-score   support

  belly_pain       0.97      0.97      0.97        64
     burping       1.00      0.95      0.98        64
  discomfort       0.89      0.98      0.93        64
      hungry       0.96      0.75      0.84        64
       tired       0.85      0.98      0.91        64

    accuracy                           0.93       320
   macro avg       0.93      0.93      0.93       320
weighted avg       0.93      0.93      0.93       320


Macro Precision: 0.933
Macro Recall:    0.928
Macro F1 Score:  0.927

Confusion Matrix:
[[62  0  0  2  0]
 [ 1 61  0  0  2]
 [ 0  0 63  0  1]
 [ 1  0  7 48  8]
 [ 0  0  1  0 63]]


In [7]:
# ‚öôÔ∏è Evaluate SMOTE-trained model on the original (non-resampled) data

from sklearn.metrics import (
    classification_report,
    f1_score,
    precision_score,
    recall_score,
    confusion_matrix
)

# Original data
X_orig = donateacry_df.drop('class', axis=1)
y_orig = donateacry_df['class']

# Encode labels using the same encoder as training
y_orig_encoded = le.transform(y_orig)

# Predictions from the SMOTE-trained model
y_orig_pred = clf.predict(X_orig)

print("\nüìä Evaluation on ORIGINAL (imbalanced) DonateACry data")
print("------------------------------------------------------")

print("Accuracy:", clf.score(X_orig, y_orig_encoded))

print("\nClassification report:")
print(classification_report(y_orig_encoded, y_orig_pred, target_names=le.classes_))

# Macro-averaged metrics
macro_precision = precision_score(y_orig_encoded, y_orig_pred, average='macro', zero_division=0)
macro_recall = recall_score(y_orig_encoded, y_orig_pred, average='macro', zero_division=0)
macro_f1 = f1_score(y_orig_encoded, y_orig_pred, average='macro', zero_division=0)

print(f"\nMacro Precision: {macro_precision:.3f}")
print(f"Macro Recall:    {macro_recall:.3f}")
print(f"Macro F1 Score:  {macro_f1:.3f}")

print("\nConfusion Matrix:")
print(confusion_matrix(y_orig_encoded, y_orig_pred))



üìä Evaluation on ORIGINAL (imbalanced) DonateACry data
------------------------------------------------------
Accuracy: 0.9584415584415584

Classification report:
              precision    recall  f1-score   support

  belly_pain       0.93      1.00      0.96        13
     burping       1.00      1.00      1.00         7
  discomfort       0.77      1.00      0.87        24
      hungry       1.00      0.95      0.97       320
       tired       0.72      1.00      0.84        21

    accuracy                           0.96       385
   macro avg       0.89      0.99      0.93       385
weighted avg       0.97      0.96      0.96       385


Macro Precision: 0.885
Macro Recall:    0.990
Macro F1 Score:  0.930

Confusion Matrix:
[[ 13   0   0   0   0]
 [  0   7   0   0   0]
 [  0   0  24   0   0]
 [  1   0   7 304   8]
 [  0   0   0   0  21]]


## Calibrated Classifier

In [22]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.calibration import CalibratedClassifierCV
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import (
    classification_report,
    f1_score,
    precision_score,
    recall_score,
    confusion_matrix
)
from imblearn.over_sampling import SMOTE
import numpy as np

# Separate features and target
X = donateacry_df.drop('class', axis=1)
y = donateacry_df['class']

# Encode target labels as integers
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# ----------------------------
# üßÆ Handle class imbalance with SMOTE
# ----------------------------
X_res, y_res = SMOTE(random_state=42).fit_resample(X, y_encoded)

# ----------------------------
# ‚öôÔ∏è Compute class weights from priors (optional but okay to keep)
# ----------------------------
# prior should be a dict like {"hungry":0.4, "tired":0.25, ...}
class_names = le.classes_
inv_prior = np.array([1.0 / prior[cls] for cls in class_names])
class_weights = inv_prior / inv_prior.min()
class_weight_dict = {i: w for i, w in enumerate(class_weights)}

# ----------------------------
# ‚úÇÔ∏è Split after resampling
# ----------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X_res, y_res, test_size=0.2, random_state=42, stratify=y_res
)

# ----------------------------
# üå≤ Random Forest with hyperparameter tuning
# ----------------------------
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 2, 3, 5, 10, 20],
    'min_samples_split': [2, 5, 10]
}
rf = RandomForestClassifier(random_state=42, class_weight=class_weight_dict)
grid = GridSearchCV(rf, param_grid, cv=3, n_jobs=-1)
grid.fit(X_train, y_train)

print("Best parameters found:", grid.best_params_)

# ----------------------------
# üß≠ Wrap the best model in a CalibratedClassifierCV
# ----------------------------
# Use isotonic (more flexible) or sigmoid (faster, smoother)
calibrated_clf = CalibratedClassifierCV(grid.best_estimator_, method="isotonic", cv=3)
calibrated_clf.fit(X_train, y_train)

# ----------------------------
# üìä Evaluation
# ----------------------------
y_pred = calibrated_clf.predict(X_test)
y_proba = calibrated_clf.predict_proba(X_test)  # calibrated probabilities

print("\nTraining accuracy:", calibrated_clf.score(X_train, y_train))
print("Test accuracy:", calibrated_clf.score(X_test, y_test))

# Detailed classification report
print("\nClassification report:")
print(classification_report(y_test, y_pred, target_names=le.classes_))

# Macro-averaged metrics (explicitly)
macro_precision = precision_score(y_test, y_pred, average='macro', zero_division=0)
macro_recall = recall_score(y_test, y_pred, average='macro', zero_division=0)
macro_f1 = f1_score(y_test, y_pred, average='macro', zero_division=0)

print(f"\nMacro Precision: {macro_precision:.3f}")
print(f"Macro Recall:    {macro_recall:.3f}")
print(f"Macro F1 Score:  {macro_f1:.3f}")

# Confusion matrix for visualization / debugging
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# ----------------------------
# üìà Example: calibrated probability check (optional)
# ----------------------------
# Example: print calibrated class probabilities for the first 5 test samples
print("\nExample calibrated probabilities (first 5 samples):")
for i in range(5):
    probs = {cls: p for cls, p in zip(le.classes_, y_proba[i])}
    print(f"Sample {i+1}: {probs}")


Best parameters found: {'max_depth': 20, 'min_samples_split': 2, 'n_estimators': 200}

Training accuracy: 1.0
Test accuracy: 0.9375

Classification report:
              precision    recall  f1-score   support

  belly_pain       1.00      0.97      0.98        64
     burping       1.00      0.95      0.98        64
  discomfort       0.94      0.97      0.95        64
      hungry       0.87      0.84      0.86        64
       tired       0.88      0.95      0.92        64

    accuracy                           0.94       320
   macro avg       0.94      0.94      0.94       320
weighted avg       0.94      0.94      0.94       320


Macro Precision: 0.939
Macro Recall:    0.938
Macro F1 Score:  0.938

Confusion Matrix:
[[62  0  0  2  0]
 [ 0 61  0  2  1]
 [ 0  0 62  2  0]
 [ 0  0  3 54  7]
 [ 0  0  1  2 61]]

Example calibrated probabilities (first 5 samples):
Sample 1: {'belly_pain': np.float64(0.0), 'burping': np.float64(0.0), 'discomfort': np.float64(0.0), 'hungry': np.float64(

In [23]:
import joblib

# Save the calibrated classifier to disk
joblib.dump(calibrated_clf, "models/calibrated_clf.joblib")
print("Calibrated classifier saved to models/calibrated_clf.joblib")


Calibrated classifier saved to models/calibrated_clf.joblib
