# Supervised Model Baseline Notebook

This notebook trains baseline supervised models with the project's `youtube_success_ml` package, then summarizes the evaluation metrics and top feature importances for the subscribers, earnings, and growth targets.

## 1) Setup

In [None]:
from pathlib import Path
import sys

import pandas as pd
import plotly.express as px

# Locate the repository root: use the current working directory, or its parent
# when the working directory has no `src/` folder but the parent does (e.g. the
# notebook was started from a subfolder of the repository).
ROOT = Path.cwd().resolve()
if not (ROOT / "src").exists() and (ROOT.parent / "src").exists():
    ROOT = ROOT.parent

# Put `src/` first on the import path for the project imports that follow.
# Any entry left behind by an earlier run of this cell is dropped first, so
# re-executing the cell does not accumulate duplicates in `sys.path`.
SRC_DIR = str(ROOT / "src")
sys.path[:] = [entry for entry in sys.path if entry != SRC_DIR]
sys.path.insert(0, SRC_DIR)

from youtube_success_ml.config import TrainingConfig
from youtube_success_ml.data.loader import load_dataset
from youtube_success_ml.models.supervised import train_supervised_bundle, top_feature_importance


## 2) Train Baseline Bundle

In [None]:
# Load the dataset and build a training configuration from its defaults.
df = load_dataset()
cfg = TrainingConfig()

# Train the supervised bundle on the loaded data.
bundle = train_supervised_bundle(df, config=cfg)

# Transpose the bundle's metrics table so its original columns become rows.
metrics = pd.DataFrame(bundle.metrics).transpose()
metrics

## 3) Metrics Visualization

In [None]:
# Reshape the metrics table to long form: one row per (target, metric) pair,
# with the former index exposed as a "target" column.
plot_df = (
    metrics.reset_index()
    .rename(columns={"index": "target"})
    .melt(id_vars="target", var_name="metric", value_name="value")
)

# Grouped bar chart: one cluster per target, one bar per metric.
px.bar(
    plot_df,
    x="target",
    y="value",
    color="metric",
    barmode="group",
    title="Baseline Metrics by Target",
)

## 4) Feature Importance: Subscribers

In [None]:
# Tabulate up to 15 top feature importances for the "subscribers" target.
fi_sub = pd.DataFrame(
    data=top_feature_importance(bundle, target="subscribers", top_n=15),
)
fi_sub

## 5) Feature Importance: Earnings

In [None]:
# Tabulate up to 15 top feature importances for the "earnings" target.
fi_earn = pd.DataFrame(
    data=top_feature_importance(bundle, target="earnings", top_n=15),
)
fi_earn

## 6) Feature Importance: Growth

In [None]:
# Tabulate up to 15 top feature importances for the "growth" target.
fi_growth = pd.DataFrame(
    data=top_feature_importance(bundle, target="growth", top_n=15),
)
fi_growth

## 7) Importance Chart (Growth Target)

In [None]:
# Keep the 12 rows with the highest importance, largest first.
top_growth = fi_growth.sort_values(by="importance", ascending=False).iloc[:12]

# Bar chart of those features for the growth model.
px.bar(
    top_growth,
    x="feature",
    y="importance",
    title="Top Feature Importances (Growth Model)",
)