In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold, GridSearchCV, cross_val_score, cross_validate
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.preprocessing import PolynomialFeatures

from sklearn.decomposition import PCA

import matplotlib.pyplot as plt
import random


In [None]:
# Load the training and test user tables from CSV files located relative to
# the notebook's working directory.
df_train = pd.read_csv('train_users.csv')
df_test = pd.read_csv('test_users.csv')

In [None]:
df_train.shape, df_test.shape

In [None]:
df_train.columns

In [None]:
# Remove every underscore from the label strings (literal match, not a regex).
# NOTE(review): presumably this turns labels such as "user_1" into "user1" so that
# they match the bandit context names defined later in the notebook — confirm
# against the raw data.
df_train["label"] = df_train["label"].str.replace("_", "", regex=False)


In [None]:
df_test.columns

In [None]:
# Structural overview of the training frame, followed by the class balance
# of the target column.
label_column = df_train["label"]

for overview in (df_train.shape, df_train.dtypes):
    print(overview)

print("\nUnique labels:", label_column.unique())
print("\nLabel counts:\n", label_column.value_counts())
print("\nLabel proportions:\n", label_column.value_counts(normalize=True))


# Box plots of each feature, grouped by label

In [None]:
# Cast the `subscriber` column of the training frame to integers.
# NOTE(review): the same cast is not applied to df_test in the visible cells, and
# astype(int) fails on missing values — confirm both are intended.
df_train["subscriber"] = df_train["subscriber"].astype(int)


In [None]:
# Columns to plot: everything except `user_id`, the target and the two columns
# the author excludes from the feature set.
feat_cols = [c for c in df_train.columns if c not in ["user_id", "label", "browser_version", "region_code"]]

# The set of labels is the same for every feature, so sort it once instead of
# twice per loop iteration.
label_order = sorted(df_train["label"].unique())

for c in feat_cols:
    plt.figure()
    # One series per label, with missing values removed so boxplot gets clean data.
    data = [df_train.loc[df_train["label"] == lab, c].dropna() for lab in label_order]
    # NOTE(review): Matplotlib 3.9 renamed `labels` to `tick_labels`; switch once
    # the minimum supported Matplotlib version allows it.
    plt.boxplot(data, labels=label_order)
    plt.title(f"Boxplot: {c} by label")
    plt.show()


In [None]:
df_train["browser_version"].unique().shape

In [None]:
df_train["user_id"].unique().shape

# Comparing different models with cross-validation

In [None]:
# Name of the target column; defined first so the feature list and `y` both use it
# instead of repeating the literal.
target = "label"

# Columns never fed to the models: `user_id`, the target, and two columns the
# author leaves out of the feature set.
excluded_cols = {"user_id", target, "browser_version", "region_code"}
feat_cols = [c for c in df_train.columns if c not in excluded_cols]

X = df_train[feat_cols]
y = df_train[target].astype(str)

# Stratified, shuffled 5-fold CV with a fixed seed so every model is scored on
# the same splits.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Preprocessing without scaling: median imputation of the feature columns only.
median_imputer = SimpleImputer(strategy="median")
prep_tree = ColumnTransformer(
    transformers=[("num", median_imputer, feat_cols)],
    remainder="drop",
)

# Preprocessing with scaling: median imputation followed by standardisation.
impute_then_scale = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler()),
])
prep_scaled = ColumnTransformer(
    transformers=[("num", impute_then_scale, feat_cols)],
    remainder="drop",
)

def _as_pipeline(prep, clf):
    """Chain a preprocessing transformer and a classifier into a two-step Pipeline."""
    return Pipeline([("prep", prep), ("clf", clf)])


# Candidate models keyed by display name. The first three use the impute-only
# preprocessing; the last three use the impute-and-scale preprocessing.
models = {
    "DecisionTree": _as_pipeline(
        prep_tree,
        DecisionTreeClassifier(random_state=42, class_weight="balanced"),
    ),
    "RandomForest": _as_pipeline(
        prep_tree,
        RandomForestClassifier(
            n_estimators=200,
            random_state=42,
            class_weight="balanced_subsample",
            n_jobs=-1,
        ),
    ),
    "HistGradientBoosting": _as_pipeline(
        prep_tree,
        HistGradientBoostingClassifier(max_depth=6, learning_rate=0.05, max_iter=800, random_state=42),
    ),
    "LogisticRegression": _as_pipeline(
        prep_scaled,
        LogisticRegression(max_iter=5000, class_weight="balanced", n_jobs=-1),
    ),
    "SVM_RBF": _as_pipeline(
        prep_scaled,
        SVC(kernel="rbf", C=10, gamma="scale", class_weight="balanced"),
    ),
    "MLP": _as_pipeline(
        prep_scaled,
        MLPClassifier(hidden_layer_sizes=(64, 64), alpha=1e-4, max_iter=1000, random_state=42),
    ),
}

# Metrics collected on every fold: plain accuracy and macro-averaged F1.
scoring = {"acc": "accuracy", "macro_f1": "f1_macro"}

rows = []
for name, model in models.items():
    out = cross_validate(
        model, X, y,
        cv=cv, scoring=scoring, n_jobs=-1, return_train_score=False,
    )
    fold_acc = out["test_acc"]
    fold_f1 = out["test_macro_f1"]
    # One summary row per model: mean and standard deviation across the folds.
    rows.append([name, fold_acc.mean(), fold_acc.std(), fold_f1.mean(), fold_f1.std()])

# Rank the models by mean accuracy, best first.
results = (
    pd.DataFrame(rows, columns=["model", "acc_mean", "acc_std", "macroF1_mean", "macroF1_std"])
    .sort_values("acc_mean", ascending=False)
)

print(results.to_string(index=False))


# Repeating the model comparison with PCA-reduced features

In [None]:


# Same feature/target split and CV configuration as the earlier model comparison.
target = "label"
feat_cols = [c for c in df_train.columns if c not in ["user_id", "label", "browser_version", "region_code"]]

X = df_train[feat_cols]
y = df_train[target].astype(str)

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Shared preprocessing: median imputation -> standardisation -> PCA with 5 components.
# NOTE(review): PCA(n_components=5) needs at least 5 feature columns — confirm
# len(feat_cols) >= 5 for this dataset.
pca5_steps = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler()),
    ("pca", PCA(n_components=5, random_state=42)),
])
prep_pca5 = ColumnTransformer(
    transformers=[("num", pca5_steps, feat_cols)],
    remainder="drop",
)

def _pca_pipeline(clf):
    """Put the shared PCA preprocessing in front of ``clf`` as a two-step Pipeline."""
    return Pipeline([("prep", prep_pca5), ("clf", clf)])


# Same six classifiers and hyperparameters as the non-PCA comparison, each fed
# the 5-component PCA projection.
models_pca = {
    "DecisionTree_PCA5": _pca_pipeline(
        DecisionTreeClassifier(random_state=42, class_weight="balanced"),
    ),
    "RandomForest_PCA5": _pca_pipeline(
        RandomForestClassifier(
            n_estimators=200,
            random_state=42,
            class_weight="balanced_subsample",
            n_jobs=-1,
        ),
    ),
    "HistGradientBoosting_PCA5": _pca_pipeline(
        HistGradientBoostingClassifier(max_depth=6, learning_rate=0.05, max_iter=800, random_state=42),
    ),
    "LogisticRegression_PCA5": _pca_pipeline(
        LogisticRegression(max_iter=5000, class_weight="balanced", n_jobs=-1),
    ),
    "SVM_RBF_PCA5": _pca_pipeline(
        SVC(kernel="rbf", C=10, gamma="scale", class_weight="balanced"),
    ),
    "MLP_PCA5": _pca_pipeline(
        MLPClassifier(hidden_layer_sizes=(64, 64), alpha=1e-4, max_iter=1000, random_state=42),
    ),
}

# Metrics collected on every fold: plain accuracy and macro-averaged F1.
scoring = {"acc": "accuracy", "macro_f1": "f1_macro"}

rows = []
for name, model in models_pca.items():
    out = cross_validate(
        model, X, y,
        cv=cv, scoring=scoring, n_jobs=-1, return_train_score=False,
    )
    fold_acc, fold_f1 = out["test_acc"], out["test_macro_f1"]
    rows.append([name, fold_acc.mean(), fold_acc.std(), fold_f1.mean(), fold_f1.std()])

# Note: rebinding `results` replaces the table produced by the non-PCA comparison.
results = (
    pd.DataFrame(rows, columns=["model", "acc_mean", "acc_std", "macroF1_mean", "macroF1_std"])
    .sort_values("acc_mean", ascending=False)
)
print(results.to_string(index=False))


In [None]:
# how much variance is explained

In [None]:
# Fit the PCA preprocessing on the full training features purely to inspect it.
pipe = Pipeline([("prep", prep_pca5)])
Z = pipe.fit_transform(X, y)  # projected data; kept for inspection only, not for training

# Walk down Pipeline -> ColumnTransformer -> "num" sub-pipeline to reach the fitted PCA step.
fitted_prep = pipe.named_steps["prep"]
pca = fitted_prep.named_transformers_["num"].named_steps["pca"]

variance_ratios = pca.explained_variance_ratio_
print("Explained variance (sum, 5 PCs):", variance_ratios.sum())
print("Per-PC:", variance_ratios)


In [None]:
models.keys()

In [None]:
models_pca.keys()

# Training the best model on the full training set and predicting on the test set

In [None]:


# Final training data: every labelled row, using the same feature columns as in
# cross-validation.
X_train, y_train = df_train[feat_cols], df_train["label"].astype(str)

# NOTE(review): df_test is used as loaded; confirm it contains every column in
# feat_cols with dtypes compatible with the training frame.
X_test = df_test[feat_cols]

# The model is selected by its hard-coded key in `models`.
best_model = models['RandomForest']

# Pipeline.fit returns the fitted pipeline, so the full-data fit and the
# test-set prediction can be chained.
test_pred = best_model.fit(X_train, y_train).predict(X_test)


In [None]:
# Attach the predicted labels to the test frame and persist it to disk.
df_test['label'] = test_pred
# index=False keeps the row index out of the file, so re-reading it with
# pd.read_csv does not produce a spurious "Unnamed: 0" column.
df_test.to_csv('test_pred.csv', index=False)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from rlcmab_sampler import sampler


In [None]:
# Build the reward sampler; its `sample(arm)` method is called with a flat arm
# index in the bandit simulations.
# NOTE(review): roll_number presumably selects/seeds the reward distributions —
# confirm against the rlcmab_sampler documentation.
roll_number = 4
reward_sampler = sampler(roll_number)

In [None]:
# Number of interaction steps simulated in each bandit run (one run per
# hyperparameter value and context).
T = 10000

In [None]:
# Bandit contexts and the news categories that act as arms within each context.
# Order matters: list positions are the context/category ids passed to arm_index.
contexts = ['user1', 'user2', 'user3']
categories = ["ENTERTAINMENT", "EDUCATION", "TECH", "CRIME"]


In [None]:
def arm_index(context_id, category_id, n_categories=4):
    """Map a (context, category) pair to a flat arm index.

    Arms are laid out context-major: all categories of context 0 come first,
    then all categories of context 1, and so on.

    Args:
        context_id: Zero-based context index (0, 1, 2 in this notebook).
        category_id: Zero-based category index within the context
            (0, 1, 2, 3 in this notebook).
        n_categories: Number of categories per context. Defaults to 4, the
            value that was previously hard-coded.

    Returns:
        ``context_id * n_categories + category_id``.
    """
    return context_id * n_categories + category_id



In [None]:
def select_best_category(Q):
    """Return the position of the largest value in ``Q`` as a plain ``int``.

    This is the greedy choice; when several entries share the maximum, the
    first of them is returned.
    """
    best = np.argmax(Q)
    return int(best)

In [None]:
# Exploration rates to compare. Results are stored per rate and then per context:
# results_egreedy[eps][context_name].
epsilons = [0.01, 0.05, 0.1]
results_egreedy = {eps: {} for eps in epsilons}

In [None]:
# Epsilon-greedy simulation: one independent run of T steps for every
# (epsilon, context) pair.
n_arms = len(categories)

for eps in epsilons:
    for context_idx, context_name in enumerate(contexts):
        Q = np.zeros(n_arms, dtype=float)    # running mean reward per category
        N = np.zeros(n_arms, dtype=int)      # number of times each category was chosen
        rewards = np.zeros(T, dtype=float)   # reward received at each step

        for t in range(T):
            explore = np.random.rand() < eps
            if explore:
                # Explore: pick a category uniformly at random.
                category_idx = np.random.randint(n_arms)
            else:
                # Exploit: several categories can share the highest estimate, and
                # always taking the first would bias the run towards it, so break
                # ties uniformly at random.
                candidates = np.flatnonzero(Q == np.max(Q))
                category_idx = np.random.choice(candidates)

            # Draw a reward for the arm belonging to this context and category.
            j = arm_index(context_idx, category_idx)
            r = reward_sampler.sample(j)

            # Incremental sample-mean update of the chosen category's estimate.
            N[category_idx] += 1
            Q[category_idx] += (r - Q[category_idx]) / N[category_idx]
            rewards[t] = r

        # Running average of the reward after each step.
        avg_rewards = np.cumsum(rewards) / np.arange(1, T + 1)

        results_egreedy[eps][context_name] = {
            "Q": Q.copy(),
            "N": N.copy(),
            "rewards": rewards,
            "avg_rewards": avg_rewards
        }

                

In [None]:
# Report the learned Q-values of every context, grouped by epsilon.
for eps in epsilons:
    print(f"\nEpsilon={eps}")
    for context_name in contexts:
        Q = results_egreedy[eps][context_name]["Q"]
        formatted = ", ".join(f"{cat}:{q:.4f}" for cat, q in zip(categories, Q))
        print(f"  {context_name} Q-values: " + formatted)
        

In [None]:
# One figure per context, overlaying the running-average reward of every epsilon.
for context_name in contexts:
    plt.figure(figsize=(8, 5))
    for eps in epsilons:
        run = results_egreedy[eps][context_name]
        plt.plot(run['avg_rewards'], label=f"epsilon={eps}")
    plt.xlabel("Time step")
    plt.ylabel("Average reward")
    plt.title(f"Avg Reward vs Time | {context_name} | Epsilon-Greedy")
    plt.grid(True, alpha=0.3)
    plt.legend()
    plt.show()

In [None]:
# Exploration coefficients to compare for UCB. Results are stored per coefficient
# and then per context: results_ucb[C][context_name].
Cs = [0.5, 1.0, 2.0]
results_ucb = {C:{} for C in Cs}

In [None]:
# UCB simulation: one independent run of T steps for every (C, context) pair.
for C in Cs:
    for context_idx, context_name in enumerate(contexts):
        n_arms = len(categories)
        Q = np.zeros(n_arms, dtype=float)    # running mean reward per category
        N = np.zeros(n_arms, dtype=int)      # number of times each category was chosen
        rewards = np.zeros(T, dtype=float)   # reward received at each step

        for t in range(T):
            untried = np.flatnonzero(N == 0)
            if untried.size > 0:
                # Try every category once first, lowest index first, so the bonus
                # term below never divides by zero.
                category_idx = untried[0]
            else:
                # Mean estimate plus an exploration bonus that shrinks as a
                # category is chosen more often.
                bonus = C * np.sqrt(np.log(t + 1) / N)
                category_idx = int(np.argmax(Q + bonus))

            j = arm_index(context_idx, category_idx)
            r = reward_sampler.sample(j)

            # Incremental sample-mean update of the chosen category's estimate.
            N[category_idx] += 1
            Q[category_idx] += (r - Q[category_idx]) / N[category_idx]
            rewards[t] = r

        # Running average of the reward after each step.
        avg_rewards = np.cumsum(rewards) / np.arange(1, T + 1)

        results_ucb[C][context_name] = {
            'Q': Q.copy(),
            'N': N.copy(),
            'rewards': rewards,
            "avg_rewards": avg_rewards
        }

In [None]:
# Report the learned Q-values of every context, grouped by exploration coefficient.
for C in Cs:
    print(f"C = {C}")
    for context_name in contexts:
        Q = results_ucb[C][context_name]['Q']
        formatted = ", ".join(f"{cat}:{q:.4f}" for cat, q in zip(categories, Q))
        print(f"  {context_name} Q-values: " + formatted)
        

In [None]:
# One figure per context, overlaying the running-average reward of every C.
for context_name in contexts:
    plt.figure(figsize=(8, 5))
    for C in Cs:
        run = results_ucb[C][context_name]
        plt.plot(run['avg_rewards'], label=f"C={C}")
    plt.xlabel("Time step")
    plt.ylabel("Average reward")
    plt.title(f"Avg Reward vs Time | {context_name} | UCB")
    plt.grid(True, alpha=0.3)
    plt.legend()
    plt.show()

In [None]:
# Softmax temperature: Q-values are divided by tau before being turned into
# selection probabilities. Results are stored per context: results_softmax[context_name].
tau = 1.0
results_softmax = {}

In [None]:
# Softmax (Boltzmann) simulation: one run of T steps per context at temperature tau.
for context_idx, context_name in enumerate(contexts):
    n_arms = len(categories)
    Q = np.zeros(n_arms, dtype=float)    # running mean reward per category
    N = np.zeros(n_arms, dtype=int)      # number of times each category was chosen
    rewards = np.zeros(T, dtype=float)   # reward received at each step

    for t in range(T):
        # Subtract the maximum before exponentiating so np.exp cannot overflow;
        # the resulting probabilities are unchanged by the shift.
        logits = Q / tau
        weights = np.exp(logits - np.max(logits))
        probs = weights / weights.sum()

        category_idx = np.random.choice(n_arms, p=probs)
        j = arm_index(context_idx, category_idx)
        r = reward_sampler.sample(j)

        # Incremental sample-mean update of the chosen category's estimate.
        N[category_idx] += 1
        Q[category_idx] += (r - Q[category_idx]) / N[category_idx]
        rewards[t] = r

    # Running average of the reward after each step.
    avg_rewards = np.cumsum(rewards) / np.arange(1, T + 1)

    results_softmax[context_name] = {
        "Q": Q.copy(),
        "N": N.copy(),
        "rewards": rewards,
        "avg_rewards": avg_rewards
    }

In [None]:
# Report the Q-values learned by the softmax policy for every context.
for context_name in contexts:
    Q = results_softmax[context_name]["Q"]
    formatted = ", ".join(f"{cat}:{q:.4f}" for cat, q in zip(categories, Q))
    print(f"{context_name} Q-values: " + formatted)
    

In [None]:
# One figure per context showing the running-average reward of the softmax policy.
for context_name in contexts:
    run = results_softmax[context_name]
    plt.figure(figsize=(8, 5))
    plt.plot(run["avg_rewards"], label=f"tau={tau}")
    plt.xlabel("Time step")
    plt.ylabel("Average reward")
    plt.title(f"Avg Reward vs Time | {context_name} | Softmax")
    plt.grid(True, alpha=0.3)
    plt.legend()
    plt.show()

In [None]:
def select_best_category(Q):
    """Return the index of the largest Q-value as a plain int (greedy choice)."""
    return int(np.argmax(Q))


def recommend_for_policy(users, policy_name, policy_state, articles, category_names=None):
    """Recommend one article per user from the category the policy rates highest.

    Args:
        users: DataFrame with ``user_id`` and ``label`` columns. ``label`` is
            converted to ``str`` and used as the key into ``policy_state``.
        policy_name: Name of the policy. Accepted for call-site readability;
            it does not influence the result.
        policy_state: Mapping ``context -> {"Q": array-like of Q-values}``.
        articles: DataFrame with ``category``, ``link``, ``short_description``
            and ``headline`` columns.
        category_names: Category name for each position of ``Q``. Defaults to
            the notebook-level ``categories`` list.

    Returns:
        A list with one dict per user (in input order) holding ``user_id``,
        ``predicted_context``, ``recommended_category``, ``link``,
        ``short_description`` and ``headline``. The last three are ``None``
        when ``articles`` contains nothing for the recommended category.

    Raises:
        KeyError: If a user's label has no entry in ``policy_state``.
    """
    if category_names is None:
        category_names = categories

    # Filter the article table once per category instead of once per user.
    pools = {}
    outputs = []

    for user_id, context in zip(users["user_id"].to_numpy(), users["label"].to_numpy()):
        context = str(context)
        Q = policy_state[context]["Q"]

        # Greedy choice of category for this context.
        cat = category_names[select_best_category(Q)]

        if cat not in pools:
            pools[cat] = articles.loc[articles["category"] == cat]
        pool = pools[cat]

        if len(pool) == 0:
            # No article for this category. Previously this path read an unset
            # variable (NameError) or silently reused the previous user's
            # article; emit explicit blanks instead.
            link = short_description = headline = None
        else:
            picked = pool.sample().iloc[0]
            link = picked["link"]
            short_description = picked["short_description"]
            headline = picked["headline"]

        outputs.append({
            "user_id": user_id,
            "predicted_context": context,
            "recommended_category": cat,
            "link": link,
            "short_description": short_description,
            "headline": headline,
        })
    return outputs

In [None]:
# Load the news articles and keep only those whose category is one of the
# bandit arms, re-indexed from 0.
category_col = "category"
all_articles = pd.read_csv('news_articles.csv')
in_scope = all_articles[category_col].isin(categories)
articles = all_articles.loc[in_scope].copy().reset_index(drop=True)
articles.shape


In [None]:
# Reload the predictions written to disk earlier in the notebook and keep only the
# user id and the predicted label, which serves as the bandit context.
df_test = pd.read_csv('test_pred.csv')
user_context = df_test[['user_id', 'label']]

In [None]:
# Choose the hyperparams to use for epsilon/UCB
best_eps = 0.05
best_C = 1.0

In [None]:
# Per-context policy state (Q-values, counts, rewards) for the chosen hyperparameters.
chosen_eps_runs = results_egreedy[best_eps]
chosen_ucb_runs = results_ucb[best_C]

eps_policy = {context: chosen_eps_runs[context] for context in contexts}
ucb_policy = {context: chosen_ucb_runs[context] for context in contexts}
# The softmax runs use a single temperature, so the results are used as they are.
softmax_policy = results_softmax


In [None]:
# Produce one article recommendation per test user under each of the three
# learned policies; each result is a list of dicts, one per user.
eps_recos = recommend_for_policy(user_context, "epsilon", eps_policy, articles)
ucb_recos = recommend_for_policy(user_context, "ucb", ucb_policy, articles)
softmax_recos = recommend_for_policy(user_context, "softmax", softmax_policy, articles)


In [None]:
eps_recos[:3]