# 📊 Train Word Readiness Model

This notebook trains a logistic regression model on logged interaction data to predict whether a user is ready to learn a given word.

In [None]:
import pandas as pd
import json
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import joblib


In [None]:
# Load the logged user interactions from disk.
# The encoding is passed explicitly: JSON is conventionally UTF-8, whereas
# open() would otherwise use a platform-dependent locale encoding and could
# mis-decode (or fail on) non-ASCII text in the log.
with open("learnability_log.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Build the table that the following cells select columns from.
df = pd.DataFrame(data)
df.head()  # notebook preview of the first rows


In [None]:
# Pick the model inputs (feature columns) and the prediction target column
features = ["similarity", "frequency", "pos_match", "cefr_gap"]
X = df.loc[:, features]
y = df.loc[:, "label"]


In [None]:
# Hold out 25% of the rows for evaluation. The split is stratified on y and
# uses a fixed seed so that rerunning the notebook yields the same partition.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    stratify=y,
    random_state=42,
)


In [None]:
# Create the classifier (solver capped at 1000 iterations), then fit it
# on the training partition only.
clf = LogisticRegression(max_iter=1000)
clf.fit(X=X_train, y=y_train)


In [None]:
# Predict labels for the held-out rows and print the classification report
# comparing them against the true labels.
y_pred = clf.predict(X_test)
print("📊 Classification Report:")
print(classification_report(y_true=y_test, y_pred=y_pred))


In [None]:
# Persist the fitted classifier with joblib.
# The output path is defined once so the file actually written and the
# confirmation message below cannot drift apart.
model_path = "word_readiness_model.pkl"
joblib.dump(clf, model_path)
print(f"✅ Model saved to '{model_path}'")
