# In [None]:  (notebook cell prompt left over from export)
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.multioutput import MultiOutputRegressor
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline

# 🚀 Load the improved dataset
df = pd.read_csv("improved_acl_rehab_dataset.csv")

# 📌 Encode the categorical columns as integer codes. The fitted encoders stay
# at module level so that codes can be mapped to labels (and back) later on.
label_enc_phase = LabelEncoder()
label_enc_exercise = LabelEncoder()
label_enc_recovery_stage = LabelEncoder()

df["Rehab Phase"] = label_enc_phase.fit_transform(df["Rehab Phase"])
df["Exercise"] = label_enc_exercise.fit_transform(df["Exercise"])
df["Recovery Stage"] = label_enc_recovery_stage.fit_transform(df["Recovery Stage"])

# 🔢 Convert gender column to integer
df["Gender (0=Male, 1=Female)"] = df["Gender (0=Male, 1=Female)"].astype(int)

# 📊 Define Features (X) & Targets (y).
# The same columns feed both the numeric model and the text model, so the
# list is written once instead of being repeated.
FEATURE_COLUMNS = [
    "Age",
    "Gender (0=Male, 1=Female)",
    "Rehab Phase",
    "Recovery Time (Months)",
    "Recovery Stage",
]
X = df[FEATURE_COLUMNS]
y_exercise = df[["Exercise", "Sets", "Reps"]]  # Multi-output target

# 📍 Split into Training & Testing
X_train, X_test, y_train, y_test = train_test_split(
    X, y_exercise, test_size=0.2, random_state=42
)

# 🏋️ Train Rehab Exercise Prediction Model (Random Forest).
# MultiOutputRegressor fits one forest per target column (Exercise, Sets, Reps).
model = MultiOutputRegressor(RandomForestRegressor(n_estimators=100, random_state=42))
model.fit(X_train, y_train)

# 📌 Train NLP Model for Recovery Tips.
# Each row becomes one space-separated string of its feature values.
df["text_features"] = df[FEATURE_COLUMNS].astype(str).agg(" ".join, axis=1)
X_text = df["text_features"]
y_text = df["Recovery Tips"]

X_train_text, X_test_text, y_train_text, y_test_text = train_test_split(
    X_text, y_text, test_size=0.2, random_state=42
)

# TfidfVectorizer's default token_pattern (r"(?u)\b\w\w+\b") only keeps tokens
# of two or more characters. The text here is made of numbers, so that default
# would silently discard every single-digit value (gender, encoded phase,
# encoded stage, short recovery times). Keep one-character tokens as well.
# NOTE(review): TF-IDF ignores token position, so equal digits coming from
# different columns still share one vocabulary entry.
text_model = make_pipeline(
    TfidfVectorizer(token_pattern=r"(?u)\b\w+\b"),
    RandomForestClassifier(n_estimators=100, random_state=42),
)
text_model.fit(X_train_text, y_train_text)

# 🎯 Function to Predict Full Rehab Plan (All 4 Phases)
def generate_full_rehab_plan(age, gender, recovery_time):
    """Build a printable ACL rehab plan with one section per rehab phase.

    For every phase known to ``label_enc_phase`` the numeric model predicts an
    exercise code plus sets and reps; up to two distinct neighbouring exercise
    codes are added at random, and the text model supplies one recovery tip.

    Args:
        age: Patient age, in the same unit as the dataset's "Age" column.
        gender: 0 for male, 1 for female.
        recovery_time: Months of recovery; also used to derive the recovery
            stage ("Early" <= 3, "Mid" <= 6, "Late" <= 9, else "Full Return").

    Returns:
        The formatted plan as a single string.

    Raises:
        ValueError: Presumably raised by ``LabelEncoder.transform`` when the
            derived stage label was not present in the training data.

    Note:
        The extra exercises are drawn with NumPy's global random state, so the
        output can differ between calls with identical arguments.
    """
    # Map the recovery time onto the stage labels, then onto the encoder's code.
    if recovery_time <= 3:
        stage_label = "Early"
    elif recovery_time <= 6:
        stage_label = "Mid"
    elif recovery_time <= 9:
        stage_label = "Late"
    else:
        stage_label = "Full Return"
    recovery_stage = label_enc_recovery_stage.transform([stage_label])[0]

    exercise_count = len(label_enc_exercise.classes_)
    sections = [
        "\n🔷 **Welcome to the ACL Rehab Plan Generator** 🔷\n",
        "\n🏋️‍♂️ **Here's Your Custom ACL Rehab Plan** 🏋️‍♂️\n",
    ]

    # A LabelEncoder code is the label's position in `classes_`, so enumerate
    # yields (code, label) pairs without calling transform() on every pass.
    for phase_code, phase in enumerate(label_enc_phase.classes_):
        features = pd.DataFrame(
            [[age, gender, phase_code, recovery_time, recovery_stage]],
            columns=X.columns,
        )

        # The regressor returns floats; round to the nearest whole code/count.
        rounded = np.round(model.predict(features))[0]
        exercise_code, sets, reps = (int(value) for value in rounded)
        exercise_code = min(max(exercise_code, 0), exercise_count - 1)

        # Pick up to two *different* exercises within +/-2 codes of the
        # prediction. Sampling without replacement from valid neighbours
        # guarantees no duplicates and keeps the predicted exercise first.
        neighbours = [
            code
            for code in range(exercise_code - 2, exercise_code + 3)
            if code != exercise_code and 0 <= code < exercise_count
        ]
        extra_codes = []
        if neighbours:
            extra_codes = np.random.choice(
                neighbours, size=min(2, len(neighbours)), replace=False
            ).tolist()
        exercises = label_enc_exercise.inverse_transform([exercise_code] + extra_codes)

        # The text model was trained on "age gender phase_code time stage_code"
        # strings, so the query must use the same layout (numeric phase code,
        # no exercise name) for its tokens to mean anything to the model.
        tip_query = f"{age} {gender} {phase_code} {recovery_time} {recovery_stage}"
        predicted_tip = text_model.predict([tip_query])[0]

        # 🏋️‍♂️ Format Output for this phase
        sections.append(f"\n🔹 **{phase}** 🔹\n")
        sections.extend(
            f"✅ {sets} sets of {reps} reps of {exercise}\n" for exercise in exercises
        )
        sections.append(f"💡 **{phase} Tips:** {predicted_tip}\n")

    return "".join(sections)

# 🔥 Get User Input
print("\n🔷 Welcome to the ACL Rehab Plan Generator 🔷\n")


def _ask_whole_number(prompt, minimum):
    """Prompt repeatedly until the user types an integer >= ``minimum``."""
    while True:
        raw = input(prompt).strip()
        try:
            value = int(raw)
        except ValueError:
            print("❌ Please enter a whole number.")
            continue
        if value >= minimum:
            return value
        print(f"❌ Please enter a value of at least {minimum}.")


# 🚨 Injury (For now, only ACL Tear is supported)
injury = input("Enter your injury (e.g., 'ACL Tear'): ").strip().lower()
if injury != "acl tear":
    print("❌ Sorry, we only support ACL Tear rehab plans for now.")
    # `exit()` is a helper the `site` module adds for interactive sessions;
    # raising SystemExit directly stops the script without relying on it.
    raise SystemExit

# 👤 Age Input
age = _ask_whole_number("Enter your age: ", 1)

# 🚻 Gender Input — re-prompt on anything unrecognised instead of silently
# treating every non-"male" answer as female.
_GENDER_CODES = {"male": 0, "m": 0, "female": 1, "f": 1}
while True:
    gender_input = input("Enter your gender (Male/Female): ").strip().lower()
    if gender_input in _GENDER_CODES:
        gender = _GENDER_CODES[gender_input]  # 0 for Male, 1 for Female
        break
    print("❌ Please enter Male or Female.")

# 📅 Recovery Time Input
recovery_time = _ask_whole_number("Enter your recovery time (in months): ", 0)

# 🎯 Generate Rehab Plan
print(generate_full_rehab_plan(age, gender, recovery_time))
