In [None]:
# ===== Cell 1: Imports =====

import os
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


In [None]:
# ===== Cell 2: Load Dataset (Auto-detect) =====

import os
import pandas as pd

base_path = "/kaggle/input"

# os.listdir() returns entries in arbitrary order, so sort them to make the
# "first" dataset folder the same on every run. Plain files are skipped
# because only directories can be dataset folders.
folders = sorted(
    entry
    for entry in os.listdir(base_path)
    if os.path.isdir(os.path.join(base_path, entry))
)
print("Available folders:", folders)

if not folders:
    raise FileNotFoundError(f"No dataset folders found under '{base_path}'.")

# Select the first dataset folder (alphabetical order)
dataset_path = os.path.join(base_path, folders[0])

# Collect CSV files anywhere below the dataset folder, as paths relative to
# it, so files stored in sub-directories are found as well.
csv_files = sorted(
    (
        os.path.relpath(os.path.join(root, filename), dataset_path)
        for root, _subdirs, filenames in os.walk(dataset_path)
        for filename in filenames
        if filename.lower().endswith(".csv")
    ),
    # Shallowest paths first, so a top-level CSV is still preferred over a
    # nested one; ties are broken alphabetically.
    key=lambda rel_path: (rel_path.count(os.sep), rel_path),
)
print("CSV files found:", csv_files)

if not csv_files:
    raise FileNotFoundError(f"No CSV files found under '{dataset_path}'.")

# Load the first CSV file
file_path = os.path.join(dataset_path, csv_files[0])
df = pd.read_csv(file_path)

# Bare expression so the notebook renders a preview of the loaded frame
df.head()


In [None]:
# ===== Cell 3: Select Features and Target =====

# Name of the column that holds the label to predict
target_column = "prakriti"

# Input columns, in the order they are passed to the model
feature_columns = [
    "age",
    "stress_level",
    "anxiety_level",
    "depression_score",
    # maslow_* columns
    "maslow_physiological",
    "maslow_safety",
    "maslow_social",
    "maslow_esteem",
    "maslow_self_actualization",
    # remaining inputs
    "sleep_hours",
    "exercise_frequency",
    "caffeine_intake",
    "screen_time_hours",
]

# Split the loaded frame into the feature matrix and the label series
X, y = df[feature_columns], df[target_column]

print("Features shape:", X.shape)
print("Target shape:", y.shape)


In [None]:
# ===== Cell 4: Train-Test Split =====

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. Stratifying on y preserves the class
# proportions of the full data in both splits (it does not rebalance them),
# and the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("Training set size:", X_train.shape)
print("Testing set size:", X_test.shape)

In [None]:
# ===== Cell 5: Initialize Random Forest Classifier =====

from sklearn.ensemble import RandomForestClassifier

# 100-tree forest with a fixed seed for reproducibility. class_weight="balanced"
# has scikit-learn weight classes inversely to their frequency in the training
# labels, which compensates for class imbalance if there is any.
rf = RandomForestClassifier(n_estimators=100, class_weight="balanced", random_state=42)

# Bare expression so the notebook displays the configured estimator
rf


In [None]:
# ===== Cell 6: Train the Model =====

# Fit the classifier on the training split
rf.fit(X=X_train, y=y_train)

print("Model training completed successfully.")


In [None]:
# ===== Cell 7: Make Predictions =====

# Predict a label for every row of the held-out test split
y_pred = rf.predict(X=X_test)

print("Predictions generated.")


In [None]:
# ===== Cell 8: Model Evaluation =====

from sklearn.metrics import accuracy_score, classification_report

# Overall fraction of test rows whose predicted label matches the true label
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print("Random Forest Classification Results")
print("-" * 40)
print(f"Accuracy: {accuracy:.4f}\n")

# Per-class text report, built only after its heading has been printed
print("Classification Report:")
report_text = classification_report(y_true=y_test, y_pred=y_pred)
print(report_text)


In [None]:
# ===== Cell 9: Predict Prakriti for a New Patient =====

# Feature values for one example patient, keyed by column name
patient_record = {
    "age": 30,
    "stress_level": 7,
    "anxiety_level": 6,
    "depression_score": 4,
    "maslow_physiological": 6,
    "maslow_safety": 5,
    "maslow_social": 4,
    "maslow_esteem": 3,
    "maslow_self_actualization": 2,
    "sleep_hours": 5,
    "exercise_frequency": 3,
    "caffeine_intake": 2,
    "screen_time_hours": 6,
}

# Build a one-row frame, then select the columns in the same order as the
# training features so the input lines up with what the model was fitted on.
patient_frame = pd.DataFrame([patient_record])
new_patient = patient_frame[X.columns]

predicted_prakriti = rf.predict(new_patient)

print("Predicted Prakriti for the new patient:", predicted_prakriti[0])


In [None]:
# ===== Cell 10: Save Trained Model =====

import joblib

# Output path for the serialized model (relative to the working directory)
model_filename = "random_forest_prakriti_model.pkl"

# Persist the fitted estimator to disk with joblib
joblib.dump(value=rf, filename=model_filename)

print(f"Model saved successfully as '{model_filename}'")
