In [6]:
# Cell 1: Imports
import pandas as pd
import joblib
import os

# Cell 2: Read the curated dataset into a DataFrame
df = pd.read_csv("All_Curated_Demog_Activity_Sleep5_min_N23.csv")

# Cell 3: Standardise the heart-rate column name (HR_normalized -> HR_Normalized).
# Only applies when the lowercase variant exists and the capitalised one does not.
needs_rename = "HR_Normalized" not in df.columns and "HR_normalized" in df.columns
if needs_rename:
    df = df.rename(columns={"HR_normalized": "HR_Normalized"})
    print("‚úÖ Renamed 'HR_normalized' to 'HR_Normalized'")

# Cell 4: Prediction helper function
def apply_RF_LASMaL_model(df, model_path, encoder_path, feature_columns, pred_col_name):
    """Add a column of decoded model predictions to ``df`` in place.

    The model and label encoder are loaded from disk with ``joblib``. Rows in
    which every feature column is non-null are passed to ``model.predict``;
    the result is mapped through ``encoder.inverse_transform`` and stored in
    ``df[pred_col_name]``. Rows with a missing feature value receive NaN.

    Predictions are written back by row position rather than by index label,
    so a frame whose index contains duplicate labels is handled correctly.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to annotate; it is modified in place.
    model_path : str
        Path of the joblib file holding an object with a ``predict`` method
        (presumably a fitted scikit-learn estimator; confirm with the
        training notebook).
    encoder_path : str
        Path of the joblib file holding an object with an
        ``inverse_transform`` method (presumably a fitted label encoder).
    feature_columns : list of str
        Columns of ``df`` passed to the model, in this order.
    pred_col_name : str
        Name of the column that receives the decoded predictions.

    Returns
    -------
    None
        Any exception is caught and reported with ``print``; in that case the
        prediction column is not added. Callers that need to detect failure
        should check ``pred_col_name in df.columns`` afterwards.
    """
    try:
        # NOTE(review): joblib.load unpickles the file, which can execute
        # arbitrary code. Only load model artefacts from trusted sources.
        model = joblib.load(model_path)
        encoder = joblib.load(encoder_path)

        # Positions (not labels) of the rows that have every feature present.
        complete_rows = df[feature_columns].notnull().all(axis=1).to_numpy()
        row_positions = complete_rows.nonzero()[0]

        if len(row_positions) > 0:
            X_new = df.iloc[row_positions][feature_columns]
            labels = encoder.inverse_transform(model.predict(X_new))

            # Expand to one value per row of df, leaving NaN where no
            # prediction was made, then assign positionally so that
            # duplicate index labels cannot misalign the values.
            predicted = pd.Series(labels, index=row_positions)
            df[pred_col_name] = predicted.reindex(range(len(df))).to_numpy()
        else:
            # Nothing to predict: still add the column so downstream code
            # sees a consistent schema.
            df[pred_col_name] = float("nan")

        print(f"‚úÖ Prediction column added: {pred_col_name}")

    except Exception as e:
        # Deliberate best-effort: report the problem without raising.
        print(f"‚ùå Error applying model: {e}")

# Cell 5: Set paths and parameters
model_path = "savedmodels/models_k3_lasmal_RandomForest/Activity_best_model_lasmal.pkl"
encoder_path = "savedmodels/models_k3_lasmal_RandomForest/Activity_label_encoder_lasmal.pkl"
feature_columns = ['HR_Normalized', 'Cadence', 'Speed']
pred_col_name = "Activity_Pred_RF"

# Cell 6: Apply model (adds pred_col_name to df in place)
apply_RF_LASMaL_model(df, model_path, encoder_path, feature_columns, pred_col_name)

# The helper reports failures by printing instead of raising, so verify the
# column exists before writing a file whose name promises predictions.
if pred_col_name not in df.columns:
    raise RuntimeError(
        f"Column '{pred_col_name}' was not added to the DataFrame; "
        "see the error printed above. Output file was not written."
    )

# Cell 7: Save final output
output_path = "All_Curated_Demog_Activity_Sleep5_min_N23_with_Activity_Pred_RF.csv"
df.to_csv(output_path, index=False)
print(f"‚úÖ File saved: {output_path}")


‚úÖ Renamed 'HR_normalized' to 'HR_Normalized'
‚úÖ Prediction column added: Activity_Pred_RF
‚úÖ File saved: All_Curated_Demog_Activity_Sleep5_min_N23_with_Activity_Pred_RF.csv


In [7]:
## Clean Day_label Values and Remove Missing Rows

In [8]:
# Load the CSV file with predicted activity
import pandas as pd

# Load your file
df = pd.read_csv("All_Curated_Demog_Activity_Sleep5_min_N23_with_Activity_Pred_RF.csv")

# Record which rows actually have a label BEFORE casting to str:
# astype(str) turns NaN into the literal string 'nan', after which a
# null check can no longer identify the missing rows.
has_label = df['Day_label'].notna()

# Remove every run of digits from Day_label (e.g., 'PR2' -> 'PR').
# Note: the pattern strips digits anywhere in the value, not only at the end.
df['Day_label'] = df['Day_label'].astype(str).str.replace(r'\d+', '', regex=True)

# Drop rows where Day_label was NaN or is empty/whitespace after cleaning
df = df[has_label & (df['Day_label'].str.strip() != '')]

# Save cleaned CSV (overwrites the input file)
df.to_csv("All_Curated_Demog_Activity_Sleep5_min_N23_with_Activity_Pred_RF.csv", index=False)
print("‚úÖ Cleaned and saved: All_Curated_Demog_Activity_Sleep5_min_N23_with_Activity_Pred_RF.csv")


‚úÖ Cleaned and saved: All_Curated_Demog_Activity_Sleep5_min_N23_with_Activity_Pred_RF.csv
