# Figure Generation

This notebook returns the input data, the predicted ADL label, and the true ADL label.

In [1]:
import os

import pandas as pd
import polars as pl
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier

import egoviz.models.evaluation as ev
import egoviz.models.processing as pr
import egoviz.visualize as viz

SEED = 42

pd.set_option("display.max_rows", 300)

cwd = os.getcwd()

In [2]:
data = pr.load_pickle(os.path.join(cwd, '../data/home_data_all_preds_df.pkl'))
df_active = pr.generate_binary_presence_df(data)
df_binary_active_scaled = pr.row_wise_min_max_scaling(df_active)

data = pr.load_pickle(os.path.join(cwd, '../data/home_data_all_preds.pkl'))
df_counts = pr.generate_counts_df(pr.generate_df_from_preds(data))
df_counts_active_scaled = pr.row_wise_min_max_scaling(df_counts)

In [3]:
models_binary = [
    ('Logistic Regression', LogisticRegression(max_iter=1000, random_state=SEED, class_weight='balanced')),
    ('XGBoost', XGBClassifier(random_state=SEED)),
]

label_encoder = LabelEncoder()

binary_results, binary_results_df = ev.evaluate_models(models_binary, df_binary_active_scaled, label_encoder)

2024-01-25 17:39:25,860 - root - INFO - LOGOCV complete for LogisticRegression
2024-01-25 17:39:31,260 - root - INFO - LOGOCV complete for XGBClassifier


In [4]:
ev.display_pct_table(binary_results_df, threshold=0.5)

Unnamed: 0,model,median_f1,pct_above_0.5
0,LogisticRegression,0.811589,1.0
1,XGBClassifier,0.799526,0.88


### Get Samples

In [5]:
binary_results[0].samples.index

Index(['SCI02', 'SCI03', 'SCI06', 'SCI08', 'SCI10', 'SCI11', 'SCI12', 'SCI13',
       'SCI14', 'SCI15', 'SCI16', 'SCI17', 'SCI18', 'SCI19', 'SCI20', 'SCI21'],
      dtype='object')

In [15]:
df = {}
for idx in binary_results[0].samples.index:
    df[idx] = (
        pl.from_pandas(binary_results[0].samples.loc[idx]["X_test"])
        .with_columns(
            pl.Series("video_id", binary_results[0].samples.loc[idx]["videos"]),
            pl.Series("y_true", binary_results[0].samples.loc[idx]["y_test"]),
            pl.Series("y_pred", binary_results[0].samples.loc[idx]["y_pred"]),
            pl.Series("y_true_label", binary_results[0].samples.loc[idx]["y_test_label"]),
            pl.Series("y_pred_Label", binary_results[0].samples.loc[idx]["y_pred_label"]),
        )
    )

In [64]:
import numpy as np

# pandas view all columns
pd.set_option("display.max_columns", None)

id = 'SCI02'

filtered = df[id].filter((pl.col('y_true') == pl.col('y_pred')) & (pl.col("y_true")==5)).row(1, named=True); filtered

{'count_clothing_accessory': 0.07692307692307693,
 'count_phone_tablet': 0.0,
 'count_other': 0.15384615384615385,
 'count_office_stationary': 0.0,
 'count_footwear': 0.0,
 'count_furniture': 0.0,
 'active_other': 0.0,
 'active_furniture': 0.0,
 'active_phone_tablet': 0.0,
 'active_footwear': 0.0,
 'active_office_stationary': 0.0,
 'active_clothing_accessory': 0.0,
 'count_furnishing': 0.07692307692307693,
 'active_furnishing': 0.0,
 'count_drinkware': 0.38461538461538464,
 'active_drinkware': 0.0,
 'count_home_appliance_tool': 0.5384615384615384,
 'active_home_appliance_tool': 0.0,
 'count_food': 1.0,
 'active_food': 0.7692307692307693,
 'count_animal': 1.0,
 'active_animal': 0.0,
 'count_clothing': 0.0,
 'active_clothing': 0.0,
 'count_cleaning_product': 1.0,
 'active_cleaning_product': 0.0,
 'count_bag': 0.0,
 'active_bag': 0.0,
 'count_toiletries': 0.07692307692307693,
 'active_toiletries': 0.0,
 'count_electronics': 0.0,
 'active_electronics': 0.0,
 'count_kitchen_utensils': 1.0,


: 

In [63]:
filtered.to_pandas().replace(0, np.nan).dropna(axis=1).to_dict()

AttributeError: 'tuple' object has no attribute 'to_pandas'