# InCal

In [1]:
import pandas as pd
import plotly.express as px
import numpy as np
from pandas.api.types import CategoricalDtype
from IPython.display import display, HTML
from statsmodels.formula.api import ols
from collections import OrderedDict, Counter
from jupyter_dash import JupyterDash 
from dash import html
from dash import dcc
import itertools
from dash import no_update
from dash import dash_table
import dash
from dash.dependencies import Input, Output, State

# Holds the loaded data: a single DataFrame when one file is read, or a
# {file name: DataFrame} mapping when several files are uploaded.
dataframes = {}
# True while `dataframes` is a single DataFrame rather than a mapping.
is_one_file = True
# The commented-out block below is for Google Colab only: it uploads one or
# more CSV files and reads them into `dataframes`.
# try:
#     from google.colab import files
#     import io
#     uploaded = files.upload()
#     if len(uploaded) > 1:
#         for fn in uploaded.keys():
#             print('User uploaded file "{name}" with length {length} bytes'.format(
#                 name=fn, length=len(uploaded[fn])))
#             dataframes[fn] = pd.read_csv(io.BytesIO(uploaded[fn]))
#             is_one_file = False
#     else:
#         name = list(uploaded.keys())[0]
#         dataframes = pd.read_csv(io.BytesIO(uploaded[name]))
# except:
#     print('check the if error - file.csv, there more then one table in the sheet?, is table has missing columns?')


# Local run: read one CSV file, replacing the empty dict assigned above.
dataframes = pd.read_csv('csvs/all_weeks/hebrew_2021-08-10_16_15_hebrew16_shani_w2p2.1_m_calr.csv')

# Experiment design: group name -> subject ids, in insertion order.
dict_groups = OrderedDict(Control = [1, 4, 7, 10, 13], six_M = [3, 5, 9, 12, 16], three_M = [2, 6, 8, 11, 14, 15])
print(f"{dict_groups}")

# Raw parameter name -> human-readable label with units.
# NOTE(review): not referenced in the visible part of the file; presumably
# used to rename columns in a displayed data table - confirm.
name_for_replacement_in_data_table = {
    'bodymass': 'Weight_(gr)', 
    'vo2': 'Oxygen_Consumption_(ml/hr)', 
    'vco2': 'Carbon_Dioxide_Production_(ml/hr)',
    'kcal_hr': 'Energy_Expenditure_(kcal/hour)',
    'foodupa': 'Cumulative_Food_Intake_(kcal)',
    'rq': 'Respiratory_Exchange_Ratio',
    'pedmeters': 'Pedestrian_Locomotion_(m)',
    'allmeters': 'Total_Distance_includes_fine_movement_(m)',
    'waterupa': 'Cumulative_Water_Intake_(ml)',
    'Wheel': 'Total_Wheel_Counts_(Counts)' # TODO: the wheel parameter still needs to be checked
}

# Label with units for each parameter name listed here.
# NOTE(review): not referenced in the visible part of the file - confirm usage.
calculeted_parmeters = {
    'water': 'Hourly_Water_Intake_(ml)',   
    'food': 'Hourly_Food_Intake_(kcal)',
    'locomotor_activity': 'Locomotor_Activity_(beam_breaks)',
    'energy_balance': 'Energy_Balance_(kcal/hour)',
}


def get_data(__global_df__, categories_columns_names):
    """Move the given index levels into columns and order them as categoricals.

    The ``subjectID`` and ``Group`` columns of the result are converted in
    place by ``order_categoreis_columns`` using the module-level
    ``dict_groups`` (values give the subject order, keys the group order).
    """
    # TODO: stop depending on the module-level ``dict_groups``.
    flat_frame = __global_df__.reset_index(level=categories_columns_names)
    order_categoreis_columns(
        flat_frame,
        subjectID=dict_groups.values(),
        Group=dict_groups.keys(),
    )
    return flat_frame

def trim_df_datetime(df, start_time, end_time):
    """Return the rows of *df* selected by the label slice ``start_time:end_time``."""
    window = slice(start_time, end_time)
    return df.loc[window]

def remove_data(df, outliers_true, start_time, end_time):
    """Trim *df* to a time window, optionally removing outliers first.

    Args:
        df: frame whose index can be sliced by ``start_time:end_time``.
        outliers_true: when truthy, outliers are replaced by NaN with
            ``remove_outliers_mixed_df`` and rows containing NaN are dropped
            from the trimmed result.
        start_time: first index label to keep.
        end_time: last index label to keep.

    Returns:
        The trimmed frame.
    """
    if not outliers_true:
        return trim_df_datetime(df, start_time, end_time)
    # remove_outliers_mixed_df accepts only the frame; the extra 'subjectID'
    # argument that used to be passed here raised TypeError.
    outliers_removed = remove_outliers_mixed_df(df)
    return trim_df_datetime(outliers_removed, start_time, end_time).dropna()

#  removing subjects or group
def get_values_level(df, number_or_index_name):
    """Return the values of one index level of *df*, chosen by position or name."""
    index = df.index
    return index.get_level_values(number_or_index_name)

def get_difference_from_list_2(list1, list2):
    """Return the distinct items of *list1* that are not in *list2* (order not guaranteed)."""
    keep = set(list1)
    drop = set(list2)
    return list(keep - drop)

def incal_remove_subjects(df, number_or_index_name, subjects_to_remove):
    """Return *df* without the rows belonging to the given subjects.

    Args:
        df: frame with a three-level row index whose second level holds the
            subject ids.
        number_or_index_name: position or name of the subject index level.
        subjects_to_remove: ids to drop; each is converted with ``int``.

    Returns:
        The rows of *df* whose subject id is not in ``subjects_to_remove``.
    """
    subjects = get_values_level(df, number_or_index_name)
    # This conversion was commented out, which left subjects_to_remove_ints
    # undefined and made every call fail with NameError.
    subjects_to_remove_ints = [int(x) for x in subjects_to_remove]
    selected_subjects = get_difference_from_list_2(subjects, subjects_to_remove_ints)
    print(subjects_to_remove_ints, selected_subjects)
    return df.loc[:, selected_subjects, :]

def incal_remove_groups(df, number_or_index_name, groups_to_remove):
    """Return the rows of *df* whose third index level is not in *groups_to_remove*.

    ``number_or_index_name`` selects the index level the group values are
    read from.
    """
    present_groups = get_values_level(df, number_or_index_name)
    kept_groups = get_difference_from_list_2(present_groups, groups_to_remove)
    return df.loc[:, :, kept_groups]

# removing outliers
def sort_data_by_ids(df, column_name):
    """Return *df* sorted by the values of ``column_name``."""
    ordered = df.sort_values(by=column_name)
    return ordered

def flat_list(d_list):
    """Flatten one level of nesting: an iterable of iterables becomes a list."""
    return [item for inner in d_list for item in inner]

def slice_df_for_floats_and_category(df, column_name):
    """Split *df* into its ``float64`` columns and its ``category`` columns.

    ``column_name`` is accepted but not used.
    """
    float_part = df.select_dtypes(include=['float64'])
    category_part = df.select_dtypes(include=['category'])
    return float_part, category_part

def get_subject_ids(df, column_name):
    """Return the distinct values of ``df[column_name]`` in order of appearance."""
    column = df[column_name]
    return column.unique()

def calc_mean_and_std_for_df_by_ids(df, ids_values):
    """Group *df* by ``ids_values`` and aggregate every column with mean and std."""
    per_id = df.groupby(ids_values)
    return per_id.agg([np.mean, np.std])

def get_lims_upper_and_lower(df_means_and_stds, number_of_ids, number_featuers_columns, by_sd_of=2):
    """Build upper and lower limits as ``mean +/- by_sd_of * std``.

    The values of ``df_means_and_stds`` are reshaped to
    ``(number_of_ids, number_featuers_columns, 2)``; the last axis is read as
    (mean, std). Both returned arrays have shape
    ``(number_of_ids, number_featuers_columns, 1)``.
    """
    stats = df_means_and_stds.values.reshape(number_of_ids, number_featuers_columns, 2)
    means, stds = stats[:, :, :1], stats[:, :, 1:]
    spread = stds * by_sd_of
    return means + spread, means - spread

def reshpe_vlaues_3d_ndarray(ndarray, axis0_dimensions, axis1_columns, axis2_rows):
    """Reshape *ndarray* to ``(axis0_dimensions, axis1_columns, axis2_rows)``."""
    target_shape = (axis0_dimensions, axis1_columns, axis2_rows)
    return ndarray.reshape(target_shape)

def select_and_replace_outliers(ndarry_of_features, ndarry_uppers_lims, ndarry_lowers_lims):
    """Replace values above the upper limits or below the lower limits with NaN.

    Values inside the limits are returned unchanged.
    """
    too_high = ndarry_of_features > ndarry_uppers_lims
    too_low = ndarry_of_features < ndarry_lowers_lims
    return np.select([too_high, too_low], [np.nan, np.nan], ndarry_of_features)
    
def back_to_2d_ndarray(ndarry_of_features, axis1, axis2):
    """Reshape *ndarry_of_features* to the 2-D shape ``(axis1, axis2)``."""
    target_shape = (axis1, axis2)
    return ndarry_of_features.reshape(target_shape)
    
def sort_data_by_index(df):
    """Return *df* sorted by its index."""
    ordered = df.sort_index()
    return ordered

def get_categories_cals_names(df):
    """Return the names of every index level of *df* except the first."""
    level_names = df.index.names
    return level_names[1:]

def incal_get_categories_col_from_multiindex(df):
    """Return the index levels after the first one as ordinary columns.

    The returned frame keeps the remaining first index level as its index.
    """
    category_levels = get_categories_cals_names(df)
    flattened = df.reset_index(level=category_levels)
    return flattened[category_levels]

def remove_outliers_mixed_df(df, by_sd_of=2):
    """Replace per-subject outliers with NaN.

    For every column, a value is an outlier when it lies more than
    ``by_sd_of`` standard deviations away from the mean of the rows that
    share its value in index level 1 (the subject level).

    The previous implementation reshaped ``df.values`` into
    ``(n_subjects, rows_per_subject, n_features)``, which is only valid when
    rows are contiguous per subject and every subject has the same number of
    rows. The sorted copy it created was never used, so a time-major frame
    had its rows compared against the wrong subject's limits. Grouping with
    ``transform`` keeps every row aligned with its own subject and does not
    depend on row order or group size.

    Args:
        df: numeric frame with a MultiIndex whose level 1 identifies the
            subject.
        by_sd_of: number of standard deviations that defines the limits.

    Returns:
        A frame with the same index and columns as *df*, with outliers
        replaced by NaN.
    """
    # observed=True skips unused categories when the level is categorical.
    per_subject = df.groupby(level=1, observed=True)
    means = per_subject.transform('mean')
    spread = per_subject.transform('std') * by_sd_of
    # Comparisons against NaN limits (e.g. single-row groups) are False, so
    # such rows are kept unchanged.
    is_outlier = (df > means + spread) | (df < means - spread)
    return df.mask(is_outlier)


def incal_set_multindex(df, list_of_multi_index, drop_current_index=False):
    """Reset the index of *df*, then index it by the columns in ``list_of_multi_index``.

    When ``drop_current_index`` is false the old index is kept as a column.
    """
    flat = df.reset_index(drop=drop_current_index)
    return flat.set_index(list_of_multi_index)

def create_category_column(df , categories, ordered=True):
    '''
    Convert values to a pandas Categorical with an explicit category order.

    The statistical analysis (anova, ancova and anova with interaction)
    needs groups and subjects in the order of the experiment design, so the
    categories are given explicitly instead of being inferred.

    df: the values to convert (any sequence accepted by pd.Categorical).
    categories: the allowed categories, in the wanted order; values that are
        not listed become NaN.
    ordered: whether the categorical is ordered. This argument used to be
        ignored (ordered=True was hard-coded); it is now honoured.
    '''
    return pd.Categorical(df, categories=categories, ordered=ordered)

def replace_ids_to_group_id(ndarray_ids, groups_names, subjects_within_group):
    """Map each id in *ndarray_ids* to the group name at the matching position.

    ``subjects_within_group[i]`` is replaced by ``groups_names[i]``; ids that
    match no subject are left as they are.
    """
    masks = []
    for subject in subjects_within_group:
        masks.append(ndarray_ids == subject)
    return np.select(masks, groups_names, ndarray_ids)

def incal_create_group_column_from_ids(df, ids_column_name, dict_groups):
    """Return an array with the group name of every row of *df*.

    Args:
        df: frame holding the subject ids in ``ids_column_name``.
        ids_column_name: name of the subject-id column.
        dict_groups: mapping of group name to the list of its subject ids.

    Returns:
        The result of ``replace_ids_to_group_id`` for the id column.
    """
    # The ids were previously read from an undefined global (incal_format),
    # which raised NameError; they come from the df argument.
    subjects_vlaues = df[ids_column_name].values
    # Repeat every group name once per subject so names and ids line up.
    groups_names = flat_list([[group] * len(ids) for group, ids in dict_groups.items()])
    subjects_within_groups = flat_list(dict_groups.values())
    return replace_ids_to_group_id(subjects_vlaues, groups_names, subjects_within_groups)

def incal_assemble_group_column_in_df(
    df,
    ids_column_name,
    dict_groups,
    group_column_name):
    """Return *df* with an extra column holding the group of every row.

    Args:
        df: frame holding the subject ids in ``ids_column_name``.
        ids_column_name: name of the subject-id column.
        dict_groups: mapping of group name to the list of its subject ids.
        group_column_name: name of the new group column.

    Returns:
        A new frame made of *df* plus the group column.
    """
    values = incal_create_group_column_from_ids(df, ids_column_name, dict_groups)
    # Give the new column the same index as df so the column-wise concat
    # lines rows up by position instead of joining on mismatched labels.
    series = pd.Series(values, index=df.index, copy=False, name=group_column_name)
    # The frame was previously taken from an undefined global (incal_format).
    return concat_dfs([df, series])

def get_incal_levels_properties(dict_groups):
    """Return the datetime dtype string, the flat subject order and the group order."""
    subjects_in_order = flat_list(dict_groups.values())
    groups_in_order = list(dict_groups.keys())
    return 'datetime64[ns]', subjects_in_order, groups_in_order

def design_incal_levels(idx0, idx1, idx2, date_time_type, order_subjects, order_groups):
    """Convert the three index levels to the types used for analysis.

    Level 0 is cast to ``date_time_type``; levels 1 and 2 are passed to
    ``create_category_column`` with ``order_subjects`` and ``order_groups``.
    """
    datetimes = idx0.astype(date_time_type)
    subjects = create_category_column(idx1, order_subjects)
    groups = create_category_column(idx2, order_groups)
    return datetimes, subjects, groups

def incal_create_levels(df, dict_groups):
    """Return the index of *df* with its three levels converted for analysis."""
    # https://stackoverflow.com/questions/34417970/pandas-convert-index-type-in-multiindex-dataframe
    date_time_type, order_subjects, order_groups = get_incal_levels_properties(dict_groups)
    current_levels = df.index.levels
    converted = design_incal_levels(
        current_levels[0],
        current_levels[1],
        current_levels[2],
        date_time_type,
        order_subjects,
        order_groups,
    )
    return df.index.set_levels(list(converted))

# group column and set multiindex format for analysis
def create_category_column(df , categories, ordered=True):
    '''
    Convert values to a pandas Categorical with an explicit category order.

    The statistical analysis (anova, ancova and anova with interaction)
    needs groups and subjects in the order of the experiment design, so the
    categories are given explicitly instead of being inferred.

    df: the values to convert (any sequence accepted by pd.Categorical).
    categories: the allowed categories, in the wanted order; values that are
        not listed become NaN.
    ordered: whether the categorical is ordered. This argument used to be
        ignored (ordered=True was hard-coded); it is now honoured.
    '''
    return pd.Categorical(df, categories=categories, ordered=ordered)

def replace_ids_to_group_id(ndarray_ids, groups_names, subjects_within_group):
    """Map each id in *ndarray_ids* to the group name at the matching position.

    Ids are compared with ``str(subject)``: the id equal to
    ``str(subjects_within_group[i])`` is replaced by ``groups_names[i]``.
    Ids that match no subject are left as they are.
    """
    masks = []
    for subject in subjects_within_group:
        masks.append(ndarray_ids == str(subject))
    return np.select(masks, groups_names, ndarray_ids)

def incal_create_group_column_from_ids(df, ids_column_name, dict_groups):
    """Return an array with the group name of every row of *df*.

    Every group name in ``dict_groups`` is repeated once per subject so the
    names line up with the flattened subject ids, then both lists are handed
    to ``replace_ids_to_group_id`` together with the id column values.
    """
    id_values = df[ids_column_name].values
    repeated_names = flat_list([[group] * len(ids) for group, ids in dict_groups.items()])
    all_subjects = flat_list([ids for ids in dict_groups.values()])
    return replace_ids_to_group_id(id_values, repeated_names, all_subjects)

def incal_assemble_multi_index_format(
    df,
    ids_column_name,
    dict_groups,
    group_column_name):
    """Rebuild *df* with a (datetime, subject, group) row MultiIndex.

    Args:
        df: frame indexed by datetime that holds the subject ids in the
            ``ids_column_name`` column.
        ids_column_name: name of the subject-id column; it becomes the
            second index level and is removed from the columns.
        dict_groups: mapping of group name to the list of its subject ids;
            it fixes the category order of both subjects and groups.
        group_column_name: name given to the group index level.

    Returns:
        A new frame with the remaining columns of *df* and the new index.
    """
    date_time = df.index.to_frame().reset_index(drop=True)

    subjects = df[ids_column_name].reset_index(drop=True)
    # Subject categories are strings, in the order of the experiment design.
    subjects_order = [str(n) for n in flat_list(dict_groups.values())]
    cat_subjects = create_category_column(subjects, subjects_order)

    groups_values = incal_create_group_column_from_ids(df, ids_column_name, dict_groups)
    groups = pd.Series(groups_values, copy=False, name=group_column_name)
    cat_groups = create_category_column(groups, dict_groups.keys())

    # Drop the id column named by the caller; this used to be hard-coded to
    # 'subjectID' and ignored ids_column_name.
    features = df.drop(columns=ids_column_name)

    frame_datetime_subjects_groups = pd.concat(
        [
            date_time,
            pd.Series(cat_subjects, copy=False, name=ids_column_name),
            pd.Series(cat_groups, copy=False, name=group_column_name),
        ],
        axis=1,
    )
    multi_index = pd.MultiIndex.from_frame(frame_datetime_subjects_groups)
    return pd.DataFrame(features.values, columns=features.columns, index=multi_index)

# removing subjects or group
def get_values_level(df, number_or_index_name):
    """Return the values of one index level of *df*, chosen by position or name."""
    index = df.index
    return index.get_level_values(number_or_index_name)

def get_difference_from_list_2(list1, list2):
    """Return the distinct items of *list1* that are not in *list2* (order not guaranteed)."""
    keep = set(list1)
    drop = set(list2)
    return list(keep - drop)

def incal_remove_subjects(df, number_or_index_name, subjects_to_remove):
    """Return the rows of *df* whose subject is not in *subjects_to_remove*.

    The ids to remove are converted with ``int`` before being compared with
    the values of the chosen index level.
    """
    present_subjects = get_values_level(df, number_or_index_name)
    print(present_subjects, subjects_to_remove)
    ids_to_drop = [int(x) for x in subjects_to_remove]
    kept_subjects = get_difference_from_list_2(present_subjects, ids_to_drop)
    return df.loc[:, kept_subjects, :]

def incal_remove_group(df, number_or_index_name, groups_to_remove):
    """Return the rows of *df* whose third index level is not in *groups_to_remove*.

    ``number_or_index_name`` selects the index level the group values are
    read from.
    """
    present_groups = get_values_level(df, number_or_index_name)
    kept_groups = get_difference_from_list_2(present_groups, groups_to_remove)
    return df.loc[:, :, kept_groups]

def select_columns_by_metebolic_parm(df, param_name, exclude=False):
    """Select the columns whose name matches the pattern ``param_name``.

    With ``exclude=True`` the selection is inverted and only the columns
    that do not match are returned.
    """
    matches = df.columns.str.contains(pat=param_name)
    # Kept as an equality test so only values equal to True invert the mask.
    if exclude == True:  # noqa: E712
        matches = ~matches
    return df.loc[:, matches]

def selecting_multi_column_by_part_of_name(df, list_pattern_parm):
    """Keep the columns whose name matches any of the given regex patterns."""
    combined_pattern = '|'.join(list_pattern_parm)
    return df.filter(regex=combined_pattern)

def multi_columns_by_metabolic_param(df, list_met_param, number):
    """Multiply the columns matching any pattern in *list_met_param* by *number*.

    The multiplied columns are moved to the end of the returned frame; the
    other columns keep their order.
    """
    # https://stackoverflow.com/questions/21285380/find-column-whose-name-contains-a-specific-string
    pattern = "|".join(list_met_param)
    matched = df.columns[df.columns.astype("string").str.contains(pat=pattern)]
    scaled = df[matched] * number
    untouched = df.drop(matched, axis=1)
    return pd.concat([untouched, scaled], axis=1)

def loop_func_and_dfs(dfs, func, *args):
    """Call ``func(df, *args)`` for every item of *dfs* and return the results as a list."""
    results = []
    for frame in dfs:
        results.append(func(frame, *args))
    return results

def _get_columns_names_list(df):
    return df.columns.values.tolist()

def _make_dict_to_replace_names(columns_names_list, pattern_addition_to_parms):
    leng = len(columns_names_list)
    return {columns_names_list[i]: pattern_addition_to_parms + columns_names_list[i] for i in range(leng)}

def _get_actuals_values(df):
    df_actuals_features_calculeted = df.diff()
    first_row_df_cumuletive = df.iloc[0:1]
    return df_actuals_features_calculeted.fillna(first_row_df_cumuletive)

def incal_get_actuals_from_cumuletive(df, columns_pattern, pattern_addition_to_parms):
    """Replace the cumulative columns of *df* with per-row ("actual") columns.

    Columns matching ``columns_pattern`` are renamed with the
    ``pattern_addition_to_parms`` prefix, converted to increments and
    appended to *df*; the original cumulative columns are dropped.
    """
    cumulative = select_columns_by_metebolic_parm(df, columns_pattern)
    cumulative_names = _get_columns_names_list(cumulative)
    prefixed_names = _make_dict_to_replace_names(cumulative_names, pattern_addition_to_parms)
    actuals = _get_actuals_values(cumulative.rename(columns=prefixed_names))
    combined = pd.concat([df, actuals], axis=1)
    return combined.drop(cumulative_names, axis=1)

def incal_calc_cumuletive_values(df, columns_pattern):
    """Append running totals of the columns whose name matches ``columns_pattern``.

    The new columns are named like the matched ones with every occurrence of
    ``columns_pattern`` removed from the name.
    """
    matches = df.columns.astype("string").str.contains(pat = columns_pattern)
    actuals = df.loc[:, matches]
    stripped_names = {
        name: name.replace(columns_pattern, '')
        for name in actuals.columns.values.tolist()
    }
    running_totals = actuals.rename(columns=stripped_names).cumsum()
    return pd.concat([df, running_totals], axis=1)

def incal_set_multindex(df, list_of_multi_index):
    """Reset the index of *df* (keeping it as a column), then index by the given columns."""
    flat = df.reset_index()
    return flat.set_index(list_of_multi_index)
  
def incal_groupby_then_agg(df, list_to_groupby, agg_func):
    """Group *df* by ``list_to_groupby`` and aggregate the groups with ``agg_func``."""
    return df.groupby(list_to_groupby).agg(agg_func)

def incal_resample(df_unstacked_subjects, role_to_resmple_by, agg_func):
    """Resample per subject: group by time bucket and subject, then aggregate.

    The ``Date_Time_1`` index level is bucketed with the frequency
    ``role_to_resmple_by`` and combined with the ``subjectID`` level.
    """
    # TODO: make this function more generic.
    # https://stackoverflow.com/questions/15799162/resampling-within-a-pandas-multiindex
    by_time = pd.Grouper(level='Date_Time_1', freq=role_to_resmple_by)
    by_subject = pd.Grouper(level='subjectID')
    return incal_groupby_then_agg(df_unstacked_subjects, [by_time, by_subject], agg_func)

def _multi_index_df_unstack(df_multi_indexed):
  return df_multi_indexed.unstack()

def _return_original_stacked_df(df_unstacked_subjects):
  return df_unstacked_subjects.stack().reset_index(level=1)

def incal_cumsum(df, list_of_multi_index, list_columns_names_to_cumsum):
    """Add ``<name>_cumsum`` columns holding running totals of the chosen columns.

    The frame is indexed by ``list_of_multi_index`` and unstacked, the
    running totals are computed down the rows of the wide frame, and the
    result is stacked back with ``_return_original_stacked_df``.
    """
    def _with_cumsum_suffix(label):
        # label is a (column name, inner level value) pair of the wide frame
        return (label[0] + '_cumsum', label[1])

    wide = _multi_index_df_unstack(incal_set_multindex(df, list_of_multi_index))
    running_totals = wide[list_columns_names_to_cumsum].cumsum()
    running_totals.columns = running_totals.columns.map(_with_cumsum_suffix)
    return _return_original_stacked_df(pd.concat([wide, running_totals], axis=1))

def _right_sepert_first_underscore(string):
    return tuple(string.rsplit("_", 1))

def _assemble_multi_index_axis_1_df(df, d_list, axis_1_names=["", ""]):
    # make a multi index 
    mul_i_columns = pd.MultiIndex.from_tuples(d_list, names=axis_1_names)
    # assemble new dataframe with multi index columns  
    return pd.DataFrame(df.values, index=df.index, columns=mul_i_columns)
    # then stack level 1 to the columns (level 1 -> subjects names e.g. 1 2 3...)

def incal_wide_to_long_df(wide_df, col_subj_name='subjectID'):
    """Reshape ``<feature>_<cage>`` columns into one row per (index, cage).

    Every column name is split at its last underscore, the two parts become
    a two-level column index, and the second level (named ``col_subj_name``)
    is stacked into the rows.
    """
    # e.g. 'allmeters_1' -> ('allmeters', '1')
    column_pairs = [
        _right_sepert_first_underscore(name)
        for name in _get_columns_names_list(wide_df)
    ]
    two_level_df = _assemble_multi_index_axis_1_df(wide_df, column_pairs, ['', col_subj_name])
    # https://pandas.pydata.org/docs/user_guide/reshaping.html
    return two_level_df.stack(level=1)

def flatten(lst_in_lst):
    """Flatten one level when every item is exactly a list, tuple or set.

    As soon as an item of any other type is met, the input object itself is
    returned unchanged.
    """
    flat = []
    for item in lst_in_lst:
        if type(item) not in (list, tuple, set):
            return lst_in_lst
        flat.extend(item)
    return flat

def order_categoreis_columns(df, **kargs):
    '''
    Convert columns of df, in place, to ordered categoricals.

    Each keyword is a column name and its value the wanted category order
    (passed through flatten first). The statistical analysis (anova, ancova
    and anova with interaction) needs the groups and subjects in the order
    of the experiment design.
    '''
    for col_name in kargs:
        wanted_order = flatten(kargs[col_name])
        df[col_name] = pd.Categorical(df[col_name], ordered=True, categories=wanted_order)
        
def day_and_night(df, datetime_column='Date_Time_1', start=7, end=19):
    """Return a copy of *df* with a ``time`` column of 'Day' / 'Night' labels.

    A row is 'Day' when the hour of ``datetime_column`` satisfies
    ``start <= hour < end``. Rows containing NaN are dropped from the result.
    """
    hours = df[datetime_column].dt.hour
    in_daylight = hours.ge(start) & hours.lt(end)
    labelled = df.assign(time=np.where(in_daylight, 'Day', 'Night'))
    return labelled.dropna()

def incal_make_averages_table(df, columns_names_too_groupby=['Group', 'subjectID'], column_name_for_time_of_day='time'):
    """Build a table of means for the full day and for day and night separately.

    The full-day rows are the means per ``columns_names_too_groupby`` and
    get the label 'Full day' in ``column_name_for_time_of_day``. The other
    rows are the means per time-of-day label (from ``day_and_night``) and
    the same grouping columns. Rows containing NaN are dropped.
    """
    full_day_groups = df.groupby(by=columns_names_too_groupby, sort=True, dropna=True)
    full_day = full_day_groups.mean().reset_index().dropna()
    full_day[column_name_for_time_of_day] = 'Full day'

    split_keys = [column_name_for_time_of_day, *columns_names_too_groupby]
    split_groups = day_and_night(df).groupby(by=split_keys, sort=True, dropna=True)
    day_night = split_groups.mean().reset_index().dropna()
    return pd.concat([full_day, day_night])

# Day and night boundaries; this data is used for the graph below
def make_lists_start_and_end_to_day_night_time(df, datetime64_column='Date_Time_1', start=7, end=19):
    """List the timestamps at which night periods begin and end.

    The distinct values of ``datetime64_column`` are scanned in order of
    appearance. A timestamp is recorded when the first non-daylight sample
    after daylight is met (night begins) and when the first daylight sample
    after that is met (night ends). Daylight means ``start <= hour < end``.
    """
    stamps = pd.Series(df[datetime64_column].unique(), name=datetime64_column)
    daylight = stamps.dt.hour.ge(start) & stamps.dt.hour.lt(end)
    boundaries = []
    in_night = False
    for stamp, is_day in zip(stamps, daylight):
        if in_night and is_day:
            boundaries.append(stamp)
            in_night = False
        elif not in_night and not is_day:
            boundaries.append(stamp)
            in_night = True
    return boundaries

# stats
# Features passed to the 'anova' analysis in create_anovas_table.
anova_features = [
  'rq', 
  'locomotor_activity', 
  'actual_pedmeters_cumsum', 
  'actual_allmeters_cumsum'
]
# Features passed to both the 'ancova' and the 'anova_with_interaction'
# analyses in create_anovas_table.
ancova_and_anova_with_interaction_features = [
  'Energy_Balance', 
  'kcal_hr',
  'vo2', 
  'vco2', 
  'actual_foodupa', 
  'actual_waterupa', 
  ]

def reanem_df_by_with_list_by_index(df, indexed_new_names):
    """Rename the columns of *df* by position using ``indexed_new_names``.

    The i-th column gets the i-th new name; columns beyond the length of
    ``indexed_new_names`` keep their name.
    """
    current_names = df.columns.values.tolist()
    renames = dict(zip(current_names, indexed_new_names))
    return df.rename(columns=renames)

def concat_dfs(list_of_series_dfs):
    """Concatenate the given Series / DataFrames side by side (along the columns)."""
    combined = pd.concat(list_of_series_dfs, axis=1)
    return combined

def anova_with_interaction(df, metabolic_var, independent, categorical):
    """Fit an OLS model with covariate, factor and their interaction; return its p-values."""
    formula = f'{metabolic_var} ~ {independent} + C({categorical}) + {independent}:C({categorical})'
    fitted = ols(formula, data=df).fit()
    return fitted.pvalues
def ancova(df, metabolic_var, independent, categorical):
    """Fit an OLS model with a covariate and a categorical factor; return its p-values."""
    formula = f'{metabolic_var} ~ {independent} + C({categorical})'
    fitted = ols(formula, data=df).fit()
    return fitted.pvalues
def anova(df, metabolic_var, categorical):
    """Fit an OLS model with a single categorical factor; return its p-values."""
    formula = f'{metabolic_var} ~ C({categorical})'
    fitted = ols(formula, data=df).fit()
    return fitted.pvalues

def make_pvalues_of_anova_analysis(df, m_vars, cat_var):
    """Run ``anova`` for every variable in *m_vars* and return the results as a list."""
    results = []
    for m_var in m_vars:
        results.append(anova(df, m_var, cat_var))
    return results
def make_pvalues_of_ancova_analysis(df, m_vars, independent, cat_var):
    """Run ``ancova`` for every variable in *m_vars* and return the results as a list."""
    results = []
    for m_var in m_vars:
        results.append(ancova(df, m_var, independent, cat_var))
    return results
def make_pvalues_of_anova_with_interaction_analysis(df, m_vars, independent, cat_var):
    """Run ``anova_with_interaction`` for every variable in *m_vars*; return a list."""
    results = []
    for m_var in m_vars:
        results.append(anova_with_interaction(df, m_var, independent, cat_var))
    return results

def match_case(case, df, list_of_features, independent, category_col_name):
    """Run the analysis selected by *case* and return its list of p-values.

    The previous version built a dict whose values were the *results* of
    all three analyses, so every call fitted every model and then discarded
    two of the three results. A failure in an analysis that was not
    requested also aborted the call. The dict now maps each case to a
    callable and only the selected one is executed.

    Args:
        case: 'anova', 'ancova' or 'anova_with_interaction'.
        df: data passed to the model functions.
        list_of_features: dependent variables, one model per feature.
        independent: covariate name (not used by 'anova').
        category_col_name: name of the categorical factor column.

    Returns:
        A list with one p-values result per feature.

    Raises:
        KeyError: if *case* is not one of the supported names.
    """
    cases = {
        'anova': lambda: make_pvalues_of_anova_analysis(
            df,
            list_of_features,
            category_col_name,
        ),
        'ancova': lambda: make_pvalues_of_ancova_analysis(
            df,
            list_of_features,
            independent,
            category_col_name,
        ),
        'anova_with_interaction': lambda: make_pvalues_of_anova_with_interaction_analysis(
            df,
            list_of_features,
            independent,
            category_col_name,
        ),
    }
    return cases[case]()

def incal_create_pvalues_datafram(case, df, list_of_features, independent, category_col_name):
    """Run the analysis *case* and return its p-values as one frame.

    The per-feature results are concatenated side by side and the columns
    are renamed, by position, to the names in ``list_of_features``.
    """
    per_feature_pvalues = match_case(case, df, list_of_features, independent, category_col_name)
    side_by_side = concat_dfs(per_feature_pvalues)
    return reanem_df_by_with_list_by_index(side_by_side, list_of_features)

def create_anovas_table(df):
    """Build the transposed table of p-values for all three analyses.

    'anova' runs on ``anova_features``; 'anova_with_interaction' and
    'ancova' run on ``ancova_and_anova_with_interaction_features``. All use
    'bodymass' as the covariate argument and 'Group' as the factor.
    """
    covariate, factor = 'bodymass', 'Group'
    anova_df = incal_create_pvalues_datafram(
        'anova', df, anova_features, covariate, factor)
    anova_with_interaction_df = incal_create_pvalues_datafram(
        'anova_with_interaction', df, ancova_and_anova_with_interaction_features, covariate, factor)
    # TODO: algorithm that takes each non-significant p-value of the anova with
    # interaction, replaces it with the anova value and fills NaN where needed.
    ancova_df = incal_create_pvalues_datafram(
        'ancova', df, ancova_and_anova_with_interaction_features, covariate, factor)
    return concat_dfs([anova_df, anova_with_interaction_df, ancova_df]).T

# Regex alternation of the environmental columns that are not analysed.
unwanted_column = "|".join(['envirolightlux', 'envirooccupancy', 'envirorh', 'envirosound', 'envirotemp'])
# Regex alternation of the columns that the device records cumulatively.
cumulative_parm = "|".join(['food', 'water', 'allmeters', 'wheelmeters', 'pedmeters'])
# Columns that are not needed; they are left in place because removing them is not important for now.
not_for_use_columns = ['vh2o', 'xbreak', 'ybreak', 'index']
# Prefix given to the per-sample columns derived from cumulative ones.
pattern_addition_to_parms = 'actual_'
regx_pattern_for_mean = 'vo2|vco2|vh2o|rq_|bodymass|rq|kcal_hr|break_'
regx_pattern_for_sum = 'water|food'

# Aggregation applied to every column when resampling or averaging.
dict_aggrageted_function_for_column = {
    'Energy_Balance': 'mean',
    'actual_allmeters': 'mean',
    'actual_pedmeters': 'mean',
    'bodymass': 'mean',
    'kcal_hr': 'mean',
    'locomotor_activity': 'mean',
    'rq': 'mean',
    'vco2': 'mean',
    'vo2': 'mean',
    'vh2o': 'mean',
    'xbreak': 'mean',
    'ybreak': 'mean',
    'actual_foodupa': 'sum',
    'actual_waterupa': 'sum',
}
# Join the two alternations with '|'. Plain concatenation produced
# 'water|foodvo2|...', i.e. the bogus alternative 'foodvo2', so neither
# 'food' nor 'vo2' could match on its own.
regx_pattern_no_mean_or_sum = "|".join([regx_pattern_for_sum, regx_pattern_for_mean])

# Merge files
# Wrap a single DataFrame in a list so the one-file and many-files cases are
# iterated the same way.
df_or_dfs_in_list = [dataframes] if is_one_file else dataframes.values()
# Convert the cumulative columns of every file into per-sample 'actual_'
# columns, then stack the files on top of each other.
dfs = [incal_get_actuals_from_cumuletive(df, cumulative_parm, pattern_addition_to_parms) for df in df_or_dfs_in_list]
dfs_concated = pd.concat(dfs)

# NOTE(review): dfs_concated_cleaned is not used by the visible code below; the
# pipeline continues from dfs_concated, so the columns matching unwanted_column
# are presumably still present afterwards - confirm the intent.
dfs_concated_cleaned = select_columns_by_metebolic_parm(dfs_concated, unwanted_column, True) 

# Index by timestamp; reset_index inside incal_set_multindex leaves the old
# index behind as an 'index' column, which is dropped here.
dfs_concated = incal_set_multindex(dfs_concated, ['Date_Time_1']).drop(columns='index')
# Reshape '<feature>_<cage>' columns into rows indexed by (Date_Time_1, subjectID).
analysis_format = incal_wide_to_long_df(dfs_concated)

# Scale the gas columns by 60 and the food column by 3.56.
# NOTE(review): presumably per-minute -> per-hour and grams -> kcal conversions - confirm.
analysis_format[['vco2', 'vh2o', 'vo2']] = analysis_format[['vco2', 'vh2o', 'vo2']].mul(60)
analysis_format[['actual_foodupa']] = analysis_format[['actual_foodupa']].mul(3.56)
# Energy balance = food intake minus energy expenditure.
analysis_format['Energy_Balance'] = analysis_format['actual_foodupa'].values - analysis_format['kcal_hr'].values
# Locomotor activity = sum of the x and y beam-break columns.
analysis_format['locomotor_activity'] = analysis_format[['xbreak', 'ybreak']].sum(axis=1)

# Rebuild the frame with level 0 of the index converted to a DatetimeIndex
# before resampling on that level.
values_df = analysis_format.values
index_date_time = pd.DatetimeIndex(analysis_format.index.get_level_values(0))
index_subjects = analysis_format.index.get_level_values(1)
index_series_datetime = pd.Series(index_date_time)
index_series_subjects = pd.Series(index_subjects)
index_frame = pd.concat([index_series_datetime, index_series_subjects], axis=1)
multi_index = pd.MultiIndex.from_frame(index_frame)
columns_names = analysis_format.columns
data_frame = pd.DataFrame(values_df, multi_index, columns_names)
# Hourly resample per subject, aggregating each column as configured.
resampled_analysis_format = incal_resample(data_frame, 'H', dict_aggrageted_function_for_column)

# Add '<name>_cumsum' running totals of the distance columns to both frames.
featuers_to_cumsum_by = ['actual_pedmeters', 'actual_allmeters']
analysis_format = incal_cumsum(analysis_format, ['Date_Time_1', 'subjectID'], featuers_to_cumsum_by)
resampled_analysis_format = incal_cumsum(resampled_analysis_format, ['Date_Time_1', 'subjectID'], featuers_to_cumsum_by)

# Aggregation map extended with the two new cumsum columns.
add_feature_for_agg = {
  **dict_aggrageted_function_for_column, 
  'actual_allmeters_cumsum': 'mean',
  'actual_pedmeters_cumsum': 'mean'
}

# analysis format - with the original datetime samples
# when working with a dataframe of less than 15K rows we can try to use analysis_format

# analysis format - hourly resampled data with a (datetime, subject, group) index
resampled_analysis_format = incal_assemble_multi_index_format(resampled_analysis_format, 'subjectID', dict_groups, 'Group')
# analysis format - averages for each subject
grouped_analysis_format_df = resampled_analysis_format.groupby(level=['subjectID', 'Group'])
analysis_format_calculeted = grouped_analysis_format_df.agg(add_feature_for_agg).dropna()
# TODO: need to understand why Energy_Balance is recomputed here instead of using its average
analysis_format_calculeted['Energy_Balance'] = analysis_format_calculeted['actual_foodupa'].values - analysis_format_calculeted['kcal_hr'].values

# display(analysis_format.head(2), analysis_format.shape)
display(resampled_analysis_format.head(2), resampled_analysis_format.shape)
display(analysis_format_calculeted.head(2), analysis_format_calculeted.shape)

def assemble_dash_table(dff):
    """Return the ``columns`` and ``data`` entries describing *dff* for a Dash DataTable."""
    column_specs = []
    for column in dff.columns:
        column_specs.append({"name": column, "id": column})
    return {'columns': column_specs, 'data': dff.to_dict('records')}

def get_start_and_end_time(tuple_start_end, dict_time_stamps):
    """Look up the two positions of *tuple_start_end* in *dict_time_stamps*.

    Both positions are converted with ``str`` before the lookup.
    """
    start_key, end_key = map(str, tuple_start_end)
    return dict_time_stamps[start_key], dict_time_stamps[end_key]

def trim_df_datetime(df, start_time, end_time):
    """Return the rows of *df* selected by the label slice ``start_time:end_time``."""
    window = slice(start_time, end_time)
    return df.loc[window]

def assemble_data_to_dom(df, feature_name):
    # Indexes df with .loc using an empty tuple as the row key and the pair
    # (feature_name, 'bodymass') as the column key.
    # NOTE(review): presumably meant to return every row of the selected
    # feature together with 'bodymass' - confirm how .loc treats the empty
    # row tuple for the frames passed in.
    return df.loc[(), (feature_name, 'bodymass')]

def get_group_or_individual_df(df, category_name, feature_calc):
    """Return per-subject data as is, or data aggregated per time and group.

    The previous version built a dict containing both results, so the
    group-by aggregation was computed (and could fail) even when
    'subjectID' was requested. Only the requested branch is evaluated now.

    Args:
        df: frame whose index has a 'Date_Time_1' level and a level named
            like *category_name*.
        category_name: 'subjectID' to get *df* back unchanged, or 'Group' to
            aggregate the rows that share a timestamp and group.
        feature_calc: aggregation passed to ``agg`` for the 'Group' case.

    Returns:
        *df* itself for 'subjectID'; the aggregated frame for 'Group'.

    Raises:
        KeyError: if *category_name* is neither 'Group' nor 'subjectID'.
    """
    if category_name == 'subjectID':
        return df
    if category_name == 'Group':
        keys = [
            df.index.get_level_values('Date_Time_1'),
            df.index.get_level_values(category_name),
        ]
        return df.groupby(keys).agg(feature_calc)
    raise KeyError(category_name)

def graphs_maker(clean_df, df_grouped, group_or_individual_df_averages, averages_df, feature_name, category_name, colors):
    """Build the five dashboard figures for one feature.

    Returns, in order: the time-series scatter, the histogram, the box plot,
    the bar chart of averages and the regression scatter against 'bodymass'
    (with an OLS trendline). ``clean_df`` is accepted but not used.
    """
    # Styling shared by every figure.
    look = {'color_discrete_sequence': colors, 'template': 'simple_white'}

    grouped_categories = df_grouped.index.get_level_values(category_name)
    grouped_values = df_grouped[feature_name].values
    average_categories = group_or_individual_df_averages.index.get_level_values(category_name)

    time_series_graph = px.scatter(
        x=df_grouped.index.get_level_values('Date_Time_1'),
        y=grouped_values,
        color=grouped_categories,
        **look
    ).update_traces(mode='lines+markers')

    histogram = px.histogram(
        x=grouped_values,
        color=grouped_categories.values,
        **look
    )

    box = px.box(
        x=grouped_categories.values,
        y=grouped_values,
        color=grouped_categories.values,
        **look
    )

    averages = px.bar(
        x=average_categories,
        y=group_or_individual_df_averages[feature_name].values,
        color=average_categories,
        **look
    )

    # When the selected feature is 'bodymass' itself, plot 'kcal_hr' against it instead.
    if feature_name != 'bodymass':
        regression_y = averages_df[feature_name].values
    else:
        regression_y = averages_df['kcal_hr'].values
    regression = px.scatter(
        x=averages_df['bodymass'].values,
        y=regression_y,
        color=averages_df.index.get_level_values(1),
        trendline='ols',
        **look
    )
    return time_series_graph, histogram, box, averages, regression

  # dashboard code. 
# Dash application and its stylesheet.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = JupyterDash(__name__, external_stylesheets = external_stylesheets)

# Work on a copy of the resampled data; also prepare an outlier-free variant.
df = resampled_analysis_format.copy()
df_removed_outliers = remove_outliers_mixed_df(df)
display(resampled_analysis_format)
# for layout
features = df.columns.values.tolist()
subjects_ids = df.index.get_level_values(1)
# Sorted string ids; click_data_points uses this list to map a clicked
# trace number back to a subject.
legand_color_order = np.sort([str(n) for n in subjects_ids.unique().values])
# select group or subject (category name)
categories_columns_names = get_categories_cals_names(df)
obj_categories_columns_names = [{
    'label': feature,
    'value': feature
} for feature in categories_columns_names]

# trim data - range slider
time_stamps = df.index.get_level_values(0)
shape_analysis_format_indexed = df.shape
end_point_index_analysis_format_indexed = shape_analysis_format_indexed[0] - 1
# One slider mark per row position, labelled with that row's timestamp.
# NOTE(review): this creates a mark for every row of df - confirm that is intended.
marks_indexed_time_stamp = {
    i: time_stamps[i]
    for i in range(shape_analysis_format_indexed[0])
}
# data for Dropdown - removing group or subjects
subjects = df.index.get_level_values(1).unique()
groups = df.index.get_level_values(2).unique()
multi_selection_subjects = [{
    'label': str(subject),
    'value': str(subject)
} for subject in subjects]
multi_selection_groups = [{
    'label': str(group),
    'value': str(group)
} for group in groups]

# remove specific point
# rows_ind holds the unique index entries; for_loop_removeing_data_point
# looks rows up in it by position.
rows_ids, rows_ind = df.index.factorize()

# Page layout: a block of controls followed by the graphs and the p-values table.
app.layout = html.Div([
    html.Div([
        html.Div([
            # Feature plotted on the y axis.
            dcc.Dropdown(id='feature_y_axis_dropdown',
                         options=[{
                             'label': i,
                             'value': i
                         } for i in features],
                         value=features[0]),
            # Show the data per subject or per group.
            dcc.Dropdown(id='show_as_group_or_individual',
                         options=obj_categories_columns_names,
                         value=categories_columns_names[0]),
            # Individual data points to remove; starts with no options.
            dcc.Dropdown(id='remove_specific_value',
                         options=[],
                         multi=True,
                         value=[]),
            # Groups to remove.
            dcc.Dropdown(id='remove_group',
                         options=multi_selection_groups,
                         multi=True),
            # Subjects to remove.
            dcc.Dropdown(id='remove_subjects',
                         options=multi_selection_subjects,
                         multi=True),
            # Outlier removal toggle; unchecked by default.
            dcc.Checklist(id='checklist_outliears',
                          options=[
                              {
                                  'label': 'Remove outliers',
                                  'value': 'True'
                              },
                          ],
                          value=[],
                          labelStyle={'display': 'inline-block'}),
            # Time window as row positions; starts at the full range.
            dcc.RangeSlider(id="range_slider_trim_time_series",
                            marks=marks_indexed_time_stamp,
                            value=(0, end_point_index_analysis_format_indexed),
                            allowCross=False,
                            min=0,
                            max=end_point_index_analysis_format_indexed),
        ]),
    ]),
    html.Div([
        html.Button('Save data in each graph', id='save_data', n_clicks=0),
        dcc.Graph(id='scatter_time_series', clickData={}),
        dcc.Graph(id='averages'),
        dcc.Graph(id='box', clickData={}),
        dcc.Graph(id='hist'),
        dcc.Graph(id='regression', clickData={}),
        dash_table.DataTable(id='stats_table_Pvalue')
    ]),
    html.Div([])
])


def remove_data_point(data, row_index, feature):
    """Blank out a single cell of ``data`` in place.

    Args:
        data: DataFrame that is mutated.
        row_index: Row label of the cell to blank.
        feature: Column label of the cell to blank.

    Returns:
        None. The cell at (``row_index``, ``feature``) is set to NaN.
    """
    missing_value = np.nan
    data.at[row_index, feature] = missing_value


def for_loop_removeing_data_point(data, rows, feature):
    """Blank ``feature`` in ``data`` for every entry listed in ``rows``.

    Each entry of ``rows`` is a string whose first space-separated token is an
    integer position into the module-level ``rows_ind`` sequence. The label
    found at that position is handed to ``remove_data_point``, which mutates
    ``data`` in place.

    Args:
        data: DataFrame that is mutated.
        rows: Iterable of strings shaped like ``'<position> ...'``.
        feature: Column label whose cells are blanked.
    """
    for entry in rows:
        position_token = entry.partition(' ')[0]
        row_label = rows_ind[int(position_token)]
        remove_data_point(data, row_label, feature)


def get_dff(df, v_feature, is_removed_points=False, **kwargs):
    """Return an independent copy of the ``v_feature`` selection of ``df``.

    Only the selected column(s) are copied rather than the whole frame, so
    the caller still gets data it can modify without touching ``df``.

    Args:
        df: Source DataFrame; it is not modified.
        v_feature: Column label (gives a Series) or list of labels (gives a
            DataFrame).
        is_removed_points: Accepted for backward compatibility; unused.
        **kwargs: Accepted for backward compatibility; unused.

    Returns:
        A copy of ``df[v_feature]``.

    Raises:
        KeyError: If ``v_feature`` is not a column of ``df``.
    """
    return df[v_feature].copy()


def click_data_points(df, click_data, feature):
    """Locate the row of ``df`` that corresponds to a clicked scatter point.

    Args:
        df: DataFrame whose index entries are
            ``(timestamp, subject, group)`` tuples.
        click_data: Plotly ``clickData`` payload; only its first point is used.
        feature: Unused here; kept so existing callers keep working.

    Returns:
        ``(index, date_time, subject, group)`` where ``index`` is the position
        of the matching entry in ``df.index``, ``date_time`` is a
        ``pd.Timestamp``, ``subject`` is the subject number as a string and
        ``group`` is the name of the first group in ``dict_groups`` that
        contains the subject.

    Raises:
        IndexError: If the subject belongs to no group in ``dict_groups``.
        ValueError: If no index entry matches the clicked point.
    """
    clicked = click_data['points'][0]
    date_time = pd.Timestamp(clicked['x'])

    # curveNumber is the position of the clicked trace; legand_color_order
    # translates that position into the subject (cage) number.
    subject_number = int(legand_color_order[clicked['curveNumber']])

    matching_groups = [
        name for name, members in dict_groups.items()
        if subject_number in members
    ]
    group = matching_groups[0]
    subject = str(subject_number)

    # Index entries look like (Timestamp('2021-07-28 16:00:00'), '6', 'Group_3').
    key = (date_time, subject, group)
    index = df.index.to_list().index(key)
    return index, date_time, subject, group


def create_scatter(dff, colors):
    """Build the time-series scatter figure.

    Args:
        dff: Series (or single selection) whose index level 0 supplies the
            x values and whose index level 1 supplies the colour grouping.
        colors: Discrete colour sequence passed to Plotly Express.

    Returns:
        A Plotly figure drawn with lines and markers, using the
        ``simple_white`` template and the "normal" legend trace order.
    """
    index = dff.index
    fig = px.scatter(
        x=index.get_level_values(0),
        y=dff.values,
        color=index.get_level_values(1),
        color_discrete_sequence=colors,
        template='simple_white',
    )
    # Both update_* methods return the figure they were called on.
    return fig.update_traces(mode='lines+markers').update_layout(
        legend_traceorder="normal")


def create_bar(averages, colors):
    """Build a bar chart of ``averages``.

    Args:
        averages: Series whose index level 1 supplies both the x categories
            and the bar colours; its values are the bar heights.
        colors: Discrete colour sequence passed to Plotly Express.

    Returns:
        A Plotly bar figure using the ``simple_white`` template.
    """
    group_labels = averages.index.get_level_values(1)
    bar_options = dict(
        x=group_labels,
        y=averages.values,
        color=group_labels,
        color_discrete_sequence=colors,
        template='simple_white',
    )
    return px.bar(**bar_options)


def create_histogram(time_series, category_name, colors):
    """Build a histogram of the values in ``time_series``.

    Args:
        time_series: Series whose values are binned.
        category_name: Name of the index level used to colour the bars.
        colors: Discrete colour sequence passed to Plotly Express.

    Returns:
        A Plotly histogram figure using the ``simple_white`` template.
    """
    observations = time_series.values
    category_labels = time_series.index.get_level_values(category_name).values
    histogram_options = dict(
        x=observations,
        color=category_labels,
        color_discrete_sequence=colors,
        template='simple_white',
    )
    return px.histogram(**histogram_options)


def create_regression(averages_df, colors, feature_name):
    """Scatter a feature against ``bodymass`` with an OLS trendline.

    Args:
        averages_df: DataFrame with a ``bodymass`` column; index level 1
            supplies the colour grouping.
        colors: Discrete colour sequence passed to Plotly Express.
        feature_name: Column plotted on the y axis. When it is ``'bodymass'``
            itself, ``'rq'`` is plotted instead.

    Returns:
        A Plotly scatter figure using the ``simple_white`` template.
    """
    group_labels = averages_df.index.get_level_values(1)
    body_mass = averages_df['bodymass'].values

    if feature_name == 'bodymass':
        y_column = 'rq'
    else:
        y_column = feature_name

    return px.scatter(
        x=body_mass,
        y=averages_df[y_column].values,
        color=group_labels,
        color_discrete_sequence=colors,
        template='simple_white',
        trendline='ols',
    )


def create_box(time_series, category_name, colors):
    """Build a box plot of ``time_series`` split by an index level.

    Args:
        time_series: Series whose values are plotted on the y axis.
        category_name: Name of the index level used for both the x
            categories and the box colours.
        colors: Discrete colour sequence passed to Plotly Express.

    Returns:
        A Plotly box figure using the ``simple_white`` template.
    """
    category_labels = time_series.index.get_level_values(category_name).values
    box_options = dict(
        x=category_labels,
        y=time_series.values,
        color=category_labels,
        color_discrete_sequence=colors,
        template='simple_white',
    )
    return px.box(**box_options)


def removing_group_or_subjects(data, remove_group, remove_subjects):
    """Drop the selected groups and/or subjects from ``data``.

    The two selections are independent, so both are applied when both are
    given. Previously a non-empty group selection caused the subject
    selection to be ignored.

    Args:
        data: DataFrame to filter.
        remove_group: Group names to remove; ``None`` or empty removes none.
        remove_subjects: Subject ids to remove; ``None`` or empty removes none.

    Returns:
        The result of the applied removals, or ``data`` itself when neither
        selection is set.
    """
    # NOTE(review): 2 and 1 are presumably the index-level positions of
    # Group and subjectID — confirm against incal_remove_group /
    # incal_remove_subjects.
    if remove_group:
        data = incal_remove_group(data, 2, remove_group)
    if remove_subjects:
        data = incal_remove_subjects(data, 1, remove_subjects)
    return data


def create_average_df(data, features_calc):
    """Aggregate ``data`` to one row per (subjectID, Group) pair.

    Args:
        data: DataFrame whose index has ``subjectID`` and ``Group`` levels.
        features_calc: Mapping of column name to aggregation, passed to
            ``agg``.

    Returns:
        The aggregated DataFrame with every row containing a NaN removed.
    """
    index = data.index
    grouping_keys = [
        index.get_level_values(level_name)
        for level_name in ('subjectID', 'Group')
    ]
    per_subject = data.groupby(grouping_keys).agg(features_calc)
    # Drop the rows where the aggregation produced NaN, e.g. a subject paired
    # with a group it does not belong to.
    return per_subject.dropna()


def groupby_category(data, category, features_calc):
    """Aggregate ``data`` per timestamp and per value of ``category``.

    Args:
        data: DataFrame whose index has a ``Date_Time_1`` level and a level
            named ``category``.
        category: Index level to group by. ``'subjectID'`` means no grouping.
        features_calc: Mapping of column name to aggregation, passed to
            ``agg``.

    Returns:
        ``data`` itself (not a copy) when ``category`` is ``'subjectID'``;
        otherwise the aggregated DataFrame with NaN rows removed.
    """
    if category == 'subjectID':
        return data

    index = data.index
    grouping_keys = [
        index.get_level_values('Date_Time_1'),
        index.get_level_values(category),
    ]
    aggregated = data.groupby(grouping_keys).agg(features_calc)
    return aggregated.dropna()


def statstical_analysis(averages_df):
    """Run the ANOVA helper on ``averages_df`` and shape it for a DataTable.

    Args:
        averages_df: Per-subject averages; its index is reset to columns
            before it is passed to ``create_anovas_table``.

    Returns:
        ``(columns, table, p_values_table)`` where ``columns`` is a list of
        ``{'id': name, 'name': name}`` dicts, ``table`` is the p-values table
        as a list of record dicts, and ``p_values_table`` is the DataFrame
        itself with its former index exposed as a ``Features`` column.
    """
    flat_averages = averages_df.reset_index()
    anova_result = create_anovas_table(flat_averages)
    p_values_table = anova_result.reset_index().rename(
        columns={'index': 'Features'})

    columns = [
        {'id': column_name, 'name': column_name}
        for column_name in p_values_table.columns.to_list()
    ]
    records = p_values_table.to_dict('records')
    return columns, records, p_values_table


@dash.callback(Output('remove_specific_value', 'options'),
               Input('feature_y_axis_dropdown', 'value'))
def dropdown_rows_ids_feature(feature_name):
    """Build the options of the ``remove_specific_value`` dropdown.

    One option is produced per entry of the module-level ``rows_ind``. Label
    and value are the same string: ``'<row id> <datetime> <subject> <group>'``.

    Args:
        feature_name: Current y-axis feature. Unused; it only serves as the
            callback trigger.

    Returns:
        A list of ``{'label': text, 'value': text}`` dicts.
    """

    def describe(position):
        # Each rows_ind entry is indexed as (datetime, subject, group).
        row_id = rows_ids[position]
        row = rows_ind[position]
        return f'{row_id} {str(row[0])} {row[1]} {row[2]}'

    options = []
    for position in range(len(rows_ind)):
        text = describe(position)
        options.append({'label': text, 'value': text})
    return options


@dash.callback(
    Output('scatter_time_series', 'figure'),
    Output('averages', 'figure'),
    Output('box', 'figure'),
    Output('hist', 'figure'),
    Output('regression', 'figure'),
    Output('stats_table_Pvalue', 'columns'),
    Output('stats_table_Pvalue', 'data'),
    Output('remove_specific_value', 'value'),
    Input('feature_y_axis_dropdown', 'value'),
    Input('remove_subjects', 'value'),
    Input('remove_group', 'value'),
    Input('checklist_outliears', 'value'),
    Input('range_slider_trim_time_series', 'value'),
    Input('range_slider_trim_time_series', 'marks'),
    Input('scatter_time_series', 'clickData'),
    # Listed as an Input so that changing the dropdown re-runs the callback.
    Input('show_as_group_or_individual', 'value'),
    State('show_as_group_or_individual', 'value'),
    State('feature_y_axis_dropdown', 'value'),  # current feature name
    State('remove_specific_value', 'value'),
    Input('save_data', 'n_clicks'))
def pool_dashboard_data(value_feature, remove_subjects, remove_group,
                        checklist_outliers, tuple_start_end, dict_time_stamps,
                        click_data, input_category, category_name,
                        state_feature, strs_remove_specific_values, n_clicks_save_data):
    """Rebuild every dashboard figure and the statistics table.

    Steps, in order: choose the raw or outlier-free data, blank the
    individually removed points, trim the time range, drop the selected
    groups/subjects, aggregate, build the five figures and the p-values
    table, and write the CSV files when the save button was pressed.

    Args:
        value_feature: Selected feature (Input). Unused; ``state_feature``
            carries the same dropdown value.
        remove_subjects: Subjects selected for removal.
        remove_group: Groups selected for removal.
        checklist_outliers: Non-empty when "Remove outliers" is ticked.
        tuple_start_end: Range-slider value, forwarded to
            ``get_start_and_end_time``.
        dict_time_stamps: Range-slider marks, forwarded to
            ``get_start_and_end_time``.
        click_data: ``clickData`` of the time-series scatter.
        input_category: Unused; present only as a trigger.
        category_name: Index level used to group the time series.
        state_feature: Feature column shown in the figures.
        strs_remove_specific_values: Current ``'<row> <datetime> <subject>
            <group>'`` strings of points to blank.
        n_clicks_save_data: Click count of the save button.

    Returns:
        ``(fig_scatter, fig_bar, fig_box, fig_histogram, fig_regression,
        columns, table, removed_point_strings)`` matching the declared
        Outputs in order.
    """
    # Only the first triggering property is inspected.
    triggered_prop = dash.callback_context.triggered[0]['prop_id']
    is_clickData_triggered = triggered_prop == 'scatter_time_series.clickData'
    is_feature_y_axis_triggered = triggered_prop == 'feature_y_axis_dropdown.value'
    is_save_triggered = triggered_prop == 'save_data.n_clicks'

    # remove outliers
    data = df.copy() if not checklist_outliers else df_removed_outliers.copy()

    # remove specific points
    # Work on our own list and tolerate a missing (None) dropdown value so
    # the append below cannot fail.
    strs_remove_specific_values = list(strs_remove_specific_values or [])
    if is_feature_y_axis_triggered:
        # A new feature starts with no individually removed points.
        strs_remove_specific_values = []
    if is_clickData_triggered:  # remove the data point that was clicked
        i, datetime, subject, group = click_data_points(
            data, click_data,
            state_feature)  # e.g. (Timestamp('2021-08-01 13:00:00'), 7, 'Control')
        strs_remove_specific_values.append(f'{i} {datetime} {subject} {group}')
    if strs_remove_specific_values:
        for_loop_removeing_data_point(data, strs_remove_specific_values,
                                      state_feature)  # mutates data in place
        strs_remove_specific_values = [
            str(row) for row in strs_remove_specific_values
        ]

    dropdown_ids_rows = strs_remove_specific_values

    # trim datetime from the sides
    start_time, end_time = get_start_and_end_time(tuple_start_end,
                                                  dict_time_stamps)
    data = trim_df_datetime(data, start_time, end_time)

    # remove groups and/or subjects depending on the selection
    data = removing_group_or_subjects(data, remove_group, remove_subjects)

    # column name -> aggregation used for that feature
    features_calc = add_feature_for_agg
    # per-subject averages
    averages_df = create_average_df(data, features_calc)
    averages_df['Energy_Balance'] = averages_df[
        'actual_foodupa'].values - averages_df['kcal_hr'].values
    # time series grouped by subject or by group
    time_series_df = groupby_category(data, category_name, features_calc)
    # pick the displayed feature column from both frames
    time_series = get_dff(time_series_df, state_feature)
    averages = get_dff(averages_df, state_feature)

    colors = px.colors.qualitative.Vivid
    fig_scatter = create_scatter(time_series, colors)
    fig_bar = create_bar(averages, colors)
    fig_box = create_box(time_series, category_name, colors)
    fig_histogram = create_histogram(time_series, category_name, colors)
    fig_regression = create_regression(averages_df, colors, state_feature)

    # analysis section
    columns, table, p_values_table = statstical_analysis(averages_df)
    # n_clicks stays positive after the first click, so also require that the
    # save button is what fired this call; otherwise every later interaction
    # would rewrite the files.
    if is_save_triggered and n_clicks_save_data:
        averages_df.to_csv('averages table.csv')
        p_values_table.to_csv('p values table.csv')

    return fig_scatter, fig_bar, fig_box, fig_histogram, fig_regression, columns, table, dropdown_ids_rows

def run_server(self,
               port=8001,
               debug=True,
               threaded=True,
               **flask_run_options):
    """Start the server held in ``self.server``.

    Args:
        self: Object exposing a ``server`` attribute with a ``run`` method.
        port: Port to listen on.
        debug: Passed through as the ``debug`` option.
        threaded: Passed through as the ``threaded`` option. It used to be
            accepted but silently dropped.
        **flask_run_options: Extra keyword options forwarded unchanged to
            ``self.server.run``.
    """
    self.server.run(port=port,
                    debug=debug,
                    threaded=threaded,
                    **flask_run_options)

# Start the Dash app only when this file is executed as the main program.
if __name__ == '__main__':
    # NOTE(review): mode='external' is presumably the JupyterDash option that
    # serves the app on its own URL instead of inline — confirm.
    app.run_server(debug=True, port=8021, mode='external')

OrderedDict([('Control', [1, 4, 7, 10, 13]), ('six_M', [3, 5, 9, 12, 16]), ('three_M', [2, 6, 8, 11, 14, 15])])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Energy_Balance,actual_allmeters,actual_allmeters_cumsum,actual_foodupa,actual_pedmeters,actual_pedmeters_cumsum,actual_waterupa,bodymass,kcal_hr,locomotor_activity,rq,vco2,vh2o,vo2,xbreak,ybreak
Date_Time_1,subjectID,Group,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2021-08-10 16:00:00,1,Control,-0.231982,3.726051,3.726051,0.20324,3.089252,3.089252,0.069285,23.53824,0.333602,395.5,0.841608,57.747723,3.419409,68.44284,189.5,206.0
2021-08-10 16:00:00,10,Control,-0.354971,0.0,0.0,0.0,0.0,0.0,0.0,23.79335,0.354971,0.0,0.790857,58.323651,4.761358,73.70328,0.0,0.0


(384, 16)

Unnamed: 0_level_0,Unnamed: 1_level_0,Energy_Balance,actual_allmeters,actual_pedmeters,bodymass,kcal_hr,locomotor_activity,rq,vco2,vo2,vh2o,xbreak,ybreak,actual_foodupa,actual_waterupa,actual_allmeters_cumsum,actual_pedmeters_cumsum
subjectID,Group,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1,Control,8.685223,3.474006,2.701177,24.664162,0.324571,413.465278,0.797062,54.883036,66.955242,5.603571,186.673611,226.791667,9.009794,1.963524,54.982616,43.857166
4,Control,11.892066,1.87551,1.273399,26.371733,0.402722,256.881944,0.874225,71.887785,82.013219,8.454145,93.895833,162.986111,12.294788,1.398411,24.931666,16.983844


(16, 16)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Energy_Balance,actual_allmeters,actual_allmeters_cumsum,actual_foodupa,actual_pedmeters,actual_pedmeters_cumsum,actual_waterupa,bodymass,kcal_hr,locomotor_activity,rq,vco2,vh2o,vo2,xbreak,ybreak
Date_Time_1,subjectID,Group,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2021-08-10 16:00:00,1,Control,-0.231982,3.726051,3.726051,0.203240,3.089252,3.089252,0.069285,23.538240,0.333602,395.5,0.841608,57.747723,3.419409,68.44284,189.5,206.0
2021-08-10 16:00:00,10,Control,-0.354971,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,23.793350,0.354971,0.0,0.790857,58.323651,4.761358,73.70328,0.0,0.0
2021-08-10 16:00:00,11,three_M,-0.187435,1.075041,1.075041,0.348722,0.872384,0.872384,0.016660,20.857915,0.361796,142.0,0.936227,67.987158,4.184996,72.72315,34.0,108.0
2021-08-10 16:00:00,12,six_M,-0.340636,0.754128,0.754128,0.077348,0.432694,0.432694,0.079349,23.846010,0.379310,152.5,0.916464,70.526403,5.488034,76.45467,17.0,135.5
2021-08-10 16:00:00,13,Control,-0.331457,0.130627,0.130627,0.000000,0.084335,0.084335,0.000000,27.210390,0.331457,9.0,0.919918,61.530762,5.727485,66.83691,2.5,6.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-08-11 15:00:00,5,six_M,-0.316153,0.116000,34.598646,0.000000,0.000000,24.854424,0.000000,22.810300,0.316153,38.0,0.870102,56.113068,6.263580,64.47390,0.0,38.0
2021-08-11 15:00:00,6,three_M,-0.299499,0.045600,55.315535,0.000000,0.001700,41.494850,0.000000,23.470860,0.299499,28.0,0.716995,45.377262,6.007974,63.26106,18.0,10.0
2021-08-11 15:00:00,7,Control,-0.335119,0.089900,65.348766,0.000000,0.000000,46.024857,0.000000,26.274230,0.335119,4.0,0.702508,49.829532,5.555267,71.04984,1.0,3.0
2021-08-11 15:00:00,8,three_M,-0.293128,0.000000,45.448544,0.000000,0.000000,38.591754,0.000000,21.592380,0.293128,0.0,0.703744,43.694220,6.197088,62.11674,0.0,0.0


Dash app running on http://127.0.0.1:8021/


In [2]:
# http://127.0.0.1:8024/

Index([ (2021-08-10 16:00:00, '1', 'Control'),
       (2021-08-10 16:00:00, '10', 'Control'),
       (2021-08-10 16:00:00, '11', 'three_M'),
         (2021-08-10 16:00:00, '12', 'six_M'),
       (2021-08-10 16:00:00, '13', 'Control'),
       (2021-08-10 16:00:00, '14', 'three_M'),
       (2021-08-10 16:00:00, '15', 'three_M'),
         (2021-08-10 16:00:00, '16', 'six_M'),
        (2021-08-10 16:00:00, '2', 'three_M'),
          (2021-08-10 16:00:00, '3', 'six_M'),
       ...
       (2021-08-11 15:00:00, '15', 'three_M'),
         (2021-08-11 15:00:00, '16', 'six_M'),
        (2021-08-11 15:00:00, '2', 'three_M'),
          (2021-08-11 15:00:00, '3', 'six_M'),
        (2021-08-11 15:00:00, '4', 'Control'),
          (2021-08-11 15:00:00, '5', 'six_M'),
        (2021-08-11 15:00:00, '6', 'three_M'),
        (2021-08-11 15:00:00, '7', 'Control'),
        (2021-08-11 15:00:00, '8', 'three_M'),
          (2021-08-11 15:00:00, '9', 'six_M')],
      dtype='object', length=384)

In [6]:
# Scratch cell: inspect what rows_ind (and its flattened form) offers for
# finding the position of a given row.
dir(rows_ind)
dir(rows_ind.to_flat_index())#.index(('2021-08-10 16:00:00', '3', 'six_M'))

# NOTE(review): `x` is not defined in the visible cells — presumably a
# (Timestamp, subject, group) tuple. The recorded output warns that the
# elementwise comparison failed and shows an empty result, so this lookup
# does not find the row.
np.argwhere(rows_ind.values == x)


elementwise comparison failed; this will raise an error in the future.



array([], shape=(0, 0), dtype=int64)

(Timestamp('2021-08-10 16:00:00'), '3', 'six_M')

In [6]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Energy_Balance,actual_allmeters,actual_allmeters_cumsum,actual_foodupa,actual_pedmeters,actual_pedmeters_cumsum,actual_waterupa,bodymass,kcal_hr,locomotor_activity,rq,vco2,vh2o,vo2,xbreak,ybreak
Date_Time_1,subjectID,Group,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2021-08-10 16:00:00,1,Control,-0.231982,3.726051,3.726051,0.203240,3.089252,3.089252,0.069285,23.538240,0.333602,395.5,0.841608,57.747723,3.419409,68.44284,189.5,206.0
2021-08-10 16:00:00,10,Control,-0.354971,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,23.793350,0.354971,0.0,0.790857,58.323651,4.761358,73.70328,0.0,0.0
2021-08-10 16:00:00,11,three_M,-0.187435,1.075041,1.075041,0.348722,0.872384,0.872384,0.016660,20.857915,0.361796,142.0,0.936227,67.987158,4.184996,72.72315,34.0,108.0
2021-08-10 16:00:00,12,six_M,-0.340636,0.754128,0.754128,0.077348,0.432694,0.432694,0.079349,23.846010,0.379310,152.5,0.916464,70.526403,5.488034,76.45467,17.0,135.5
2021-08-10 16:00:00,13,Control,-0.331457,0.130627,0.130627,0.000000,0.084335,0.084335,0.000000,27.210390,0.331457,9.0,0.919918,61.530762,5.727485,66.83691,2.5,6.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-08-11 15:00:00,5,six_M,-0.316153,0.116000,34.598646,0.000000,0.000000,24.854424,0.000000,22.810300,0.316153,38.0,0.870102,56.113068,6.263580,64.47390,0.0,38.0
2021-08-11 15:00:00,6,three_M,-0.299499,0.045600,55.315535,0.000000,0.001700,41.494850,0.000000,23.470860,0.299499,28.0,0.716995,45.377262,6.007974,63.26106,18.0,10.0
2021-08-11 15:00:00,7,Control,-0.335119,0.089900,65.348766,0.000000,0.000000,46.024857,0.000000,26.274230,0.335119,4.0,0.702508,49.829532,5.555267,71.04984,1.0,3.0
2021-08-11 15:00:00,8,three_M,-0.293128,0.000000,45.448544,0.000000,0.000000,38.591754,0.000000,21.592380,0.293128,0.0,0.703744,43.694220,6.197088,62.11674,0.0,0.0


In [7]:
# Scratch cell: select the rows whose second index level (subjectID in the
# displayed frame) is '14', '3' or '5', keeping every datetime and group.
df.loc[:, ['14', '3', '5'] , :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Energy_Balance,actual_allmeters,actual_allmeters_cumsum,actual_foodupa,actual_pedmeters,actual_pedmeters_cumsum,actual_waterupa,bodymass,kcal_hr,locomotor_activity,rq,vco2,vh2o,vo2,xbreak,ybreak
Date_Time_1,subjectID,Group,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2021-08-10 16:00:00,14,three_M,-0.344217,2.005764,2.005764,0.167335,1.514690,1.514690,0.108337,24.115910,0.427885,258.500000,0.938120,80.558310,6.152959,85.96485,58.500000,200.000000
2021-08-10 17:00:00,14,three_M,0.376857,1.187959,3.193723,2.332908,0.746870,2.261560,0.279701,24.342563,0.400779,166.333333,0.990567,78.777960,6.938794,79.58650,55.000000,111.333333
2021-08-10 18:00:00,14,three_M,0.072234,3.023242,6.216965,1.659863,2.569510,4.831070,0.396139,24.845827,0.481053,294.666667,1.003281,95.429140,7.074840,95.28256,120.000000,174.666667
2021-08-10 19:00:00,14,three_M,-0.382260,5.094420,11.311385,0.317249,4.294727,9.125796,0.174191,24.830793,0.488010,513.666667,1.014741,97.769280,8.714882,96.39110,233.000000,280.666667
2021-08-10 20:00:00,14,three_M,-0.388567,3.370340,14.681725,0.000000,2.767847,11.893643,0.111825,24.893627,0.388567,215.666667,0.995797,76.792720,9.288142,77.04508,100.000000,115.666667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-08-11 11:00:00,5,six_M,-0.379348,0.859913,33.425103,0.000000,0.512527,24.372527,0.067865,22.461283,0.379348,189.666667,0.761634,60.545188,5.265635,79.26544,37.333333,152.333333
2021-08-11 12:00:00,5,six_M,-0.301838,0.419857,33.844960,0.000000,0.219953,24.592480,0.030281,22.569983,0.301838,15.333333,0.762865,48.142638,5.362458,63.07858,3.666667,11.666667
2021-08-11 13:00:00,5,six_M,-0.331129,0.126543,33.971503,0.000000,0.029510,24.621990,0.001260,22.733530,0.331129,49.666667,0.755220,52.617118,5.133655,69.25510,5.666667,44.000000
2021-08-11 14:00:00,5,six_M,-0.343504,0.511143,34.482646,0.000000,0.232433,24.854424,0.044548,22.810300,0.343504,67.333333,0.869355,60.989686,6.288553,70.04542,6.000000,61.333333
