In [2]:
import numpy as np
import pandas as pd
from collections import OrderedDict, Counter
import itertools


def flat_list(d_list):
    '''
    Flatten one level of nesting.

    Takes an iterable of iterables and returns a new list holding the
    items of every inner iterable, in order.
    '''
    flattened = []
    for inner in d_list:
        flattened.extend(inner)
    return flattened


def incal_create_df_incal_format(df, dict_groups):
    '''
    Re-index a flat table by (Date_Time_1, subjectsID, Group).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Date_Time_1', 'subjectsID' and 'Group'.
        Every other column is kept as a data column. The input frame is
        not modified.
    dict_groups : mapping
        Group name -> list of subject ids. The key order becomes the
        category order of the 'Group' level; the concatenated value lists
        become the category order of the 'subjectsID' level.

    Returns
    -------
    pandas.DataFrame
        The remaining columns of ``df`` (original per-column dtypes kept),
        indexed by a three-level MultiIndex: a datetime level and two
        ordered categorical levels.

    Notes
    -----
    Per pandas.Categorical semantics, subject ids or group names that do
    not appear in ``dict_groups`` become missing values in the index.

    dependencies: itertools, pandas
    '''
    index_columns = ['Date_Time_1', 'subjectsID', 'Group']
    categories_subjects = list(
        itertools.chain.from_iterable(dict_groups.values()))
    categories_groups = list(dict_groups.keys())

    multi_index = pd.MultiIndex.from_arrays(
        [
            pd.DatetimeIndex(df['Date_Time_1']),
            pd.Categorical(df['subjectsID'],
                           categories=categories_subjects,
                           ordered=True),
            pd.Categorical(df['Group'],
                           categories=categories_groups,
                           ordered=True),
        ],
        names=index_columns)

    # Attach the index to the frame itself instead of rebuilding it from
    # df.values: .values collapses all columns to one common dtype (object
    # when the columns are mixed), which loses the numeric dtypes.
    result = df.drop(columns=index_columns)
    result.index = multi_index
    return result

In [3]:
import pandas as pd
import plotly.express as px
# Qualitative palette passed as color_discrete_sequence to the figures in this notebook.
colors = px.colors.qualitative.Alphabet

In [4]:
# Forward slash on purpose: the previous 'csvs\I...' literal contained the
# invalid escape sequence '\I' and only resolved as a path on Windows.
file_path = 'csvs/InCal_format_all_weeks_shani_exp.csv'

# Group name -> subject ids belonging to that group. The insertion order is
# used as the category order when the index is built.
dict_groups = OrderedDict(Control=[1, 4, 7, 10, 13],
                          Group_2=[3, 5, 9, 12, 16],
                          Group_3=[2, 6, 8, 11, 14, 15])

# Raw table; 'Date_Time_1' is parsed to datetimes while reading.
df = pd.read_csv(file_path, parse_dates=['Date_Time_1'])

In [5]:
# Measurement columns kept for the analysis; any other column is left out.
columns = [
    'actual_allmeters',
    'bodymass',
    'actual_foodupa',
    'kcal_hr',
    'Energy_Balance',
    'locomotor_activity',
]

# Index the raw table by (Date_Time_1, subjectsID, Group), then select the columns.
df_indexed = incal_create_df_incal_format(df, dict_groups)
df_cleaned = df_indexed[columns]
df_cleaned

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,actual_allmeters,bodymass,actual_foodupa,kcal_hr,Energy_Balance,locomotor_activity
Date_Time_1,subjectsID,Group,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-07-28 16:45:00,1,Control,0.820163,24.46564,0.000000,0.407835,-0.407835,201.0
2021-07-28 16:45:00,10,Control,1.404414,22.61497,0.313275,0.401331,-0.088056,211.0
2021-07-28 16:45:00,11,Group_3,3.553978,21.34998,0.006243,0.383572,-0.377329,360.0
2021-07-28 16:45:00,12,Group_2,11.915580,24.69681,0.204500,0.366636,-0.162136,788.0
2021-07-28 16:45:00,13,Control,3.425906,25.82226,0.116263,0.454166,-0.337903,440.0
...,...,...,...,...,...,...,...,...
2021-09-09 12:05:00,5,Group_2,0.000000,22.80760,0.000000,-0.002021,0.002021,0.0
2021-09-09 12:05:00,6,Group_3,0.189700,24.75276,0.000000,0.306832,-0.306832,109.0
2021-09-09 12:05:00,7,Control,0.257000,29.16107,0.000000,0.351710,-0.351710,75.0
2021-09-09 12:05:00,8,Group_3,0.020000,21.82529,0.000000,0.276330,-0.276330,31.0


Visualizing the change (delta) in body weight over the experiment period

In [6]:
# Body-mass column on its own (df = dataframe = data table).
# Not used further in this cell; kept for inspection.
bodymass_df = df_cleaned['bodymass']

# Grouping keys: 12-hour bins over the datetime index level (level 0) and
# the group label of every row (index level 2).
group_datetime = pd.Grouper(level=0, freq='12H')
Groups_ids = df_cleaned.index.get_level_values(2)
subjectIDs = df_cleaned.index.get_level_values(1)  # optional: to look at each subject

# Mean body mass per 12-hour bin and per group.
bodymass_df_mean_each12hrs = df_cleaned.groupby(
    [group_datetime, Groups_ids]).agg({'bodymass': 'mean'})

# Wide format: one row per 12-hour bin, one column per group.
unstack_body_mass = bodymass_df_mean_each12hrs['bodymass'].unstack()
# Baseline = the first row, i.e. each group's mean in the first 12-hour bin.
first_row_body_mass_for_each_group = unstack_body_mass.iloc[0]
# Despite the variable name this is (each point - baseline), so positive
# values mean the group is heavier than in its first bin.
start_minus_each_point = unstack_body_mass - first_row_body_mass_for_each_group
# Back from wide to long format, indexed by (12-hour bin, group).
bodymas_dalta_for_each_group_series = start_minus_each_point.stack()

# Data for the figure.
values = bodymas_dalta_for_each_group_series.values
Group = bodymas_dalta_for_each_group_series.index.get_level_values(1)
date_time = bodymas_dalta_for_each_group_series.index.get_level_values(0)

fig = px.scatter(
    x=date_time,
    y=values,
    color=Group,
    labels={
        'x': date_time.name,
        # The plotted values are differences from the first 12-hour bin,
        # not absolute body mass, so the axis label has to say so.
        'y': 'bodymass change from first 12h mean',
        'color': Group.name
    },
    template='simple_white',
    color_discrete_sequence=colors
)

fig.update_traces(mode='lines+markers')

# Summary:
All groups gained weight during the experiment, but, as the graph shows, the control group gained the most.

# Averages for each week and overall

overall

רעיון להוציא את כל הערכים שהם אפס ולסכום עבור פיצ'ר האכילה

In [10]:
# Columns summarised per subject over the whole recording.
bmass_loco_kcal = ['bodymass', 'locomotor_activity', 'kcal_hr', 'actual_foodupa']

# Grouping keys read from the MultiIndex: level 1 and level 2.
subjectIDs = df_cleaned.index.get_level_values(1)
Group = df_cleaned.index.get_level_values(2)

ave_df_overall = df_cleaned[bmass_loco_kcal].groupby([subjectIDs, Group])

# Sums for food intake and locomotor activity, means for body mass and kcal_hr.
# dropna() then removes every row that contains a NaN aggregate.
df_ace_overall_grouped = ave_df_overall.agg({
    'actual_foodupa': 'sum',
    'locomotor_activity': 'sum',
    'bodymass': 'mean',
    'kcal_hr': 'mean',  # the sum might also be interesting to look at
}).dropna()

# Two mice escaped from their cages, so this needs to be taken into account

In [8]:
def create_bar(averages_df, colors, feature_name, index):
    '''
    Build a plotly express bar chart of one column, keyed by one index level.

    averages_df  : frame whose index level `index` supplies both the x
                   values and the bar colors
    colors       : forwarded to px.bar as color_discrete_sequence
    feature_name : column of averages_df used for the bar heights
    index        : level passed to averages_df.index.get_level_values

    Returns the object produced by px.bar.

    dependencies: plotly.express as px
    '''
    level_values = averages_df.index.get_level_values(index)
    heights = averages_df[feature_name]
    level_name = level_values.name
    axis_labels = {'x': level_name, 'y': heights.name, 'color': level_name}
    return px.bar(
        x=level_values,
        y=heights.values,
        color=level_values,
        labels=axis_labels,
        template='simple_white',
        color_discrete_sequence=colors,
    )
# One bar chart per summarised column, keyed by index level 0 of the grouped frame.
for feature in bmass_loco_kcal:
    feature_fig = create_bar(df_ace_overall_grouped, colors, feature, 0)
    feature_fig.show()

In [21]:
# Keep only the rows whose food intake is above 0.0001.
mask = df_cleaned['actual_foodupa'] > 0.0001
filtered_non_eating_times_df = df_cleaned.loc[mask, 'actual_foodupa']

# Index levels of the remaining rows: level 1 and level 2.
subjectIDs = filtered_non_eating_times_df.index.get_level_values(1)
group = filtered_non_eating_times_df.index.get_level_values(2)

# Mean and total of the remaining intake values for each group.
grouped = filtered_non_eating_times_df.groupby([group])
grouped.agg(['mean', 'sum'])

Unnamed: 0_level_0,mean,sum
Group,Unnamed: 1_level_1,Unnamed: 2_level_1
Control,0.290536,2613.952004
Group_2,0.274303,594.413792
Group_3,0.403859,1200.267478


In [35]:
# Grouping keys: weekly bins over index level 0, plus index levels 1 and 2.
group = df_cleaned.index.get_level_values(2)
subjectIDs = df_cleaned.index.get_level_values(1)
week_bins = pd.Grouper(level=0, freq='W')

each_week_grouped = df_cleaned.groupby([week_bins, subjectIDs, group])

# Weekly sums for every column except body mass, which is averaged.
# dropna() then removes every row that contains a NaN aggregate.
each_week_df = each_week_grouped.agg({
    'actual_allmeters': 'sum',
    'bodymass': 'mean',
    'actual_foodupa': 'sum',
    'kcal_hr': 'sum',
    'locomotor_activity': 'sum',
}).dropna()

In [50]:
def get_index_values(df, n):
    '''
    Return the labels of index level `n` of `df`, one per row,
    as given by df.index.get_level_values(n).
    '''
    frame_index = df.index
    return frame_index.get_level_values(n)

# Weekly totals to plot, plus the index levels used as x values and facets.
Intake = each_week_df['actual_foodupa']
locomotor_activity = each_week_df['locomotor_activity']
subjectIDs = get_index_values(each_week_df, 1)
each_week = get_index_values(each_week_df, 0)

# Weekly food intake per subject, one facet per week.
px.bar(
    x=subjectIDs.values,
    y=Intake.values,
    color=subjectIDs,
    facet_col=each_week.astype('string'),
    labels={
        # x carries the subject ids and y the intake values; the two labels
        # were previously swapped.
        'x': subjectIDs.name,
        'y': Intake.name,
        'color': subjectIDs.name},
    template='simple_white',
    color_discrete_sequence=colors)

In [51]:
# Weekly locomotor activity per subject, one facet per week.
px.bar(
    x=subjectIDs.values,
    y=locomotor_activity.values,
    color=subjectIDs,
    facet_col=each_week.astype('string'),
    labels={
        # x carries the subject ids and y the activity values; the two
        # labels were previously swapped.
        'x': subjectIDs.name,
        'y': locomotor_activity.name,
        'color': subjectIDs.name},
    template='simple_white',
    color_discrete_sequence=colors)

In [55]:
# Weekly difference between the summed 'actual_foodupa' and summed 'kcal_hr'.
eb = each_week_df['actual_foodupa'] - each_week_df['kcal_hr']

# One facet per week, one bar per subject.
px.bar(
    x=subjectIDs.values,
    y=eb.values,
    color=subjectIDs,
    facet_col=each_week.astype('string'),
    labels={
        # x carries the subject ids. y is the difference computed above; it
        # was previously labelled with the subject id name, and x with the
        # locomotor-activity column name.
        'x': subjectIDs.name,
        'y': 'energy_balance (actual_foodupa - kcal_hr)',
        'color': subjectIDs.name},
    template='simple_white',
    color_discrete_sequence=colors)