# Data from 1.11.2021 to the end of the app

In [89]:
import pandas as pd
import numpy as np
import bz2
pd.set_option('display.max_columns', None)
import statistics
import matplotlib.pyplot as plt
import seaborn as sns
import math
import statsmodels.api as sm
import matplotlib as mpl
from scipy import stats
from distfit import distfit
import scikit_posthocs as sp
import datetime as dt

pd.options.display.float_format = '{:.2f}'.format

import warnings
warnings.filterwarnings('ignore')

In [90]:
df = pd.read_csv("/home/evida-monika/mhunters/final_data_3.csv.bz2", compression="bz2", sep=",")

# this is cleaned dataset
# we need to delete another user - ID = 235, because it has 274 sessions in 4 weeks - impossible.

df = df[df['id_users'] != 235]

In [91]:
# changing types of data

cols = ['name_en_exercises','discarded_session_execution', 'code_name_sessions', 
        'name_en_sessions', 'active_user_programs', 'completed_user_programs',
        'pro_programs', 'available_programs', 'name_en_programs', 
        'gender', 
        'activity_level', 'goal', 'body_type', 'newsletter_subscription', 'notifications_setting',
        'language', 'scientific_data_usage', 'BMI_category', 'name_en_implements']

for col in cols:
    df[col] = df[col].astype('category')
    
col_date = ['updated_at_ex_ex', 'created_at_exercises', 'updated_at_exercises',
            'updated_at_session_execution', 'created_at_user_programs', 'updated_at_user_programs',
            'created_at_programs', 'updated_at_programs', 'created_at_users', 'updated_at_users',
            'date_of_birth']

for col in col_date:
    df[col] = pd.to_datetime(df[col])#.dt.strftime("%Y-%m-%d %H:%M:%S")

cat_names = {
    'gender': {1: 'male', 0: 'female'},
    'activity_level': {0: 'very active', 1: 'active', 2: 'sedentary'},
    'goal': {0: 'lose', 1: 'gain', 2: 'antiaging'},
    'body_type': {0: 'thin', 1: 'mid', 2: 'strong'}
}

df = df.replace(cat_names)


In [92]:
# dataset only for users

df_users_only_once = df.drop_duplicates(subset=['id_users'], keep='first')

In [93]:
# here dropping all of the columns that are not relevant for users

df_users_only = df_users_only_once.loc[:, ['id_users', 'created_at_users', 'updated_at_users', 'gender',
       'date_of_birth', 'height', 'weight', 'activity_level', 'goal',
       'body_type', 'body_fat', 'newsletter_subscription',
       'notifications_setting', 'training_days_setting', 'language', 'points',
       'scientific_data_usage', 'best_weekly_streak_users', 'BMI', 'BMI_category', 'total_sessions_users2',
                                          'total_time_users2', 'total_reps_users2', 'reps_per_session_users2',
                                          'total_calories_users2', 'kcal_per_session_users2']]

In [94]:
# dataset for only one of id_session_execution

df_session_exe_only_once = df.drop_duplicates(subset=['id_session_execution'], keep='first')

In [95]:
# here dropping all of the columns that are not relevant for session_execution

df_session_exe = df_session_exe_only_once.loc[:, ['id_session_execution', 
                                                  'difficulty_feedback_session_execution',
       'enjoyment_feedback_session_execution',
       'reps_executed_session_execution', 'updated_at_session_execution',
       'discarded_session_execution', 'id_sessions', 'order_sessions',
       'time_duration_sessions', 'code_name_sessions', 'name_en_sessions',
       'id_user_programs', 'created_at_user_programs',
       'updated_at_user_programs', 'active_user_programs',
       'current_session_id_user_programs', 'completed_user_programs',
       'id_programs', 'created_at_programs', 'updated_at_programs',
       'pro_programs', 'available_programs', 'strength_programs',
       'endurance_programs', 'technique_programs', 'flexibility_programs',
       'intensity_programs', 'name_en_programs', 'description_en_programs',
       'id_users', 'created_at_users', 'updated_at_users', 'gender',
       'date_of_birth', 'height', 'weight', 'activity_level', 'goal',
       'body_type', 'body_fat', 'newsletter_subscription',
       'notifications_setting', 'training_days_setting', 'language', 'points',
       'scientific_data_usage', 'best_weekly_streak_users', 'BMI', 'BMI_category', 'total_sessions_users2',
       'total_time_session_execution', 'total_reps_session_execution',
       'total_time_users2', 'total_reps_users2', 'reps_per_session_users2',
       'total_time_session_execution_min', 'reps_per_min_session_execution',
       'exercise_execution_time_min', 'calories_session_execution',
       'total_calories_users2', 'kcal_per_session_users2',
                                          'YYYY/WW']]

In [96]:
# manipulations

users_import = pd.read_csv("/home/evida-monika/mhunters/users.csv", sep=",")

users_import = users_import.loc[:, ['id', 'imported']]

users_import.replace('t', True, inplace = True)
users_import.replace('f', False, inplace = True)

# users_import_true = users_import[users_import['imported'] == True]

# imported_true = df_users_only_once.merge(users_import_true, how = 'inner', left_on = 'id_users', right_on = 'id')

users_import2 = users_import[users_import['imported'] == False]

# users_import.shape

imported = df_users_only_once.merge(users_import, how = 'inner', left_on = 'id_users', right_on = 'id')

# df_users_only.shape

imported.shape

(3171, 84)

In [97]:
imported_all = df.merge(users_import, how = 'inner', left_on = 'id_users', right_on = 'id')

df.shape

(1860393, 82)

In [98]:
imported_all

Unnamed: 0,id_ex_ex,reps_executed_ex_ex,execution_time_ex_ex,order_ex_ex,updated_at_ex_ex,id_session_set_ex,order_session_set_ex,id_exercises,created_at_exercises,updated_at_exercises,body_parts_focused_exercises,muscles_exercises,joints_exercises,met_multiplier_exercises,name_en_exercises,id_session_block_ex,order_session_block_ex,id_session_execution,difficulty_feedback_session_execution,enjoyment_feedback_session_execution,reps_executed_session_execution,updated_at_session_execution,discarded_session_execution,id_sessions,order_sessions,time_duration_sessions,code_name_sessions,name_en_sessions,id_user_programs,created_at_user_programs,updated_at_user_programs,active_user_programs,current_session_id_user_programs,completed_user_programs,id_programs,created_at_programs,updated_at_programs,pro_programs,available_programs,strength_programs,endurance_programs,technique_programs,flexibility_programs,intensity_programs,name_en_programs,description_en_programs,id_users,created_at_users,updated_at_users,gender,date_of_birth,height,weight,activity_level,goal,body_type,body_fat,newsletter_subscription,notifications_setting,training_days_setting,language,points,scientific_data_usage,best_weekly_streak_users,id_implements,name_en_implements,BMI,BMI_category,total_sessions_users2,total_time_session_execution,total_reps_session_execution,total_time_users2,total_reps_users2,reps_per_session_users2,total_time_session_execution_min,reps_per_min_session_execution,exercise_execution_time_min,calories,calories_session_execution,total_calories_users2,kcal_per_session_users2,YYYY/WW,id,imported
0,342539,0.00,0.00,14,2020-12-11 14:17:16.127382,11758.00,2.00,5968,2020-10-16 09:33:15.172657,2022-01-21 19:15:56.401181,{},{0},{0},1.00,Rest,1291.00,1.00,1291.00,5.00,3.00,75.00,2020-12-11 14:17:15.877294,False,88.00,1.00,996.00,PH2-2-1,IRP & IR,659.00,2020-12-11 12:57:19.338749,2020-12-11 12:57:19.338749,True,88.00,False,17.00,2020-11-23 13:57:13.225121,2021-09-29 14:53:44.706540,False,True,2.00,2.00,1.00,2.00,2.00,Smash your goals,"Get into the habit of working out, with easy s...",182.00,2020-12-11 11:34:00.850343,2021-12-22 06:39:39.280245,male,2000-01-01,160.00,60.00,active,lose,thin,15.00,False,True,1.00,es,220.00,False,1.00,0.00,No implement,23.44,Normal,2.00,338.00,81.00,583.00,131.00,65.50,5.63,14.38,0.00,0.00,16.02,26.30,13.15,2020/50,182,False
1,342538,0.00,0.00,14,2020-12-11 14:17:16.122598,11758.00,2.00,5968,2020-10-16 09:33:15.172657,2022-01-21 19:15:56.401181,{},{0},{0},1.00,Rest,1291.00,1.00,1291.00,5.00,3.00,75.00,2020-12-11 14:17:15.877294,False,88.00,1.00,996.00,PH2-2-1,IRP & IR,659.00,2020-12-11 12:57:19.338749,2020-12-11 12:57:19.338749,True,88.00,False,17.00,2020-11-23 13:57:13.225121,2021-09-29 14:53:44.706540,False,True,2.00,2.00,1.00,2.00,2.00,Smash your goals,"Get into the habit of working out, with easy s...",182.00,2020-12-11 11:34:00.850343,2021-12-22 06:39:39.280245,male,2000-01-01,160.00,60.00,active,lose,thin,15.00,False,True,1.00,es,220.00,False,1.00,0.00,No implement,23.44,Normal,2.00,338.00,81.00,583.00,131.00,65.50,5.63,14.38,0.00,0.00,16.02,26.30,13.15,2020/50,182,False
2,342537,0.00,0.00,14,2020-12-11 14:17:16.118512,11758.00,2.00,5968,2020-10-16 09:33:15.172657,2022-01-21 19:15:56.401181,{},{0},{0},1.00,Rest,1291.00,1.00,1291.00,5.00,3.00,75.00,2020-12-11 14:17:15.877294,False,88.00,1.00,996.00,PH2-2-1,IRP & IR,659.00,2020-12-11 12:57:19.338749,2020-12-11 12:57:19.338749,True,88.00,False,17.00,2020-11-23 13:57:13.225121,2021-09-29 14:53:44.706540,False,True,2.00,2.00,1.00,2.00,2.00,Smash your goals,"Get into the habit of working out, with easy s...",182.00,2020-12-11 11:34:00.850343,2021-12-22 06:39:39.280245,male,2000-01-01,160.00,60.00,active,lose,thin,15.00,False,True,1.00,es,220.00,False,1.00,0.00,No implement,23.44,Normal,2.00,338.00,81.00,583.00,131.00,65.50,5.63,14.38,0.00,0.00,16.02,26.30,13.15,2020/50,182,False
3,342536,0.00,0.00,14,2020-12-11 14:17:16.114356,11758.00,2.00,5968,2020-10-16 09:33:15.172657,2022-01-21 19:15:56.401181,{},{0},{0},1.00,Rest,1291.00,1.00,1291.00,5.00,3.00,75.00,2020-12-11 14:17:15.877294,False,88.00,1.00,996.00,PH2-2-1,IRP & IR,659.00,2020-12-11 12:57:19.338749,2020-12-11 12:57:19.338749,True,88.00,False,17.00,2020-11-23 13:57:13.225121,2021-09-29 14:53:44.706540,False,True,2.00,2.00,1.00,2.00,2.00,Smash your goals,"Get into the habit of working out, with easy s...",182.00,2020-12-11 11:34:00.850343,2021-12-22 06:39:39.280245,male,2000-01-01,160.00,60.00,active,lose,thin,15.00,False,True,1.00,es,220.00,False,1.00,0.00,No implement,23.44,Normal,2.00,338.00,81.00,583.00,131.00,65.50,5.63,14.38,0.00,0.00,16.02,26.30,13.15,2020/50,182,False
4,342535,0.00,0.00,14,2020-12-11 14:17:16.110165,11758.00,2.00,5968,2020-10-16 09:33:15.172657,2022-01-21 19:15:56.401181,{},{0},{0},1.00,Rest,1291.00,1.00,1291.00,5.00,3.00,75.00,2020-12-11 14:17:15.877294,False,88.00,1.00,996.00,PH2-2-1,IRP & IR,659.00,2020-12-11 12:57:19.338749,2020-12-11 12:57:19.338749,True,88.00,False,17.00,2020-11-23 13:57:13.225121,2021-09-29 14:53:44.706540,False,True,2.00,2.00,1.00,2.00,2.00,Smash your goals,"Get into the habit of working out, with easy s...",182.00,2020-12-11 11:34:00.850343,2021-12-22 06:39:39.280245,male,2000-01-01,160.00,60.00,active,lose,thin,15.00,False,True,1.00,es,220.00,False,1.00,0.00,No implement,23.44,Normal,2.00,338.00,81.00,583.00,131.00,65.50,5.63,14.38,0.00,0.00,16.02,26.30,13.15,2020/50,182,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1860388,1837552,10.00,40.00,1,2022-01-01 19:28:46.747983,233854.00,4.00,5236,2020-10-15 12:37:15.906971,2022-01-11 08:02:28.410085,"{""Todo el cuerpo""}","{pectorales,"" tríceps"","" cuádriceps""}","{hombros,"" cadera""}",5.00,Burpee,52445.00,3.00,22939.00,5.00,3.00,229.00,2022-01-01 19:28:46.794546,False,536.00,1.00,1241.00,Sesion 1,Sesion 1,10370.00,2021-10-28 06:00:46.975459,2022-01-01 19:29:11.595926,True,537.00,False,29.00,2020-11-23 14:12:13.700871,2021-09-29 14:54:02.270887,True,True,3.00,4.00,3.00,4.00,4.00,Unbreakable,Join Unbreakable to optimize your body from th...,1445.00,2021-10-25 11:00:57.621767,2022-01-01 19:29:11.563186,male,1973-12-31,165.00,62.00,active,lose,thin,24.00,True,True,3.00,es,21536.00,False,1.00,0.00,No implement,22.77,Normal,1.00,1369.00,254.00,1369.00,254.00,254.00,22.82,11.13,0.67,3.62,75.32,75.32,75.32,2021/52,1445,True
1860389,1837553,20.00,80.00,2,2022-01-01 19:28:46.751620,233854.00,4.00,5606,2020-10-15 12:37:20.301418,2021-08-24 13:43:17.042346,{Core},"{""recto mayor del abdomen"","" abdominales oblic...","{cadera,"" rodillas""}",2.50,Sit-up,52445.00,3.00,22939.00,5.00,3.00,229.00,2022-01-01 19:28:46.794546,False,536.00,1.00,1241.00,Sesion 1,Sesion 1,10370.00,2021-10-28 06:00:46.975459,2022-01-01 19:29:11.595926,True,537.00,False,29.00,2020-11-23 14:12:13.700871,2021-09-29 14:54:02.270887,True,True,3.00,4.00,3.00,4.00,4.00,Unbreakable,Join Unbreakable to optimize your body from th...,1445.00,2021-10-25 11:00:57.621767,2022-01-01 19:29:11.563186,male,1973-12-31,165.00,62.00,active,lose,thin,24.00,True,True,3.00,es,21536.00,False,1.00,0.00,No implement,22.77,Normal,1.00,1369.00,254.00,1369.00,254.00,254.00,22.82,11.13,1.33,3.62,75.32,75.32,75.32,2021/52,1445,True
1860390,1837554,1.00,3.00,1,2022-01-01 19:28:46.764120,233855.00,1.00,5662,2020-10-15 12:37:20.908696,2021-08-24 13:43:18.370778,{Core},"{""recto mayor del abdomen"","" abdominales oblic...","{codos,"" hombros""}",2.50,Side plank (right),52446.00,4.00,22939.00,5.00,3.00,229.00,2022-01-01 19:28:46.794546,False,536.00,1.00,1241.00,Sesion 1,Sesion 1,10370.00,2021-10-28 06:00:46.975459,2022-01-01 19:29:11.595926,True,537.00,False,29.00,2020-11-23 14:12:13.700871,2021-09-29 14:54:02.270887,True,True,3.00,4.00,3.00,4.00,4.00,Unbreakable,Join Unbreakable to optimize your body from th...,1445.00,2021-10-25 11:00:57.621767,2022-01-01 19:29:11.563186,male,1973-12-31,165.00,62.00,active,lose,thin,24.00,True,True,3.00,es,21536.00,False,1.00,0.00,No implement,22.77,Normal,1.00,1369.00,254.00,1369.00,254.00,254.00,22.82,11.13,0.05,0.14,75.32,75.32,75.32,2021/52,1445,True
1860391,1837545,10.00,40.00,1,2022-01-01 19:28:46.713013,233852.00,2.00,5236,2020-10-15 12:37:15.906971,2022-01-11 08:02:28.410085,"{""Todo el cuerpo""}","{pectorales,"" tríceps"","" cuádriceps""}","{hombros,"" cadera""}",5.00,Burpee,52445.00,3.00,22939.00,5.00,3.00,229.00,2022-01-01 19:28:46.794546,False,536.00,1.00,1241.00,Sesion 1,Sesion 1,10370.00,2021-10-28 06:00:46.975459,2022-01-01 19:29:11.595926,True,537.00,False,29.00,2020-11-23 14:12:13.700871,2021-09-29 14:54:02.270887,True,True,3.00,4.00,3.00,4.00,4.00,Unbreakable,Join Unbreakable to optimize your body from th...,1445.00,2021-10-25 11:00:57.621767,2022-01-01 19:29:11.563186,male,1973-12-31,165.00,62.00,active,lose,thin,24.00,True,True,3.00,es,21536.00,False,1.00,0.00,No implement,22.77,Normal,1.00,1369.00,254.00,1369.00,254.00,254.00,22.82,11.13,0.67,3.62,75.32,75.32,75.32,2021/52,1445,True


In [99]:
imported_all.shape

imported_sessions = df_session_exe.merge(users_import, how = 'inner', left_on = 'id_users', right_on = 'id')

In [100]:
imported_sessions.head()

Unnamed: 0,id_session_execution,difficulty_feedback_session_execution,enjoyment_feedback_session_execution,reps_executed_session_execution,updated_at_session_execution,discarded_session_execution,id_sessions,order_sessions,time_duration_sessions,code_name_sessions,name_en_sessions,id_user_programs,created_at_user_programs,updated_at_user_programs,active_user_programs,current_session_id_user_programs,completed_user_programs,id_programs,created_at_programs,updated_at_programs,pro_programs,available_programs,strength_programs,endurance_programs,technique_programs,flexibility_programs,intensity_programs,name_en_programs,description_en_programs,id_users,created_at_users,updated_at_users,gender,date_of_birth,height,weight,activity_level,goal,body_type,body_fat,newsletter_subscription,notifications_setting,training_days_setting,language,points,scientific_data_usage,best_weekly_streak_users,BMI,BMI_category,total_sessions_users2,total_time_session_execution,total_reps_session_execution,total_time_users2,total_reps_users2,reps_per_session_users2,total_time_session_execution_min,reps_per_min_session_execution,exercise_execution_time_min,calories_session_execution,total_calories_users2,kcal_per_session_users2,YYYY/WW,id,imported
0,1291.0,5.0,3.0,75.0,2020-12-11 14:17:15.877294,False,88.0,1.0,996.0,PH2-2-1,IRP & IR,659.0,2020-12-11 12:57:19.338749,2020-12-11 12:57:19.338749,True,88.0,False,17.0,2020-11-23 13:57:13.225121,2021-09-29 14:53:44.706540,False,True,2.0,2.0,1.0,2.0,2.0,Smash your goals,"Get into the habit of working out, with easy s...",182.0,2020-12-11 11:34:00.850343,2021-12-22 06:39:39.280245,male,2000-01-01,160.0,60.0,active,lose,thin,15.0,False,True,1.0,es,220.0,False,1.0,23.44,Normal,2.0,338.0,81.0,583.0,131.0,65.5,5.63,14.38,0.0,16.02,26.3,13.15,2020/50,182,False
1,1292.0,5.0,3.0,45.0,2020-12-11 14:22:50.864652,False,88.0,1.0,996.0,PH2-2-1,IRP & IR,659.0,2020-12-11 12:57:19.338749,2020-12-11 12:57:19.338749,True,88.0,False,17.0,2020-11-23 13:57:13.225121,2021-09-29 14:53:44.706540,False,True,2.0,2.0,1.0,2.0,2.0,Smash your goals,"Get into the habit of working out, with easy s...",182.0,2020-12-11 11:34:00.850343,2021-12-22 06:39:39.280245,male,2000-01-01,160.0,60.0,active,lose,thin,15.0,False,True,1.0,es,220.0,False,1.0,23.44,Normal,2.0,245.0,50.0,583.0,131.0,65.5,4.08,12.24,0.02,10.28,26.3,13.15,2020/50,182,False
2,3253.0,5.0,3.0,0.0,2021-02-09 12:53:19.873117,False,223.0,1.0,1405.0,PM5-2-1,IRP & TB,704.0,2021-02-02 11:11:57.575351,2021-03-17 12:35:12.690296,False,226.0,False,27.0,2020-11-23 14:05:30.101469,2021-09-29 14:53:35.625344,False,True,3.0,4.0,2.0,3.0,4.0,Smash your goals,The 10-session program created to help you bui...,360.0,2021-02-02 11:10:55.159616,2021-12-22 06:39:41.148120,female,2000-01-01,160.0,60.0,sedentary,lose,thin,25.0,False,True,5.0,en,1300.0,False,10.0,23.44,Normal,10.0,97.0,30.0,7697.0,2106.0,210.6,1.62,18.56,0.02,3.64,439.54,43.95,2021/06,360,False
3,3279.0,5.0,3.0,282.0,2021-03-14 11:44:54.066742,False,224.0,2.0,988.0,PM5-2-2,SS & IT,704.0,2021-02-02 11:11:57.575351,2021-03-17 12:35:12.690296,False,226.0,False,27.0,2020-11-23 14:05:30.101469,2021-09-29 14:53:35.625344,False,True,3.0,4.0,2.0,3.0,4.0,Smash your goals,The 10-session program created to help you bui...,360.0,2021-02-02 11:10:55.159616,2021-12-22 06:39:41.148120,female,2000-01-01,160.0,60.0,sedentary,lose,thin,25.0,False,True,5.0,en,1300.0,False,10.0,23.44,Normal,10.0,1333.0,282.0,7697.0,2106.0,210.6,22.22,12.69,0.0,81.42,439.54,43.95,2021/10,360,False
4,3281.0,5.0,3.0,276.0,2021-03-16 18:05:25.233229,False,226.0,4.0,1690.0,PM5-2-4,SS & IRP,704.0,2021-02-02 11:11:57.575351,2021-03-17 12:35:12.690296,False,226.0,False,27.0,2020-11-23 14:05:30.101469,2021-09-29 14:53:35.625344,False,True,3.0,4.0,2.0,3.0,4.0,Smash your goals,The 10-session program created to help you bui...,360.0,2021-02-02 11:10:55.159616,2021-12-22 06:39:41.148120,female,2000-01-01,160.0,60.0,sedentary,lose,thin,25.0,False,True,5.0,en,1300.0,False,10.0,23.44,Normal,10.0,775.0,284.0,7697.0,2106.0,210.6,12.92,21.99,0.02,39.81,439.54,43.95,2021/11,360,False


In [101]:
df_session_exe.shape

imported_sessions.shape

imported_sessions = imported_sessions.sort_values(['id_users','updated_at_session_execution'])

imported_sessions_2 = imported_sessions[(imported_sessions['updated_at_session_execution'] > '2021-11-01')]


In [83]:
imported_sessions_lat_sess = imported_sessions.drop_duplicates(subset=['id_users'], keep = 'last')

# imported_sessions_lat_sess[(imported_sessions_lat_sess['imported'] == True) & (imported_sessions_lat_sess['updated_at_session_execution'].dt.year == 2022)]

# imported_sessions_lat_sess[(imported_sessions_lat_sess['imported'] == True) & (imported_sessions_lat_sess['updated_at_session_execution'] > '2021-11-01')]

# imported_sessions_lat_sess[(imported_sessions_lat_sess['imported'] == True) & (imported_sessions_lat_sess['updated_at_session_execution'] > '2021-11-01') & (imported_sessions_lat_sess['updated_at_session_execution'].dt.year == 2021)]

imported_sessions_lat_sess_users = imported_sessions_lat_sess[(imported_sessions_lat_sess['updated_at_session_execution'] > '2021-11-01')]

In [102]:
imported_sessions_2.sort_values('updated_at_session_execution')

Unnamed: 0,id_session_execution,difficulty_feedback_session_execution,enjoyment_feedback_session_execution,reps_executed_session_execution,updated_at_session_execution,discarded_session_execution,id_sessions,order_sessions,time_duration_sessions,code_name_sessions,name_en_sessions,id_user_programs,created_at_user_programs,updated_at_user_programs,active_user_programs,current_session_id_user_programs,completed_user_programs,id_programs,created_at_programs,updated_at_programs,pro_programs,available_programs,strength_programs,endurance_programs,technique_programs,flexibility_programs,intensity_programs,name_en_programs,description_en_programs,id_users,created_at_users,updated_at_users,gender,date_of_birth,height,weight,activity_level,goal,body_type,body_fat,newsletter_subscription,notifications_setting,training_days_setting,language,points,scientific_data_usage,best_weekly_streak_users,BMI,BMI_category,total_sessions_users2,total_time_session_execution,total_reps_session_execution,total_time_users2,total_reps_users2,reps_per_session_users2,total_time_session_execution_min,reps_per_min_session_execution,exercise_execution_time_min,calories_session_execution,total_calories_users2,kcal_per_session_users2,YYYY/WW,id,imported
8389,6017.00,5.00,3.00,222.00,2021-11-01 00:06:11.953512,False,595.00,2.00,1238.00,W01_D02,Fundamentals - Session 2,21743.00,2021-10-28 06:17:47.167701,2021-11-19 22:35:44.327916,False,602.00,False,30.00,2020-11-23 14:12:50.072945,2021-09-29 14:53:59.484766,True,True,4.00,4.00,5.00,2.00,4.00,Ring Master,The simplest equipment is often the most effec...,6557.00,2021-10-28 06:17:47.108449,2022-07-20 06:23:42.531597,male,1990-01-01,180.00,70.00,active,gain,thin,15.00,True,True,4.00,es,26447.00,True,5.00,21.60,Normal,40.00,1721.00,224.00,74140.00,7894.00,197.35,28.68,7.81,1.52,81.70,3878.08,96.95,2021/44,6557,True
8177,6018.00,4.00,4.00,120.00,2021-11-01 00:06:14.236997,False,865.00,3.00,630.00,Descanso_activo_3,Rewarding,26176.00,2021-10-28 15:35:59.277604,2021-10-28 15:35:59.277604,False,594.00,False,30.00,2020-11-23 14:12:50.072945,2021-09-29 14:53:59.484766,True,True,4.00,4.00,5.00,2.00,4.00,Ring Master,The simplest equipment is often the most effec...,7761.00,2021-10-28 06:35:56.542124,2022-09-07 18:28:49.283551,male,1974-08-29,186.00,79.00,active,gain,mid,10.00,True,True,5.00,es,212090.00,False,37.00,22.84,Normal,135.00,1647.00,121.00,244626.00,36903.00,273.36,27.45,4.41,0.50,73.80,16250.29,120.37,2021/44,7761,True
8875,6019.00,5.00,3.00,302.00,2021-11-01 00:14:33.713258,False,537.00,2.00,1223.00,Sesion 2,Session 2,19842.00,2021-10-28 06:15:19.240653,2022-01-25 00:40:01.359275,True,565.00,False,29.00,2020-11-23 14:12:13.700871,2021-09-29 14:54:02.270887,True,True,3.00,4.00,3.00,4.00,4.00,Unbreakable,Join Unbreakable to optimize your body from th...,6000.00,2021-10-28 06:15:19.208652,2022-05-08 20:29:51.356913,male,1985-01-01,186.00,86.00,active,gain,mid,30.00,False,False,3.00,es,100309.00,False,8.00,24.86,Normal,28.00,1424.00,304.00,41367.00,6535.00,233.39,23.73,12.81,0.00,89.66,2900.87,103.60,2021/44,6000,True
6233,6020.00,5.00,3.00,313.00,2021-11-01 00:37:06.171780,False,322.00,9.00,1480.00,MuHu10.1,Session 8,22865.00,2021-10-28 06:19:14.772421,2021-11-19 02:34:49.321532,False,324.00,False,34.00,2020-11-23 14:15:43.775009,2021-09-29 14:54:09.125824,True,True,5.00,4.00,3.00,3.00,5.00,Muscle Hunters Initiation 1,Muscle Hunters is a program created for you to...,6899.00,2021-10-28 06:19:14.747586,2022-06-12 02:04:34.935740,male,1993-05-23,178.00,70.00,active,lose,mid,30.00,False,True,5.00,es,71762.00,False,5.00,22.09,Normal,27.00,1286.00,336.00,47207.00,5413.00,200.48,21.43,15.68,0.15,54.77,2659.88,98.51,2021/44,6899,True
18495,6021.00,6.00,4.00,140.00,2021-11-01 03:08:56.777633,False,764.00,1.00,494.00,PM1.P2.1_V2,Session 1,5816.00,2021-10-25 11:02:42.363208,2021-11-21 08:17:14.151481,False,773.00,True,23.00,2020-11-23 14:03:38.477856,2021-09-29 14:53:34.661221,False,True,1.00,3.00,1.00,2.00,2.00,Smash your goals,Choose the program created to help you get int...,1829.00,2021-10-25 11:02:42.268420,2022-05-25 10:35:23.257373,female,1978-12-27,164.00,62.00,active,lose,thin,30.00,True,True,3.00,en,65363.00,True,32.00,23.05,Normal,73.00,717.00,140.00,100432.00,15555.00,213.08,11.95,11.72,0.42,33.12,5198.24,71.21,2021/44,1829,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27277,746511.00,7.00,5.00,228.00,2022-05-27 07:24:41.594127,False,552.00,17.00,1260.00,Sesion 17,Session 17,75140.00,2022-05-04 06:40:45.248216,2022-09-08 17:55:41.940099,False,555.00,False,29.00,2020-11-23 14:12:13.700871,2021-09-29 14:54:02.270887,True,True,3.00,4.00,3.00,4.00,4.00,Unbreakable,Join Unbreakable to optimize your body from th...,7631.00,2021-10-28 06:35:23.279492,2022-09-08 17:55:41.904600,male,1991-12-31,173.00,64.00,very active,gain,mid,15.00,False,True,3.00,es,81330.00,False,18.00,21.38,Normal,50.00,2484.00,228.00,82381.00,13276.00,265.52,41.40,5.51,1.48,138.09,4727.83,94.56,2022/21,7631,True
38034,746512.00,5.00,3.00,200.00,2022-05-27 07:25:29.252130,False,543.00,8.00,80.00,Sesion 8,Session 8,77311.00,2022-05-16 05:45:14.460699,2022-06-21 07:45:28.442772,True,543.00,False,29.00,2020-11-23 14:12:13.700871,2021-09-29 14:54:02.270887,True,True,3.00,4.00,3.00,4.00,4.00,Unbreakable,Join Unbreakable to optimize your body from th...,3547.00,2021-10-28 06:01:59.353948,2022-06-21 07:45:28.405112,male,1981-05-31,170.00,74.00,active,antiaging,mid,22.10,True,True,5.00,es,87537.00,False,6.00,25.61,Overweight,11.00,941.00,200.00,22012.00,3474.00,315.82,15.68,12.75,0.65,75.99,1521.20,138.29,2022/21,3547,True
48724,746513.00,7.00,4.00,358.00,2022-05-27 07:26:34.460050,False,601.00,8.00,1583.00,W02_D04,Fundamentals - Session 8,27704.00,2021-11-03 10:06:42.013672,2021-11-03 10:06:42.013672,False,594.00,False,30.00,2020-11-23 14:12:50.072945,2021-09-29 14:53:59.484766,True,True,4.00,4.00,5.00,2.00,4.00,Ring Master,The simplest equipment is often the most effec...,7114.00,2021-10-28 06:33:09.879166,2022-09-09 08:58:58.404435,male,1968-05-07,170.00,70.00,very active,gain,strong,20.00,True,True,3.00,es,28504.00,False,8.00,24.22,Normal,9.00,1015.00,360.00,13507.00,1818.00,202.00,16.92,21.28,0.67,67.06,652.57,72.51,2022/21,7114,True
49016,746514.00,5.00,3.00,291.00,2022-05-27 07:38:41.315872,False,544.00,9.00,65.00,Sesion 9,Session 9,77078.00,2022-05-13 16:42:22.760337,2022-09-03 06:17:19.199271,False,585.00,True,29.00,2020-11-23 14:12:13.700871,2021-09-29 14:54:02.270887,True,True,3.00,4.00,3.00,4.00,4.00,Unbreakable,Join Unbreakable to optimize your body from th...,17803.00,2022-05-13 16:39:12.843065,2022-09-10 07:01:07.198345,male,1966-12-22,178.00,65.00,active,gain,thin,20.00,False,True,5.00,es,6000.00,True,11.00,20.52,Normal,9.00,1509.00,329.00,13032.00,2451.00,272.33,25.15,13.08,0.55,105.30,806.72,89.64,2022/21,17803,False


In [103]:
imported_sessions_2.drop(['id', 'imported'], axis = 1, inplace = True)

In [104]:
imported_sessions_2 = imported_sessions_2.loc[:, 'id_session_execution']

In [105]:
df_after_upgrade = df.merge(imported_sessions_2, how = 'inner', left_on = 'id_session_execution', right_on = 'id_session_execution')


In [106]:
df_after_upgrade.shape

(1798735, 82)

In [66]:
'''

imported_sessions_lat_sess_users = imported_sessions_lat_sess_users.loc[:, ['id_session_execution']]

imported_sessions_lat_sess[(imported_sessions_lat_sess['updated_at_session_execution'] >= '2021-11-01')]['id_users']

df_after_upgrade = df.merge(imported_sessions_lat_sess_users, how = 'inner', left_on = 'id_session_execution', right_on = 'id_session_execution')

df_after_upgrade.shape # number of exercise executions after 1st deletion (1.11.2021)

df_session_after_upgrade = df_session_exe.merge(imported_sessions_lat_sess_users, how = 'inner', left_on = 'id_session_execution', right_on = 'id_session_execution')

df_session_after_upgrade.shape # number of sessions after 1st deletion (1.11.2021)

df_session_exe.shape

imported_sessions_lat_sess_users_other_date = imported_sessions_lat_sess[(imported_sessions_lat_sess['updated_at_session_execution'] >= '2021-12-01')]

df_after_upgrade_other_date = df.merge(imported_sessions_lat_sess_users_other_date, how = 'inner', left_on = 'id_users', right_on = 'id_users')

df_after_upgrade_other_date.shape # number of exercise executions after 2nd deletion (1.12.2021)

df_session_after_upgrade_other_date = df_session_exe.merge(imported_sessions_lat_sess_users_other_date, how = 'inner', left_on = 'id_users', right_on = 'id_users')

df_session_after_upgrade_other_date.shape # number of sessions after 2nd deletion (1.12.2021)


df_session_after_upgrade_users = df_session_after_upgrade.drop_duplicates(['id_users'], keep = 'last')

df_session_after_upgrade_users.shape # number of users after 1st deletion of data (1.11.2021)

df_after_upgrade_other_date_users = df_after_upgrade_other_date.drop_duplicates(['id_users'], keep = 'last')

df_after_upgrade_other_date_users.shape # number of users after 2nd deletion (1.12.2021)

# looking only for data without imported users
imported_only_false = imported_all[imported_all['imported'] == False]

imported_only_false = imported_only_false.sort_values(['id_users','updated_at_session_execution'])

imported_only_false_sess_exe = imported_only_false.drop_duplicates(['id_session_execution'], keep = 'last')

imported_only_false_sess_exe.shape # number of sessions after deleting all users that were in a new app

imported_only_false_users = imported_only_false.drop_duplicates(['id_users'], keep = 'last')

#imported_only_false_users.shape # number of users after deleting all users that were in a new app

'''

"\n\nimported_sessions_lat_sess_users = imported_sessions_lat_sess_users.loc[:, ['id_session_execution']]\n\nimported_sessions_lat_sess[(imported_sessions_lat_sess['updated_at_session_execution'] >= '2021-11-01')]['id_users']\n\ndf_after_upgrade = df.merge(imported_sessions_lat_sess_users, how = 'inner', left_on = 'id_session_execution', right_on = 'id_session_execution')\n\ndf_after_upgrade.shape # number of exercise executions after 1st deletion (1.11.2021)\n\ndf_session_after_upgrade = df_session_exe.merge(imported_sessions_lat_sess_users, how = 'inner', left_on = 'id_session_execution', right_on = 'id_session_execution')\n\ndf_session_after_upgrade.shape # number of sessions after 1st deletion (1.11.2021)\n\ndf_session_exe.shape\n\nimported_sessions_lat_sess_users_other_date = imported_sessions_lat_sess[(imported_sessions_lat_sess['updated_at_session_execution'] >= '2021-12-01')]\n\ndf_after_upgrade_other_date = df.merge(imported_sessions_lat_sess_users_other_date, how = 'inner', l

In [108]:
df_after_upgrade['id_session_execution'].nunique()

47617

In [109]:
df26 = df_after_upgrade.copy()

In [110]:
df26.drop(['total_sessions_users2',
       'total_time_session_execution', 'total_reps_session_execution',
       'total_time_users2', 'total_reps_users2', 'reps_per_session_users2',
       'total_time_session_execution_min', 'reps_per_min_session_execution',
       'exercise_execution_time_min', 'calories', 'calories_session_execution',
       'total_calories_users2', 'kcal_per_session_users2'], axis = 1, inplace = True)

In [111]:
cos6 = df26.loc[:, ['id_users', 'id_session_execution']]

In [112]:
total_sessions_user2 = cos6.groupby(['id_users'])['id_session_execution'].nunique()

In [113]:
total_sessions_user2

id_users
108.00       7
112.00       1
172.00       6
219.00       2
514.00      18
            ..
18127.00     1
18147.00     1
18157.00     1
18165.00     3
18174.00     1
Name: id_session_execution, Length: 3034, dtype: int64

In [114]:
df27 = df26.copy()

In [115]:
df27 = df27.merge(total_sessions_user2, how = 'left', left_on = 'id_users', right_on = 'id_users')

In [116]:
df27.rename(columns = {'id_session_execution_x': 'id_session_execution',
                       'id_session_execution_y': 'total_sessions_users2'},
            inplace = True)

In [117]:
df27['total_sessions_users2'] = df27['total_sessions_users2'].astype(float)

In [118]:
cos7 = df27.groupby(['id_session_execution'])['reps_executed_ex_ex'].sum()

cos8 = df27.groupby(['id_session_execution'])['execution_time_ex_ex'].sum()

In [119]:
df28 = df27.merge(cos8, how = 'left', left_on = 'id_session_execution', right_on = 'id_session_execution')

df28.rename(columns = {'execution_time_ex_ex_x': 'execution_time_ex_ex',
                       'execution_time_ex_ex_y': 'total_time_session_execution'},
           inplace = True)

In [120]:
df29 = df28.merge(cos7, how = 'left', left_on = 'id_session_execution', right_on = 'id_session_execution')

df29.rename(columns = {'reps_executed_ex_ex_x': 'reps_executed_ex_ex',
                       'reps_executed_ex_ex_y': 'total_reps_session_execution'},
           inplace = True)

In [121]:
coss1 = df29.drop_duplicates(subset=['id_session_execution'], keep='first')

cos9 = coss1.groupby(['id_users'])['total_time_session_execution'].sum()

In [122]:
df30 = df29.merge(cos9, how = 'left', left_on = 'id_users', right_on = 'id_users')

df30.rename(columns = {'total_time_session_execution_x': 'total_time_session_execution',
                       'total_time_session_execution_y': 'total_time_users2'},
            inplace = True)

In [123]:
cos10 = coss1.groupby(['id_users'])['total_reps_session_execution'].sum()

In [124]:
df31 = df30.merge(cos10, how = 'left', left_on = 'id_users', right_on = 'id_users')

df31.rename(columns = {'total_reps_session_execution_x': 'total_reps_session_execution',
                       'total_reps_session_execution_y': 'total_reps_users2'},
            inplace = True)

In [125]:
df31['reps_per_session_users2'] = df31['total_reps_users2'] / df31['total_sessions_users2']

In [126]:
df31['total_time_session_execution_min'] = df31['total_time_session_execution']/60

df31['reps_per_min_session_execution'] = df31['total_reps_session_execution']/df31['total_time_session_execution_min']


In [127]:
df31['exercise_execution_time_min'] = df31['execution_time_ex_ex']/60

df31['calories'] = df31['exercise_execution_time_min'] * (df31['met_multiplier_exercises'] * 3.5 * df31['weight']) / 200


In [128]:
cos11 = df31.groupby(['id_session_execution'])['calories'].sum()

In [129]:
df32 = df31.merge(cos11, how = 'left', left_on = 'id_session_execution', right_on = 'id_session_execution')

df32.rename(columns = {'calories_x': 'calories',
                       'calories_y': 'calories_session_execution'},
            inplace = True)

In [130]:
coss2 = df32.drop_duplicates(subset=['id_session_execution'], keep='first')

cos12 = coss2.groupby(['id_users'])['calories_session_execution'].sum()

In [131]:
df33 = df32.merge(cos12, how = 'left', left_on = 'id_users', right_on = 'id_users')

df33.rename(columns = {'calories_session_execution_x': 'calories_session_execution',
                       'calories_session_execution_y': 'total_calories_users2'},
            inplace = True)

In [132]:
df33['kcal_per_session_users2'] = df33['total_calories_users2'] / df33['total_sessions_users2']

In [133]:
df33.head()

Unnamed: 0,id_ex_ex,reps_executed_ex_ex,execution_time_ex_ex,order_ex_ex,updated_at_ex_ex,id_session_set_ex,order_session_set_ex,id_exercises,created_at_exercises,updated_at_exercises,body_parts_focused_exercises,muscles_exercises,joints_exercises,met_multiplier_exercises,name_en_exercises,id_session_block_ex,order_session_block_ex,id_session_execution,difficulty_feedback_session_execution,enjoyment_feedback_session_execution,reps_executed_session_execution,updated_at_session_execution,discarded_session_execution,id_sessions,order_sessions,time_duration_sessions,code_name_sessions,name_en_sessions,id_user_programs,created_at_user_programs,updated_at_user_programs,active_user_programs,current_session_id_user_programs,completed_user_programs,id_programs,created_at_programs,updated_at_programs,pro_programs,available_programs,strength_programs,endurance_programs,technique_programs,flexibility_programs,intensity_programs,name_en_programs,description_en_programs,id_users,created_at_users,updated_at_users,gender,date_of_birth,height,weight,activity_level,goal,body_type,body_fat,newsletter_subscription,notifications_setting,training_days_setting,language,points,scientific_data_usage,best_weekly_streak_users,id_implements,name_en_implements,BMI,BMI_category,YYYY/WW,total_sessions_users2,total_time_session_execution,total_reps_session_execution,total_time_users2,total_reps_users2,reps_per_session_users2,total_time_session_execution_min,reps_per_min_session_execution,exercise_execution_time_min,calories,calories_session_execution,total_calories_users2,kcal_per_session_users2
0,1296434,0.0,2.0,3,2021-11-01 12:05:39.918185,53220.0,3.0,5968,2020-10-16 09:33:15.172657,2022-01-21 19:15:56.401181,{},{0},{0},1.0,Rest,8261.0,3.0,6132.0,4.0,3.0,368.0,2021-11-01 12:05:40.078024,False,710.0,2.0,1105.0,PM1.2_V2,Session 2,7181.0,2021-10-25 11:04:40.657070,2021-11-28 11:26:22.131395,False,713.0,True,10.0,2020-11-23 13:41:46.587265,2021-09-29 14:53:34.897432,False,True,1.0,3.0,1.0,2.0,2.0,Get motivated!,The ultimate beginner’s program designed to he...,2273.0,2021-10-25 11:04:40.560589,2022-09-07 12:39:46.135605,female,1981-12-31,163.0,66.0,active,lose,mid,15.0,False,True,3.0,es,439070.0,True,8.0,0.0,No implement,24.84,Normal,2021/44,51.0,988.0,368.0,46208.0,9680.0,189.8,16.47,22.35,0.03,0.04,50.95,2463.91,48.31
1,1296435,3.0,8.0,1,2021-11-01 12:05:39.928907,53221.0,4.0,5398,2020-10-15 12:37:17.966340,2021-10-13 09:46:29.957461,"{Piernas,Glúteos}","{glúteos,"" cuádriceps""}","{cadera,"" rodillas"","" tobillos""}",3.5,Lunges (right),8261.0,3.0,6132.0,4.0,3.0,368.0,2021-11-01 12:05:40.078024,False,710.0,2.0,1105.0,PM1.2_V2,Session 2,7181.0,2021-10-25 11:04:40.657070,2021-11-28 11:26:22.131395,False,713.0,True,10.0,2020-11-23 13:41:46.587265,2021-09-29 14:53:34.897432,False,True,1.0,3.0,1.0,2.0,2.0,Get motivated!,The ultimate beginner’s program designed to he...,2273.0,2021-10-25 11:04:40.560589,2022-09-07 12:39:46.135605,female,1981-12-31,163.0,66.0,active,lose,mid,15.0,False,True,3.0,es,439070.0,True,8.0,0.0,No implement,24.84,Normal,2021/44,51.0,988.0,368.0,46208.0,9680.0,189.8,16.47,22.35,0.13,0.54,50.95,2463.91,48.31
2,1296436,3.0,9.0,2,2021-11-01 12:05:39.933210,53221.0,4.0,5399,2020-10-15 12:37:17.976799,2021-10-13 09:46:29.967689,"{Piernas,Glúteos}","{glúteos,"" cuádriceps""}","{cadera,"" rodillas"","" tobillos""}",3.5,Lunges (left),8261.0,3.0,6132.0,4.0,3.0,368.0,2021-11-01 12:05:40.078024,False,710.0,2.0,1105.0,PM1.2_V2,Session 2,7181.0,2021-10-25 11:04:40.657070,2021-11-28 11:26:22.131395,False,713.0,True,10.0,2020-11-23 13:41:46.587265,2021-09-29 14:53:34.897432,False,True,1.0,3.0,1.0,2.0,2.0,Get motivated!,The ultimate beginner’s program designed to he...,2273.0,2021-10-25 11:04:40.560589,2022-09-07 12:39:46.135605,female,1981-12-31,163.0,66.0,active,lose,mid,15.0,False,True,3.0,es,439070.0,True,8.0,0.0,No implement,24.84,Normal,2021/44,51.0,988.0,368.0,46208.0,9680.0,189.8,16.47,22.35,0.15,0.61,50.95,2463.91,48.31
3,1296432,3.0,9.0,1,2021-11-01 12:05:39.909842,53220.0,3.0,5398,2020-10-15 12:37:17.966340,2021-10-13 09:46:29.957461,"{Piernas,Glúteos}","{glúteos,"" cuádriceps""}","{cadera,"" rodillas"","" tobillos""}",3.5,Lunges (right),8261.0,3.0,6132.0,4.0,3.0,368.0,2021-11-01 12:05:40.078024,False,710.0,2.0,1105.0,PM1.2_V2,Session 2,7181.0,2021-10-25 11:04:40.657070,2021-11-28 11:26:22.131395,False,713.0,True,10.0,2020-11-23 13:41:46.587265,2021-09-29 14:53:34.897432,False,True,1.0,3.0,1.0,2.0,2.0,Get motivated!,The ultimate beginner’s program designed to he...,2273.0,2021-10-25 11:04:40.560589,2022-09-07 12:39:46.135605,female,1981-12-31,163.0,66.0,active,lose,mid,15.0,False,True,3.0,es,439070.0,True,8.0,0.0,No implement,24.84,Normal,2021/44,51.0,988.0,368.0,46208.0,9680.0,189.8,16.47,22.35,0.15,0.61,50.95,2463.91,48.31
4,1296433,3.0,9.0,2,2021-11-01 12:05:39.914045,53220.0,3.0,5399,2020-10-15 12:37:17.976799,2021-10-13 09:46:29.967689,"{Piernas,Glúteos}","{glúteos,"" cuádriceps""}","{cadera,"" rodillas"","" tobillos""}",3.5,Lunges (left),8261.0,3.0,6132.0,4.0,3.0,368.0,2021-11-01 12:05:40.078024,False,710.0,2.0,1105.0,PM1.2_V2,Session 2,7181.0,2021-10-25 11:04:40.657070,2021-11-28 11:26:22.131395,False,713.0,True,10.0,2020-11-23 13:41:46.587265,2021-09-29 14:53:34.897432,False,True,1.0,3.0,1.0,2.0,2.0,Get motivated!,The ultimate beginner’s program designed to he...,2273.0,2021-10-25 11:04:40.560589,2022-09-07 12:39:46.135605,female,1981-12-31,163.0,66.0,active,lose,mid,15.0,False,True,3.0,es,439070.0,True,8.0,0.0,No implement,24.84,Normal,2021/44,51.0,988.0,368.0,46208.0,9680.0,189.8,16.47,22.35,0.15,0.61,50.95,2463.91,48.31


In [134]:
df = df33.copy()

In [135]:
cols = ['name_en_exercises','discarded_session_execution', 'code_name_sessions', 
        'name_en_sessions', 'active_user_programs', 'completed_user_programs',
        'pro_programs', 'available_programs', 'name_en_programs', 
        'gender', 
        'activity_level', 'goal', 'body_type', 'newsletter_subscription', 'notifications_setting',
        'language', 'scientific_data_usage', 'BMI_category', 'name_en_implements']

for col in cols:
    df[col] = df[col].astype('category')
    
col_date = ['updated_at_ex_ex', 'created_at_exercises', 'updated_at_exercises',
            'updated_at_session_execution', 'created_at_user_programs', 'updated_at_user_programs',
            'created_at_programs', 'updated_at_programs', 'created_at_users', 'updated_at_users',
            'date_of_birth']

for col in col_date:
    df[col] = pd.to_datetime(df[col])#.dt.strftime("%Y-%m-%d %H:%M:%S")

cat_names = {
    'gender': {1: 'male', 0: 'female'},
    'activity_level': {0: 'very active', 1: 'active', 2: 'sedentary'},
    'goal': {0: 'lose', 1: 'gain', 2: 'antiaging'},
    'body_type': {0: 'thin', 1: 'mid', 2: 'strong'}
}

df = df.replace(cat_names)


In [136]:
df['total_sessions_users2'].max()

303.0

In [137]:
exercise_per_sess = pd.DataFrame(df.groupby(['id_session_execution'])['id_session_execution'].count())

exercise_per_sess.rename(columns = {'id_session_execution': 'number_exercises_in_session'}, inplace = True)

exercise_per_sess.reset_index(inplace = True)

df_no_rest = df[df['name_en_exercises'] != 'Rest']

exercise_per_sess_no_rest = pd.DataFrame(df_no_rest.groupby(['id_session_execution'])['id_session_execution'].count())

exercise_per_sess_no_rest.rename(columns = {'id_session_execution': 'number_exercises_without_rest_in_session'}, inplace = True)

exercise_per_sess_no_rest.reset_index(inplace = True)

df = df.merge(exercise_per_sess, how = 'left', left_on = 'id_session_execution', right_on = 'id_session_execution')

df = df.merge(exercise_per_sess_no_rest, how = 'left', left_on = 'id_session_execution', right_on = 'id_session_execution')

In [138]:
'''
df.drop(['number_exercises_without_rest_in_session_y', 'number_exercises_in_session_y'], inplace = True, axis = 1)

df.rename(columns = {'number_exercises_without_rest_in_session_x': 'number_exercises_without_rest_in_session',
                     'number_exercises_in_session_x': 'number_exercises_in_session'},
         inplace = True)

'''


exercises_per_user = pd.DataFrame(df.groupby(['id_users'])['number_exercises_in_session'].sum())

exercises_per_user.rename(columns = {'number_exercises_in_session': 'exercises_per_user'}, inplace = True)

exercises_per_user.reset_index(inplace = True)

df = df.merge(exercises_per_user, how = 'left', left_on = 'id_users', right_on = 'id_users')

In [139]:
'''
df.drop(['exercises_per_user_y'], axis = 1, inplace = True)
df.rename(columns = {'exercises_per_user_x': 'exercises_per_users'},
         inplace = True)
'''

exercises_no_rest_per_user = pd.DataFrame(df.groupby(['id_users'])['number_exercises_without_rest_in_session'].sum())

exercises_no_rest_per_user.rename(columns = {'number_exercises_without_rest_in_session': 'exercises_without_rest_per_user'}, inplace = True)

exercises_no_rest_per_user.reset_index(inplace = True)

df = df.merge(exercises_no_rest_per_user, how = 'left', left_on = 'id_users', right_on = 'id_users')

In [140]:
df.head()

Unnamed: 0,id_ex_ex,reps_executed_ex_ex,execution_time_ex_ex,order_ex_ex,updated_at_ex_ex,id_session_set_ex,order_session_set_ex,id_exercises,created_at_exercises,updated_at_exercises,body_parts_focused_exercises,muscles_exercises,joints_exercises,met_multiplier_exercises,name_en_exercises,id_session_block_ex,order_session_block_ex,id_session_execution,difficulty_feedback_session_execution,enjoyment_feedback_session_execution,reps_executed_session_execution,updated_at_session_execution,discarded_session_execution,id_sessions,order_sessions,time_duration_sessions,code_name_sessions,name_en_sessions,id_user_programs,created_at_user_programs,updated_at_user_programs,active_user_programs,current_session_id_user_programs,completed_user_programs,id_programs,created_at_programs,updated_at_programs,pro_programs,available_programs,strength_programs,endurance_programs,technique_programs,flexibility_programs,intensity_programs,name_en_programs,description_en_programs,id_users,created_at_users,updated_at_users,gender,date_of_birth,height,weight,activity_level,goal,body_type,body_fat,newsletter_subscription,notifications_setting,training_days_setting,language,points,scientific_data_usage,best_weekly_streak_users,id_implements,name_en_implements,BMI,BMI_category,YYYY/WW,total_sessions_users2,total_time_session_execution,total_reps_session_execution,total_time_users2,total_reps_users2,reps_per_session_users2,total_time_session_execution_min,reps_per_min_session_execution,exercise_execution_time_min,calories,calories_session_execution,total_calories_users2,kcal_per_session_users2,number_exercises_in_session,number_exercises_without_rest_in_session,exercises_per_user,exercises_without_rest_per_user
0,1296434,0.0,2.0,3,2021-11-01 12:05:39.918185,53220.0,3.0,5968,2020-10-16 09:33:15.172657,2022-01-21 19:15:56.401181,{},{0},{0},1.0,Rest,8261.0,3.0,6132.0,4.0,3.0,368.0,2021-11-01 12:05:40.078024,False,710.0,2.0,1105.0,PM1.2_V2,Session 2,7181.0,2021-10-25 11:04:40.657070,2021-11-28 11:26:22.131395,False,713.0,True,10.0,2020-11-23 13:41:46.587265,2021-09-29 14:53:34.897432,False,True,1.0,3.0,1.0,2.0,2.0,Get motivated!,The ultimate beginner’s program designed to he...,2273.0,2021-10-25 11:04:40.560589,2022-09-07 12:39:46.135605,female,1981-12-31,163.0,66.0,active,lose,mid,15.0,False,True,3.0,es,439070.0,True,8.0,0.0,No implement,24.84,Normal,2021/44,51.0,988.0,368.0,46208.0,9680.0,189.8,16.47,22.35,0.03,0.04,50.95,2463.91,48.31,55,32,52898,40465
1,1296435,3.0,8.0,1,2021-11-01 12:05:39.928907,53221.0,4.0,5398,2020-10-15 12:37:17.966340,2021-10-13 09:46:29.957461,"{Piernas,Glúteos}","{glúteos,"" cuádriceps""}","{cadera,"" rodillas"","" tobillos""}",3.5,Lunges (right),8261.0,3.0,6132.0,4.0,3.0,368.0,2021-11-01 12:05:40.078024,False,710.0,2.0,1105.0,PM1.2_V2,Session 2,7181.0,2021-10-25 11:04:40.657070,2021-11-28 11:26:22.131395,False,713.0,True,10.0,2020-11-23 13:41:46.587265,2021-09-29 14:53:34.897432,False,True,1.0,3.0,1.0,2.0,2.0,Get motivated!,The ultimate beginner’s program designed to he...,2273.0,2021-10-25 11:04:40.560589,2022-09-07 12:39:46.135605,female,1981-12-31,163.0,66.0,active,lose,mid,15.0,False,True,3.0,es,439070.0,True,8.0,0.0,No implement,24.84,Normal,2021/44,51.0,988.0,368.0,46208.0,9680.0,189.8,16.47,22.35,0.13,0.54,50.95,2463.91,48.31,55,32,52898,40465
2,1296436,3.0,9.0,2,2021-11-01 12:05:39.933210,53221.0,4.0,5399,2020-10-15 12:37:17.976799,2021-10-13 09:46:29.967689,"{Piernas,Glúteos}","{glúteos,"" cuádriceps""}","{cadera,"" rodillas"","" tobillos""}",3.5,Lunges (left),8261.0,3.0,6132.0,4.0,3.0,368.0,2021-11-01 12:05:40.078024,False,710.0,2.0,1105.0,PM1.2_V2,Session 2,7181.0,2021-10-25 11:04:40.657070,2021-11-28 11:26:22.131395,False,713.0,True,10.0,2020-11-23 13:41:46.587265,2021-09-29 14:53:34.897432,False,True,1.0,3.0,1.0,2.0,2.0,Get motivated!,The ultimate beginner’s program designed to he...,2273.0,2021-10-25 11:04:40.560589,2022-09-07 12:39:46.135605,female,1981-12-31,163.0,66.0,active,lose,mid,15.0,False,True,3.0,es,439070.0,True,8.0,0.0,No implement,24.84,Normal,2021/44,51.0,988.0,368.0,46208.0,9680.0,189.8,16.47,22.35,0.15,0.61,50.95,2463.91,48.31,55,32,52898,40465
3,1296432,3.0,9.0,1,2021-11-01 12:05:39.909842,53220.0,3.0,5398,2020-10-15 12:37:17.966340,2021-10-13 09:46:29.957461,"{Piernas,Glúteos}","{glúteos,"" cuádriceps""}","{cadera,"" rodillas"","" tobillos""}",3.5,Lunges (right),8261.0,3.0,6132.0,4.0,3.0,368.0,2021-11-01 12:05:40.078024,False,710.0,2.0,1105.0,PM1.2_V2,Session 2,7181.0,2021-10-25 11:04:40.657070,2021-11-28 11:26:22.131395,False,713.0,True,10.0,2020-11-23 13:41:46.587265,2021-09-29 14:53:34.897432,False,True,1.0,3.0,1.0,2.0,2.0,Get motivated!,The ultimate beginner’s program designed to he...,2273.0,2021-10-25 11:04:40.560589,2022-09-07 12:39:46.135605,female,1981-12-31,163.0,66.0,active,lose,mid,15.0,False,True,3.0,es,439070.0,True,8.0,0.0,No implement,24.84,Normal,2021/44,51.0,988.0,368.0,46208.0,9680.0,189.8,16.47,22.35,0.15,0.61,50.95,2463.91,48.31,55,32,52898,40465
4,1296433,3.0,9.0,2,2021-11-01 12:05:39.914045,53220.0,3.0,5399,2020-10-15 12:37:17.976799,2021-10-13 09:46:29.967689,"{Piernas,Glúteos}","{glúteos,"" cuádriceps""}","{cadera,"" rodillas"","" tobillos""}",3.5,Lunges (left),8261.0,3.0,6132.0,4.0,3.0,368.0,2021-11-01 12:05:40.078024,False,710.0,2.0,1105.0,PM1.2_V2,Session 2,7181.0,2021-10-25 11:04:40.657070,2021-11-28 11:26:22.131395,False,713.0,True,10.0,2020-11-23 13:41:46.587265,2021-09-29 14:53:34.897432,False,True,1.0,3.0,1.0,2.0,2.0,Get motivated!,The ultimate beginner’s program designed to he...,2273.0,2021-10-25 11:04:40.560589,2022-09-07 12:39:46.135605,female,1981-12-31,163.0,66.0,active,lose,mid,15.0,False,True,3.0,es,439070.0,True,8.0,0.0,No implement,24.84,Normal,2021/44,51.0,988.0,368.0,46208.0,9680.0,189.8,16.47,22.35,0.15,0.61,50.95,2463.91,48.31,55,32,52898,40465


In [141]:
df_users_only_once = df.drop_duplicates(subset=['id_users'], keep='first')

In [142]:
df_users_only_once.head()

Unnamed: 0,id_ex_ex,reps_executed_ex_ex,execution_time_ex_ex,order_ex_ex,updated_at_ex_ex,id_session_set_ex,order_session_set_ex,id_exercises,created_at_exercises,updated_at_exercises,body_parts_focused_exercises,muscles_exercises,joints_exercises,met_multiplier_exercises,name_en_exercises,id_session_block_ex,order_session_block_ex,id_session_execution,difficulty_feedback_session_execution,enjoyment_feedback_session_execution,reps_executed_session_execution,updated_at_session_execution,discarded_session_execution,id_sessions,order_sessions,time_duration_sessions,code_name_sessions,name_en_sessions,id_user_programs,created_at_user_programs,updated_at_user_programs,active_user_programs,current_session_id_user_programs,completed_user_programs,id_programs,created_at_programs,updated_at_programs,pro_programs,available_programs,strength_programs,endurance_programs,technique_programs,flexibility_programs,intensity_programs,name_en_programs,description_en_programs,id_users,created_at_users,updated_at_users,gender,date_of_birth,height,weight,activity_level,goal,body_type,body_fat,newsletter_subscription,notifications_setting,training_days_setting,language,points,scientific_data_usage,best_weekly_streak_users,id_implements,name_en_implements,BMI,BMI_category,YYYY/WW,total_sessions_users2,total_time_session_execution,total_reps_session_execution,total_time_users2,total_reps_users2,reps_per_session_users2,total_time_session_execution_min,reps_per_min_session_execution,exercise_execution_time_min,calories,calories_session_execution,total_calories_users2,kcal_per_session_users2,number_exercises_in_session,number_exercises_without_rest_in_session,exercises_per_user,exercises_without_rest_per_user
0,1296434,0.0,2.0,3,2021-11-01 12:05:39.918185,53220.0,3.0,5968,2020-10-16 09:33:15.172657,2022-01-21 19:15:56.401181,{},{0},{0},1.0,Rest,8261.0,3.0,6132.0,4.0,3.0,368.0,2021-11-01 12:05:40.078024,False,710.0,2.0,1105.0,PM1.2_V2,Session 2,7181.0,2021-10-25 11:04:40.657070,2021-11-28 11:26:22.131395,False,713.0,True,10.0,2020-11-23 13:41:46.587265,2021-09-29 14:53:34.897432,False,True,1.0,3.0,1.0,2.0,2.0,Get motivated!,The ultimate beginner’s program designed to he...,2273.0,2021-10-25 11:04:40.560589,2022-09-07 12:39:46.135605,female,1981-12-31,163.0,66.0,active,lose,mid,15.0,False,True,3.0,es,439070.0,True,8.0,0.0,No implement,24.84,Normal,2021/44,51.0,988.0,368.0,46208.0,9680.0,189.8,16.47,22.35,0.03,0.04,50.95,2463.91,48.31,55,32,52898,40465
55,1301839,3.0,9.0,1,2021-11-02 06:34:48.954209,54970.0,7.0,5398,2020-10-15 12:37:17.966340,2021-10-13 09:46:29.957461,"{Piernas,Glúteos}","{glúteos,"" cuádriceps""}","{cadera,"" rodillas"","" tobillos""}",3.5,Lunges (right),8692.0,3.0,6293.0,7.0,4.0,368.0,2021-11-02 06:34:49.022130,False,685.0,2.0,1105.0,PH1.2_V2,Session 2,26994.0,2021-11-01 08:52:54.714380,2021-11-07 07:34:19.999433,False,688.0,True,6.0,2020-11-23 11:28:22.066874,2021-09-29 14:53:41.901478,False,True,1.0,2.0,1.0,2.0,2.0,Get motivated!,The ultimate beginner’s program that helps you...,3466.0,2021-10-28 06:01:36.983684,2021-12-22 06:41:24.982549,male,1980-01-16,179.0,86.0,sedentary,lose,thin,45.0,True,True,4.0,es,140321.0,False,1.0,0.0,No implement,26.84,Overweight,2021/44,5.0,937.0,368.0,2847.0,850.0,170.0,15.62,23.56,0.15,0.79,54.2,188.45,37.69,55,32,5028,3618
110,1302658,10.0,65.0,1,2021-11-02 07:30:39.314452,55265.0,1.0,5279,2020-10-15 12:37:16.490561,2021-10-13 09:46:29.275591,"{""Todo el cuerpo""}","{pectorales,"" tríceps"","" cuádriceps""}","{hombros,"" cadera""}",3.8,Semi burpee,8754.0,1.0,6313.0,6.0,4.0,140.0,2021-11-02 07:30:39.446576,False,764.0,1.0,494.0,PM1.P2.1_V2,Session 1,26835.0,2021-10-31 14:36:50.033053,2022-03-28 09:36:57.673985,False,773.0,True,23.0,2020-11-23 14:03:38.477856,2021-09-29 14:53:34.661221,False,True,1.0,3.0,1.0,2.0,2.0,Smash your goals,Choose the program created to help you get int...,1288.0,2021-10-25 11:00:14.208971,2022-04-22 06:48:52.351002,female,1977-10-01,166.0,56.0,sedentary,gain,thin,25.0,True,True,3.0,es,106260.0,False,7.0,0.0,No implement,20.32,Normal,2021/44,26.0,620.0,140.0,39849.0,7504.0,288.62,10.33,13.55,1.08,4.03,24.31,2023.44,77.82,27,14,56888,42271
137,1297365,0.0,60.0,2,2021-11-01 15:45:59.090013,53565.0,2.0,5968,2020-10-16 09:33:15.172657,2022-01-21 19:15:56.401181,{},{0},{0},1.0,Rest,8338.0,1.0,6159.0,7.0,4.0,144.0,2021-11-01 15:45:59.329889,False,630.0,37.0,1338.0,W010_D01,Transitions & Global - Session 37,26176.0,2021-10-28 15:35:59.277604,2021-10-28 15:35:59.277604,False,594.0,False,30.0,2020-11-23 14:12:50.072945,2021-09-29 14:53:59.484766,True,True,4.0,4.0,5.0,2.0,4.0,Ring Master,The simplest equipment is often the most effec...,7761.0,2021-10-28 06:35:56.542124,2022-09-07 18:28:49.283551,male,1974-08-29,186.0,79.0,active,gain,mid,10.0,True,True,5.0,es,212090.0,False,37.0,0.0,No implement,22.84,Normal,2021/44,133.0,1464.0,144.0,241247.0,36623.0,275.36,24.4,5.9,1.0,1.38,80.17,16051.71,120.69,30,17,529939,333274
167,1301869,0.0,10.0,2,2021-11-02 06:35:00.981664,54984.0,5.0,5968,2020-10-16 09:33:15.172657,2022-01-21 19:15:56.401181,{},{0},{0},1.0,Rest,8694.0,2.0,6294.0,5.0,3.0,192.0,2021-11-02 06:35:01.217080,False,690.0,2.0,743.0,PH2.2_V2,Session 2,5714.0,2021-10-25 11:02:33.852694,2021-11-08 06:08:32.605613,False,693.0,True,7.0,2020-11-23 13:39:16.901367,2021-09-29 14:53:35.087878,False,True,2.0,2.0,1.0,2.0,2.0,Get motivated!,The ultimate beginner’s program that helps you...,1799.0,2021-10-25 11:02:33.756287,2022-09-01 05:25:42.723261,male,1976-11-04,173.0,76.0,sedentary,lose,mid,30.0,True,True,4.0,es,460011.0,False,8.0,0.0,No implement,25.39,Overweight,2021/44,98.0,1016.0,192.0,102908.8,22227.0,226.81,16.93,11.34,0.17,0.22,60.14,7198.98,73.46,55,32,109160,76422


In [143]:
df.to_csv(r'/home/evida-monika/mhunters/final_data_4.csv'+ '.bz2', header=True, index=False,compression='bz2')
