# Instalación de las bibliotecas necesarias: plotly y statsmodels
## Análisis de datos de un SmartWatch con Python
Conjunto de datos tomado de 30 usuarias del reloj inteligente Fitbit 

In [1]:
# Start the analysis by installing the required libraries.
# The install command below is left commented out; uncomment it to install
# the pinned plotly release into the kernel's environment.
# %pip install plotly==5.8.1

Collecting plotly==5.8.1
  Downloading plotly-5.8.1-py2.py3-none-any.whl (15.2 MB)
     -------------------------------------- 15.2/15.2 MB 974.8 kB/s eta 0:00:00
Collecting tenacity>=6.2.0
  Downloading tenacity-8.0.1-py3-none-any.whl (24 kB)
Installing collected packages: tenacity, plotly
Successfully installed plotly-5.8.1 tenacity-8.0.1
Note: you may need to restart the kernel to use updated packages.




In [2]:
# NOTE(review): statsmodels is presumably required by the OLS trend line that
# the scatter plot requests with trendline="ols" — confirm.
# The install command is left commented out and is not version-pinned;
# uncomment it to install into the kernel's environment.
# %pip install statsmodels

Collecting statsmodels
  Downloading statsmodels-0.13.2-cp310-cp310-win_amd64.whl (9.1 MB)
     ---------------------------------------- 9.1/9.1 MB 997.0 kB/s eta 0:00:00
Collecting scipy>=1.3
  Downloading scipy-1.8.1-cp310-cp310-win_amd64.whl (36.9 MB)
     ---------------------------------------- 36.9/36.9 MB 1.3 MB/s eta 0:00:00
Collecting patsy>=0.5.2
  Downloading patsy-0.5.2-py2.py3-none-any.whl (233 kB)
     -------------------------------------- 233.7/233.7 kB 1.2 MB/s eta 0:00:00
Collecting pandas>=0.25
  Downloading pandas-1.4.2-cp310-cp310-win_amd64.whl (10.6 MB)
     ---------------------------------------- 10.6/10.6 MB 1.6 MB/s eta 0:00:00
Collecting pytz>=2020.1
  Downloading pytz-2022.1-py2.py3-none-any.whl (503 kB)
     -------------------------------------- 503.5/503.5 kB 1.4 MB/s eta 0:00:00
Installing collected packages: pytz, scipy, patsy, pandas, statsmodels
Successfully installed pandas-1.4.2 patsy-0.5.2 pytz-2022.1 scipy-1.8.1 statsmodels-0.13.2
Note: you may ne



In [14]:
# Importar las bibliotecas
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

In [15]:
# Load the daily activity export into a DataFrame and preview its first rows.
csv_path = '../data_SmartWatch/dailyActivity.csv'
data = pd.read_csv(csv_path)
print(data.head(n=5))

           Id ActivityDate  TotalSteps  TotalDistance  TrackerDistance  \
0  1503960366    4/12/2016       13162           8.50             8.50   
1  1503960366    4/13/2016       10735           6.97             6.97   
2  1503960366    4/14/2016       10460           6.74             6.74   
3  1503960366    4/15/2016        9762           6.28             6.28   
4  1503960366    4/16/2016       12669           8.16             8.16   

   LoggedActivitiesDistance  VeryActiveDistance  ModeratelyActiveDistance  \
0                       0.0                1.88                      0.55   
1                       0.0                1.57                      0.69   
2                       0.0                2.44                      0.40   
3                       0.0                2.14                      1.26   
4                       0.0                2.71                      0.41   

   LightActiveDistance  SedentaryActiveDistance  VeryActiveMinutes  \
0                 6.06

In [16]:
# Count the missing values in every column to confirm there are no nulls.
null_counts = data.isna().sum()
print(null_counts)

Id                          0
ActivityDate                0
TotalSteps                  0
TotalDistance               0
TrackerDistance             0
LoggedActivitiesDistance    0
VeryActiveDistance          0
ModeratelyActiveDistance    0
LightActiveDistance         0
SedentaryActiveDistance     0
VeryActiveMinutes           0
FairlyActiveMinutes         0
LightlyActiveMinutes        0
SedentaryMinutes            0
Calories                    0
dtype: int64


In [17]:
# Review the dataset structure: column names, non-null counts and dtypes.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None".
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 940 entries, 0 to 939
Data columns (total 15 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Id                        940 non-null    int64  
 1   ActivityDate              940 non-null    object 
 2   TotalSteps                940 non-null    int64  
 3   TotalDistance             940 non-null    float64
 4   TrackerDistance           940 non-null    float64
 5   LoggedActivitiesDistance  940 non-null    float64
 6   VeryActiveDistance        940 non-null    float64
 7   ModeratelyActiveDistance  940 non-null    float64
 8   LightActiveDistance       940 non-null    float64
 9   SedentaryActiveDistance   940 non-null    float64
 10  VeryActiveMinutes         940 non-null    int64  
 11  FairlyActiveMinutes       940 non-null    int64  
 12  LightlyActiveMinutes      940 non-null    int64  
 13  SedentaryMinutes          940 non-null    int64  
 14  Calories  

In [18]:
# ActivityDate is loaded as a generic object column; convert it to a datetime
# column using the month/day/year layout of the raw values (e.g. 4/12/2016).
data["ActivityDate"] = pd.to_datetime(data["ActivityDate"], format="%m/%d/%Y")
# info() prints its own report and returns None, so it is called without
# print() to avoid a stray "None" after the report.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 940 entries, 0 to 939
Data columns (total 15 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   Id                        940 non-null    int64         
 1   ActivityDate              940 non-null    datetime64[ns]
 2   TotalSteps                940 non-null    int64         
 3   TotalDistance             940 non-null    float64       
 4   TrackerDistance           940 non-null    float64       
 5   LoggedActivitiesDistance  940 non-null    float64       
 6   VeryActiveDistance        940 non-null    float64       
 7   ModeratelyActiveDistance  940 non-null    float64       
 8   LightActiveDistance       940 non-null    float64       
 9   SedentaryActiveDistance   940 non-null    float64       
 10  VeryActiveMinutes         940 non-null    int64         
 11  FairlyActiveMinutes       940 non-null    int64         
 12  LightlyActiveMinutes  

In [19]:
# Total tracked minutes per record: the sum of the four activity-intensity
# minute columns (very active, fairly active, lightly active, sedentary).
data["TotalMinutes"] = (
    data["VeryActiveMinutes"]
    + data["FairlyActiveMinutes"]
    + data["LightlyActiveMinutes"]
    + data["SedentaryMinutes"]
)
# A fixed random_state makes the preview show the same rows on every run,
# so the cell output is reproducible after a kernel restart.
print(data["TotalMinutes"].sample(5, random_state=42))

795    1440
337     940
112    1440
863    1440
675    1440
Name: TotalMinutes, dtype: int64


In [20]:
# Descriptive statistics of the dataset (count, mean, std, quartiles, extremes).
summary_stats = data.describe()
print(summary_stats)

                 Id    TotalSteps  TotalDistance  TrackerDistance  \
count  9.400000e+02    940.000000     940.000000       940.000000   
mean   4.855407e+09   7637.910638       5.489702         5.475351   
std    2.424805e+09   5087.150742       3.924606         3.907276   
min    1.503960e+09      0.000000       0.000000         0.000000   
25%    2.320127e+09   3789.750000       2.620000         2.620000   
50%    4.445115e+09   7405.500000       5.245000         5.245000   
75%    6.962181e+09  10727.000000       7.712500         7.710000   
max    8.877689e+09  36019.000000      28.030001        28.030001   

       LoggedActivitiesDistance  VeryActiveDistance  ModeratelyActiveDistance  \
count                940.000000          940.000000                940.000000   
mean                   0.108171            1.502681                  0.567543   
std                    0.619897            2.658941                  0.883580   
min                    0.000000            0.000000   

In [21]:
# Relationship between the calories burned and the total steps walked in a day.
# Marker size encodes VeryActiveMinutes and an OLS trend line is requested.
scatter_options = dict(
    x="Calories",
    y="TotalSteps",
    size="VeryActiveMinutes",
    trendline="ols",
    title="Relationship between Calories & Total Steps",
)
figure = px.scatter(data, **scatter_options)
figure.show()

Se puede observar una relación lineal entre el número de pasos y las calorías quemadas en un día.

In [22]:
# Average number of minutes per day spent at each activity level.
# Maps every minutes column to the label shown for its pie slice.
minute_labels = {
    "VeryActiveMinutes": "Very Active Minutes",
    "FairlyActiveMinutes": "Fairly Active Minutes",
    "LightlyActiveMinutes": "Lightly Active Minutes",
    "SedentaryMinutes": "Inactive Minutes",
}
label = list(minute_labels.values())
counts = data[list(minute_labels.keys())].mean()
colors = ['gold', 'lightgreen', "pink", "lightblue"]

# Black outline drawn around every slice.
slice_outline = dict(color='black', width=3)

fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Total Active Minutes')
fig.update_traces(
    hoverinfo='label+percent',
    textinfo='value',
    textfont_size=30,
    marker=dict(colors=colors, line=slice_outline),
)
fig.show()

Observaciones:  
* 991.21 minutos (81.3% del total) fueron inactivos en un día promedio
* 192.81 minutos (15.8%) fueron ligeramente activos
* 21.16 minutos (1.74%) fueron muy activos
* 13 minutos (1.11%) fueron bastante activos

Encontrar el día de la semana de cada registro y agregarlo al conjunto de datos como una nueva columna llamada "Day":

In [23]:
# Derive the weekday name of every record from its activity date and store it
# in a new "Day" column, then preview the first values.
weekday_names = data["ActivityDate"].dt.day_name()
data["Day"] = weekday_names
print(data["Day"].head())

0      Tuesday
1    Wednesday
2     Thursday
3       Friday
4     Saturday
Name: Day, dtype: object


In [24]:
# Very active, fairly active and lightly active minutes for each day of the week.
# NOTE(review): every record is passed to the chart as-is (no per-weekday
# aggregation) — confirm this is the intended view.
# Each entry is (minutes column, legend name, bar color), in drawing order.
activity_traces = [
    ("VeryActiveMinutes", 'Very Active', 'purple'),
    ("FairlyActiveMinutes", 'Fairly Active', 'green'),
    ("LightlyActiveMinutes", 'Lightly Active', 'pink'),
]

fig = go.Figure()
for column, trace_name, bar_color in activity_traces:
    fig.add_trace(
        go.Bar(
            x=data["Day"],
            y=data[column],
            name=trace_name,
            marker_color=bar_color,
        )
    )
fig.update_layout(barmode='group', xaxis_tickangle=-45)
fig.show()

In [25]:
# Average inactive (sedentary) minutes for each day of the week.
# The values are aggregated per weekday so that every label is paired with the
# figure for that same day; passing the raw SedentaryMinutes column next to the
# seven weekday labels pairs the labels with unrelated individual records.
# The mean is used instead of the sum because the weekdays cannot all have the
# same number of records (940 rows is not a multiple of 7), so totals would
# favor the days that simply appear more often.
weekday_order = ["Monday", "Tuesday", "Wednesday", "Thursday",
                 "Friday", "Saturday", "Sunday"]
sedentary_by_day = (
    data.groupby("Day")["SedentaryMinutes"]
    .mean()
    .reindex(weekday_order)  # calendar order instead of alphabetical
    .dropna()                # skip weekdays that have no records
    .round(2)
)
label = sedentary_by_day.index
counts = sedentary_by_day
# One color per weekday (seven entries, one for each possible slice).
colors = ["lightblue", "lightgreen", "pink", "cyan", "orange", "yellow", "gold"]

fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Inactive Minutes Daily')
fig.update_traces(hoverinfo='label+percent', textinfo='value', textfont_size=30,
                  marker=dict(colors=colors, line=dict(color='black', width=3)))
fig.show()

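Según el gráfico anterior, el jueves aparece como el día más inactivo de acuerdo con el estilo de vida de todos los individuos en el conjunto de datos. Esta conclusión solo es válida si los valores del gráfico están agregados por día de la semana, por lo que conviene verificarla con `data.groupby("Day")["SedentaryMinutes"].mean()`.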

In [13]:
# How many calories are burned on each day of the week?
# The values are aggregated per weekday so that every label is paired with the
# figure for that same day; passing the raw Calories column next to the seven
# weekday labels pairs the labels with unrelated individual records.
# The mean is used instead of the sum because the weekdays cannot all have the
# same number of records (940 rows is not a multiple of 7), so totals would
# favor the days that simply appear more often.
weekday_order = ["Monday", "Tuesday", "Wednesday", "Thursday",
                 "Friday", "Saturday", "Sunday"]
calories_by_day = (
    data.groupby("Day")["Calories"]
    .mean()
    .reindex(weekday_order)  # calendar order instead of alphabetical
    .dropna()                # skip weekdays that have no records
    .round(2)
)
label = calories_by_day.index
counts = calories_by_day
colors = ['gold', 'lightgreen', "pink", "blue", "skyblue", "cyan", "orange"]

fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Calories Burned Daily')
fig.update_traces(hoverinfo='label+percent', textinfo='value', textfont_size=30,
                  marker=dict(colors=colors, line=dict(color='black', width=3)))
fig.show()

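Según el gráfico anterior, el martes aparece como el día más activo para todas las personas en el conjunto de datos, ya que el mayor número de calorías se quemó los martes. Esta conclusión solo es válida si los valores del gráfico están agregados por día de la semana, por lo que conviene verificarla con `data.groupby("Day")["Calories"].mean()`.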