## Librerías

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import datetime as datetime

## Carga y visualización de datos

In [2]:
# Load the two Excel workbooks used by this notebook: the sleep log and the
# aggregated step counts (as suggested by the file names).
sueño, pasos = (
    pd.read_excel(ruta)
    for ruta in ('sueño_withings_2.xlsx', 'agregados_pasos_2.xlsx')
)

In [3]:
# Preview the first rows of the sleep table.
sueño.head()

Unnamed: 0,de,a,ligero (s),profundo (s),Rem (seg),despierto (s),despertar,Duration to sleep (s),Duration to wake up (s),Snoring (s),Snoring episodes,Average heart rate,Heart rate (min),Heart rate (max),Night events
0,2018-04-12T02:03:00+02:00,2018-04-12T06:54:00+02:00,10020,6060,0,1380,0,1380,0,0,0,0,0,0,
1,2018-04-13T02:30:00+02:00,2018-04-13T07:29:00+02:00,13380,3240,0,1320,1,0,0,0,0,0,0,0,
2,2018-04-14T01:07:00+02:00,2018-04-14T10:01:00+02:00,23520,8100,0,420,0,360,60,0,0,0,0,0,
3,2018-04-15T00:57:00+02:00,2018-04-15T09:35:00+02:00,23460,5640,0,1980,2,360,0,0,0,0,0,0,
4,2018-04-16T01:46:00+02:00,2018-04-16T06:41:00+02:00,11040,6120,0,540,1,360,0,0,0,0,0,0,


In [4]:
# Preview the first rows of the step-count table.
pasos.head()

Unnamed: 0,date,value
0,2018-11-26,4385
1,2018-11-25,0
2,2018-11-24,0
3,2018-11-23,64
4,2018-11-22,0


In [5]:
# Move the step counts from the generic 'value' column to a column named
# 'pasos'. pop() removes 'value' and returns it, so the new column ends up
# last and a missing 'value' column still raises KeyError.
pasos['pasos'] = pasos.pop('value')

In [6]:
# Show the (rows, columns) shape of both tables on one line.
print(*(tabla.shape for tabla in (sueño, pasos)))

(89, 15) (229, 2)


In [7]:
# Inspect the column dtypes of the sleep table.
sueño.dtypes

de                          object
a                           object
ligero (s)                   int64
profundo (s)                 int64
Rem (seg)                    int64
despierto (s)                int64
despertar                    int64
Duration to sleep (s)        int64
Duration to wake up (s)      int64
Snoring (s)                  int64
Snoring episodes             int64
Average heart rate           int64
Heart rate (min)             int64
Heart rate (max)             int64
Night events               float64
dtype: object

## Unificación de tablas

In [8]:
# Build the 'Fecha Despertar' column used as the merge key: take the first
# 10 characters (the YYYY-MM-DD part) of the 'a' timestamp text and parse
# them as a date. The time of day and UTC offset are simply cut off.
sueño['Fecha Despertar'] = pd.to_datetime(sueño['a'].astype(str).str[:10])

In [9]:
# Join the two tables on their common date: the wake-up date of each sleep
# record against the 'date' of the step counts. This is an inner join (the
# pandas default), so rows without a match on the other side are dropped.
sueño_pasos = pd.merge(
    sueño,
    pasos,
    how='inner',
    left_on='Fecha Despertar',
    right_on='date',
)
sueño_pasos.head()

Unnamed: 0,de,a,ligero (s),profundo (s),Rem (seg),despierto (s),despertar,Duration to sleep (s),Duration to wake up (s),Snoring (s),Snoring episodes,Average heart rate,Heart rate (min),Heart rate (max),Night events,Fecha Despertar,date,pasos
0,2018-04-12T02:03:00+02:00,2018-04-12T06:54:00+02:00,10020,6060,0,1380,0,1380,0,0,0,0,0,0,,2018-04-12,2018-04-12,6337
1,2018-04-13T02:30:00+02:00,2018-04-13T07:29:00+02:00,13380,3240,0,1320,1,0,0,0,0,0,0,0,,2018-04-13,2018-04-13,12161
2,2018-04-14T01:07:00+02:00,2018-04-14T10:01:00+02:00,23520,8100,0,420,0,360,60,0,0,0,0,0,,2018-04-14,2018-04-14,14541
3,2018-04-15T00:57:00+02:00,2018-04-15T09:35:00+02:00,23460,5640,0,1980,2,360,0,0,0,0,0,0,,2018-04-15,2018-04-15,7817
4,2018-04-16T01:46:00+02:00,2018-04-16T06:41:00+02:00,11040,6120,0,540,1,360,0,0,0,0,0,0,,2018-04-16,2018-04-16,7798


In [10]:
# Check how many rows and columns the merged table has.
sueño_pasos.shape

(86, 18)

## Análisis de las variables

In [11]:
# Print each column's name followed by its describe() summary and a blank
# separator line.
for nombre, columna in sueño_pasos.items():
    print(nombre)
    print(columna.describe(), '\n')

de
count                            86
unique                           86
top       2018-04-12T02:03:00+02:00
freq                              1
Name: de, dtype: object 

a
count                            86
unique                           86
top       2018-04-12T06:54:00+02:00
freq                              1
Name: a, dtype: object 

ligero (s)
count       86.000000
mean     13677.209302
std       6192.998943
min       1440.000000
25%       8370.000000
50%      12600.000000
75%      18045.000000
max      28500.000000
Name: ligero (s), dtype: float64 

profundo (s)
count       86.000000
mean      3740.918605
std       2255.234978
min        240.000000
25%       2235.000000
50%       3420.000000
75%       5040.000000
max      15300.000000
Name: profundo (s), dtype: float64 

Rem (seg)
count    86.0
mean      0.0
std       0.0
min       0.0
25%       0.0
50%       0.0
75%       0.0
max       0.0
Name: Rem (seg), dtype: float64 

despierto (s)
count      86.000000
mean      853.267

## Transformación de las columnas

In [12]:
# Drop the variables we are not interested in. 'date' is redundant after the
# merge because it equals 'Fecha Despertar' on every row.
sueño_pasos = sueño_pasos.drop(
    [
        'Rem (seg)',
        'Snoring (s)',
        'Snoring episodes',
        'Average heart rate',
        'Heart rate (min)',
        'Heart rate (max)',
        'Night events',
        'date',
    ],
    axis='columns',
)
sueño_pasos.head()

Unnamed: 0,de,a,ligero (s),profundo (s),despierto (s),despertar,Duration to sleep (s),Duration to wake up (s),Fecha Despertar,pasos
0,2018-04-12T02:03:00+02:00,2018-04-12T06:54:00+02:00,10020,6060,1380,0,1380,0,2018-04-12,6337
1,2018-04-13T02:30:00+02:00,2018-04-13T07:29:00+02:00,13380,3240,1320,1,0,0,2018-04-13,12161
2,2018-04-14T01:07:00+02:00,2018-04-14T10:01:00+02:00,23520,8100,420,0,360,60,2018-04-14,14541
3,2018-04-15T00:57:00+02:00,2018-04-15T09:35:00+02:00,23460,5640,1980,2,360,0,2018-04-15,7817
4,2018-04-16T01:46:00+02:00,2018-04-16T06:41:00+02:00,11040,6120,540,1,360,0,2018-04-16,7798


In [13]:
# Create 'Desde' and 'Hasta' as datetimes without time zone: cast 'de' / 'a'
# to text, cut the last 6 characters and parse what is left.
# NOTE(review): assumes every value ends in a 6-character UTC offset such as
# '+02:00' - confirm this holds for the whole file.
sueño_pasos['Desde'] = pd.to_datetime(sueño_pasos['de'].astype(str).str[:-6])
sueño_pasos['Hasta'] = pd.to_datetime(sueño_pasos['a'].astype(str).str[:-6])

In [14]:
# Create the 'Tiempo Durmiendo (s)' column as the element-wise sum of the
# light-sleep and deep-sleep columns.
sueño_pasos['Tiempo Durmiendo (s)'] = sueño_pasos['ligero (s)'].add(sueño_pasos['profundo (s)'])

In [15]:
# Extract the hour and the minute from 'Desde' and 'Hasta' into four new
# columns (appended in the order listed), then discard the two full
# timestamp columns.
sueño_pasos = sueño_pasos.assign(**{
    'Hora Desde': sueño_pasos['Desde'].dt.hour,
    'Minuto Desde': sueño_pasos['Desde'].dt.minute,
    'Hora Hasta': sueño_pasos['Hasta'].dt.hour,
    'Minuto Hasta': sueño_pasos['Hasta'].dt.minute,
}).drop(columns=['Desde', 'Hasta'])

In [16]:
# Remove 'Fecha Despertar': it is of no use for the KMeans model.
sueño_pasos = sueño_pasos.drop('Fecha Despertar', axis='columns')

In [17]:
# Remove the original 'de' and 'a' timestamp text columns.
sueño_pasos = sueño_pasos.drop(['de', 'a'], axis='columns')

In [18]:
# Preview the table after the column transformations.
sueño_pasos.head()

Unnamed: 0,ligero (s),profundo (s),despierto (s),despertar,Duration to sleep (s),Duration to wake up (s),pasos,Tiempo Durmiendo (s),Hora Desde,Minuto Desde,Hora Hasta,Minuto Hasta
0,10020,6060,1380,0,1380,0,6337,16080,2,3,6,54
1,13380,3240,1320,1,0,0,12161,16620,2,30,7,29
2,23520,8100,420,0,360,60,14541,31620,1,7,10,1
3,23460,5640,1980,2,360,0,7817,29100,0,57,9,35
4,11040,6120,540,1,360,0,7798,17160,1,46,6,41


In [19]:
# Rename the columns with a single old-name -> new-name mapping, then select
# them in the order they should appear in the final table.
sueño_pasos = sueño_pasos.rename(columns={
    'ligero (s)': 'Ligero (s)',
    'profundo (s)': 'Profundo (s)',
    'despierto (s)': 'Despierto (s)',
    'despertar': 'Interrupciones',
    'Duration to sleep (s)': 'Conciliación (s)',
    'Duration to wake up (s)': 'Tiempo en Levantarse (s)',
    'pasos': 'Pasos',
    'Hora Desde': 'Hora Dormir',
    'Minuto Desde': 'Minuto Dormir',
    'Hora Hasta': 'Hora Despertar',
    'Minuto Hasta': 'Minuto Despertar',
}).loc[:, [
    'Hora Dormir',
    'Minuto Dormir',
    'Hora Despertar',
    'Minuto Despertar',
    'Tiempo Durmiendo (s)',
    'Ligero (s)',
    'Profundo (s)',
    'Despierto (s)',
    'Conciliación (s)',
    'Tiempo en Levantarse (s)',
    'Interrupciones',
    'Pasos',
]]
sueño_pasos.head()

Unnamed: 0,Hora Dormir,Minuto Dormir,Hora Despertar,Minuto Despertar,Tiempo Durmiendo (s),Ligero (s),Profundo (s),Despierto (s),Conciliación (s),Tiempo en Levantarse (s),Interrupciones,Pasos
0,2,3,6,54,16080,10020,6060,1380,1380,0,0,6337
1,2,30,7,29,16620,13380,3240,1320,0,0,1,12161
2,1,7,10,1,31620,23520,8100,420,360,60,0,14541
3,0,57,9,35,29100,23460,5640,1980,360,0,2,7817
4,1,46,6,41,17160,11040,6120,540,360,0,1,7798


In [20]:
# Save the DataFrame as CSV. index=True is the pandas default, spelled out
# here: the row index is written as the first, unnamed column of the file.
sueño_pasos.to_csv(path_or_buf='df_sueño_pasos_2.csv', index=True)