# Estudio descriptivo con SQR

In [1]:
# Enable IPython's autoreload extension so edits to imported modules are
# picked up without restarting the kernel (mode 2 = reload before each cell).
%load_ext autoreload
%autoreload 2

In [2]:
# Cargamos las librerías necesarias
from sklearn import preprocessing
from statistics import mode
import gower
from sklearn.cluster import AgglomerativeClustering
import scipy.cluster.hierarchy as sch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [3]:
# Move up one level so relative paths such as 'data/processed/...' resolve.
# The guard makes the cell idempotent: an unconditional chdir('..') climbs one
# more directory on every re-run and breaks the data path.
# NOTE(review): assumes the notebook's own folder has no 'data' directory — confirm.
if not os.path.isdir('data'):
    os.chdir('..')

In [4]:
# Load the SQR profiles and replace whatever index was stored with a fresh
# default one (the old index is dropped, not kept as a column).
perfiles_sqr = (
    pd.read_parquet('data/processed/perfiles_sqr_filtrado.parquet')
    .reset_index(drop=True)
)

### SQR por sexo

In [5]:
# SQR summary statistics (min, max, mean, std, group size) for each sex.
# Aggregations are named by string: passing the builtins min/max to .agg is
# deprecated in recent pandas and emits a FutureWarning.
perfiles_sqr.groupby('sexo').agg({'sqr': ['min', 'max', 'mean', 'std', 'size']})


Unnamed: 0_level_0,sqr,sqr,sqr,sqr,sqr
Unnamed: 0_level_1,min,max,mean,std,size
sexo,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Female,0.093528,97.0198,31.935421,17.525474,303
Male,0.039927,98.9901,34.335482,18.758541,364


### SQR por posición

In [6]:
# SQR summary statistics (min, max, mean, std, group size) for each position.
# Aggregations are named by string: passing the builtins min/max to .agg is
# deprecated in recent pandas and emits a FutureWarning.
perfiles_sqr.groupby('posicion').agg({'sqr': ['min', 'max', 'mean', 'std', 'size']})

Unnamed: 0_level_0,sqr,sqr,sqr,sqr,sqr
Unnamed: 0_level_1,min,max,mean,std,size
posicion,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Lateral,0.039927,98.9901,32.27568,17.421881,456
Supine,0.132861,97.6852,35.340464,19.758723,211


### SQR por altura según el sexo

In [12]:
# Bin 'altura' into sex-specific height bands. Both columns are computed for
# every row; the per-sex tables use the matching one after filtering on 'sexo'.
# NOTE(review): cut points look like centimetres while labels read as metres — confirm.
height_bands = {
    "altura_cat_hombre": ([0, 160, 170, 185, 250],
                          ['<1.6', '1.6-1.7', '1.7-1.85', '>1.85']),
    "altura_cat_mujer": ([0, 150, 160, 170, 250],
                         ['<1.50', '1.5-1.6', '1.6-1.7', '>1.7']),
}
for band_column, (edges, band_labels) in height_bands.items():
    perfiles_sqr[band_column] = pd.cut(
        perfiles_sqr['altura'],
        bins=edges,
        labels=band_labels,
        include_lowest=True,
    )

#### Hombres

In [11]:
# SQR summary statistics for men, by male height band.
# - observed=False is explicit because the grouper is categorical (built with
#   pd.cut) and pandas is changing the implicit default; this keeps empty
#   bands in the table.
# - Aggregations are named by string; passing the builtins min/max is deprecated.
(
    perfiles_sqr[perfiles_sqr['sexo'] == "Male"]
    .groupby('altura_cat_hombre', observed=False)
    .agg({'sqr': ['min', 'max', 'mean', 'std', 'size']})
)

Unnamed: 0_level_0,sqr,sqr,sqr,sqr,sqr
Unnamed: 0_level_1,min,max,mean,std,size
altura_cat_hombre,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
<1.6,14.3623,56.9062,36.4623,12.839455,9
1.6-1.7,0.039927,98.9901,33.592818,20.75767,58
1.7-1.85,0.132861,98.1634,33.55504,18.407528,264
>1.85,11.7184,97.6697,41.304267,18.459575,33


#### Mujeres

In [13]:
# SQR summary statistics for women, by female height band.
# - observed=False is explicit because the grouper is categorical (built with
#   pd.cut) and pandas is changing the implicit default; this keeps empty
#   bands in the table.
# - Aggregations are named by string; passing the builtins min/max is deprecated.
(
    perfiles_sqr[perfiles_sqr['sexo'] == "Female"]
    .groupby('altura_cat_mujer', observed=False)
    .agg({'sqr': ['min', 'max', 'mean', 'std', 'size']})
)

Unnamed: 0_level_0,sqr,sqr,sqr,sqr,sqr
Unnamed: 0_level_1,min,max,mean,std,size
altura_cat_mujer,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
<1.50,19.5363,51.3709,33.921712,12.155514,8
1.5-1.6,0.957338,73.0798,31.368161,16.325843,92
1.6-1.7,0.093528,97.0198,32.315135,17.980602,162
>1.7,1.35201,76.6299,31.320396,19.568137,41


### SQR por peso según el sexo

In [15]:
# Bin 'peso' into sex-specific weight bands. Both columns are computed for
# every row; the per-sex tables use the matching one after filtering on 'sexo'.
weight_bands = {
    "peso_cat_hombre": ([0, 60, 75, 90, 250], ['<60', '60-75', '75-90', '>90']),
    "peso_cat_mujer": ([0, 50, 65, 80, 250], ['<50', '50-65', '65-80', '>80']),
}
for band_column, (edges, band_labels) in weight_bands.items():
    perfiles_sqr[band_column] = pd.cut(
        perfiles_sqr['peso'],
        bins=edges,
        labels=band_labels,
        include_lowest=True,
    )

#### Hombres

In [16]:
# SQR summary statistics for men, by male weight band.
# - observed=False is explicit because the grouper is categorical (built with
#   pd.cut) and pandas is changing the implicit default; this keeps empty
#   bands in the table.
# - Aggregations are named by string; passing the builtins min/max is deprecated.
(
    perfiles_sqr[perfiles_sqr['sexo'] == "Male"]
    .groupby('peso_cat_hombre', observed=False)
    .agg({'sqr': ['min', 'max', 'mean', 'std', 'size']})
)

Unnamed: 0_level_0,sqr,sqr,sqr,sqr,sqr
Unnamed: 0_level_1,min,max,mean,std,size
peso_cat_hombre,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
<60,13.8221,46.0235,25.0864,13.184555,5
60-75,0.039927,98.9901,32.843877,19.927983,85
75-90,0.624949,98.1634,34.694258,19.093157,156
>90,0.132861,97.6697,35.327539,17.643573,118


#### Mujeres

In [18]:
# SQR summary statistics for women, by female weight band.
# - observed=False is explicit because the grouper is categorical (built with
#   pd.cut) and pandas is changing the implicit default; this keeps empty
#   bands in the table.
# - Aggregations are named by string; passing the builtins min/max is deprecated.
(
    perfiles_sqr[perfiles_sqr['sexo'] == "Female"]
    .groupby('peso_cat_mujer', observed=False)
    .agg({'sqr': ['min', 'max', 'mean', 'std', 'size']})
)

Unnamed: 0_level_0,sqr,sqr,sqr,sqr,sqr
Unnamed: 0_level_1,min,max,mean,std,size
peso_cat_mujer,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
<50,13.2646,56.1226,31.834579,13.259204,19
50-65,0.093528,73.0798,28.972477,16.757007,148
65-80,2.20728,83.704,35.262348,17.07242,99
>80,1.35201,97.0198,34.937204,21.745213,37


## Ejemplos

In [20]:
# Example subgroups: "big men" (altura >= 180 and peso >= 80) and
# "small women" (altura <= 160 and peso <= 60), each split by 'posicion'.
is_big_man = (
    (perfiles_sqr["sexo"] == "Male")
    & (perfiles_sqr["altura"] >= 180)
    & (perfiles_sqr["peso"] >= 80)
)
is_small_woman = (
    (perfiles_sqr["sexo"] == "Female")
    & (perfiles_sqr["altura"] <= 160)
    & (perfiles_sqr["peso"] <= 60)
)
is_supine = perfiles_sqr["posicion"] == "Supine"
is_lateral = perfiles_sqr["posicion"] == "Lateral"

cat_bigmen = perfiles_sqr[is_big_man & is_supine]
cat_smallwomen = perfiles_sqr[is_small_woman & is_lateral]
# Same body-type groups with the position swapped
cat_bigmen2 = perfiles_sqr[is_big_man & is_lateral]
cat_smallwomen2 = perfiles_sqr[is_small_woman & is_supine]

In [23]:
# Big men, Supine position: distribution summary of SQR
cat_bigmen.loc[:, 'sqr'].describe()

count    50.000000
mean     35.668762
std      20.594726
min       0.624949
25%      22.958500
50%      30.469150
75%      40.125200
max      97.685200
Name: sqr, dtype: float64

In [24]:
# Big men, Lateral position: distribution summary of SQR
cat_bigmen2.loc[:, 'sqr'].describe()

count    77.000000
mean     34.757574
std      15.158779
min       0.546476
25%      24.619900
50%      33.276000
75%      43.184000
max      76.971000
Name: sqr, dtype: float64

In [25]:
# Small women, Lateral position: distribution summary of SQR.
# (cat_smallwomen is filtered on posicion == "Lateral"; the previous label
# said "Supine", which did not match the data being described.)
cat_smallwomen['sqr'].describe()

count    45.000000
mean     26.975716
std      13.893197
min       2.196300
25%      19.508300
50%      25.259200
75%      32.304000
max      56.668100
Name: sqr, dtype: float64

In [26]:
# Small women, Supine position: distribution summary of SQR.
# (cat_smallwomen2 is filtered on posicion == "Supine"; the previous label
# said "Lateral", which did not match the data being described.)
cat_smallwomen2['sqr'].describe()

count    14.000000
mean     34.286717
std      20.089276
min       0.957338
25%      18.470125
50%      31.679300
75%      48.853800
max      71.569700
Name: sqr, dtype: float64