# 睡眠数据分析

### 1. 导入库
---

In [3]:
import pandas as pd
import numpy as np

### 2. 导入数据
---

In [14]:
df = pd.read_csv('../data/sleep.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 13 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   person_id                400 non-null    int64  
 1   gender                   400 non-null    object 
 2   age                      400 non-null    int64  
 3   occupation               400 non-null    object 
 4   sleep_duration           400 non-null    float64
 5   sleep_quality            400 non-null    float64
 6   physical_activity_level  400 non-null    int64  
 7   stress_level             400 non-null    int64  
 8   bmi_category             400 non-null    object 
 9   blood_pressure           400 non-null    object 
 10  heart_rate               400 non-null    int64  
 11  daily_steps              400 non-null    int64  
 12  sleep_disorder           110 non-null    object 
dtypes: float64(2), int64(6), object(5)
memory usage: 40.8+ KB


### 3. 数据清洗
---

In [15]:
df.isna().sum()
# 发现sleep_disorder列有很多的缺失值
# 删除缺失值
df.drop(columns='sleep_disorder', inplace=True)
df.isna().sum()

person_id                  0
gender                     0
age                        0
occupation                 0
sleep_duration             0
sleep_quality              0
physical_activity_level    0
stress_level               0
bmi_category               0
blood_pressure             0
heart_rate                 0
daily_steps                0
dtype: int64

### 4. 数据特征的构造
---

In [22]:
# 将性别转换为category
df.head()
df['gender'] = df['gender'].astype('category')
df['occupation'] = df['occupation'].astype('category')
df['bmi_category'] = df['bmi_category'].astype('category')
df[['high', 'low']] = df['blood_pressure'].str.split('/', expand=True)
df.info()

# 睡眠质量的分箱
labels = ['差', '中', '优']
df['quality_level'] = pd.cut(df['sleep_quality'], bins=3, labels=labels)

age_labels = ['青少年', '中年', '老年']
df['age_level'] = pd.cut(df['age'], bins=3, labels=age_labels)
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 15 columns):
 #   Column                   Non-Null Count  Dtype   
---  ------                   --------------  -----   
 0   person_id                400 non-null    int64   
 1   gender                   400 non-null    category
 2   age                      400 non-null    int64   
 3   occupation               400 non-null    category
 4   sleep_duration           400 non-null    float64 
 5   sleep_quality            400 non-null    float64 
 6   physical_activity_level  400 non-null    int64   
 7   stress_level             400 non-null    int64   
 8   bmi_category             400 non-null    category
 9   blood_pressure           400 non-null    object  
 10  heart_rate               400 non-null    int64   
 11  daily_steps              400 non-null    int64   
 12  high                     400 non-null    object  
 13  low                      400 non-null    object  
 14  quality_le

Unnamed: 0,person_id,gender,age,occupation,sleep_duration,sleep_quality,physical_activity_level,stress_level,bmi_category,blood_pressure,heart_rate,daily_steps,high,low,quality_level,age_level
0,1,Male,29,Manual Labor,7.4,7.0,41,7,Obese,124/70,91,8539,124,70,中,青少年
1,2,Female,43,Retired,4.2,4.9,41,5,Obese,131/86,81,18754,131,86,中,中年
2,3,Male,44,Retired,6.1,6.0,107,4,Underweight,122/70,81,2857,122,70,中,中年
3,4,Male,29,Office Worker,8.3,10.0,20,10,Obese,124/72,55,6886,124,72,优,青少年
4,5,Male,67,Retired,9.1,9.5,19,4,Overweight,133/78,97,14945,133,78,优,老年


### 5. 数据分析
---

In [23]:
print(df['bmi_category'].value_counts())

bmi_category
Overweight     109
Underweight    102
Obese           98
Normal          91
Name: count, dtype: int64


In [26]:
# 根据不同的BMI来分组看睡眠质量
df.groupby(['age_level', 'bmi_category']).agg({
    'sleep_duration': 'mean',
    'sleep_quality': 'mean',
    'stress_level': 'mean'
})

  df.groupby(['age_level', 'bmi_category']).agg({


Unnamed: 0_level_0,Unnamed: 1_level_0,sleep_duration,sleep_quality,stress_level
age_level,bmi_category,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
青少年,Normal,8.1,6.332,4.86
青少年,Obese,8.25,6.253448,5.534483
青少年,Overweight,8.214286,6.171429,5.31746
青少年,Underweight,7.603279,5.883607,5.42623
中年,Normal,7.422222,6.65,4.944444
中年,Obese,7.805556,6.216667,5.888889
中年,Overweight,8.246154,5.95641,5.974359
中年,Underweight,8.4975,5.9075,5.75
老年,Normal,7.42,4.24,4.2
老年,Obese,7.9,5.025,8.0
