In [None]:
import pandas as pd
import numpy as np
import datetime
from datetime import date, timedelta
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Notebook-wide configuration.
warnings.filterwarnings("ignore")           # silence all library warnings in cell output
plt.style.use('ggplot')                     # ggplot look for every matplotlib/seaborn figure
pd.options.display.max_columns = None       # never truncate columns when displaying frames
idx = pd.IndexSlice                         # shorthand for MultiIndex slicing

In [None]:
# Load the survey data into the notebook-wide `data` frame.
data = pd.read_csv('Sleep_health_and_lifestyle_dataset.csv')
# pandas >= 2.0 parses the literal text "None" as a missing value when reading CSVs.
# NOTE(review): presumably 'Sleep Disorder' uses "None" to mean "no disorder" — confirm
# against the CSV. Restoring the label keeps that group in the groupby / get_dummies
# summaries further down instead of silently dropping it; this is a no-op when the
# column has no missing values.
data['Sleep Disorder'] = data['Sleep Disorder'].fillna('None')

In [None]:
# Preview the first five rows of the freshly loaded frame.
data.head(n=5)

In [None]:
# Print column names, non-null counts and dtypes.
data.info()

In [None]:
# Number of missing values in each column.
data.isnull().sum()

In [None]:
# Count rows that exactly repeat an earlier row.
duplicate_mask = data.duplicated()
duplicate_mask.sum()

In [None]:
# Split the "systolic/diastolic" text into two columns. str.split returns strings,
# so cast to int; otherwise both readings stay object-typed and are left out of
# (or break) the numeric correlation and pair plots later in the notebook.
data[['Systolic', 'Diastolic']] = (
    data['Blood Pressure'].str.split('/', expand=True).astype(int)
)

In [None]:
# The combined text column is redundant once Systolic/Diastolic exist.
data = data.drop(columns='Blood Pressure')

In [None]:
# Use the person identifier as the row index.
data = data.set_index('Person ID')

In [None]:
#Sleep Duration Analysis 

In [None]:
# Column reference for the sleep-duration section.
data.head(n=5)

In [None]:
# Distribution of sleep duration with a kernel-density overlay.
sns.histplot(data=data, x='Sleep Duration', kde=True)

In [None]:
# Mean sleep duration per gender.
sleep_duration_by_gender = data.groupby('Gender')['Sleep Duration']
sleep_duration_by_gender.mean()

In [None]:
# Mean sleep duration per occupation, longest sleepers first.
df = (
    data.groupby('Occupation')['Sleep Duration']
    .mean()
    .sort_values(ascending=False)
    .reset_index()
)

fig, ax = plt.subplots(figsize=(16, 8))
sns.barplot(data=df, x='Occupation', y='Sleep Duration', ax=ax)
# Dashed trace through the bar tops (positions 0..n-1 match the categorical axis).
ax.plot(df['Sleep Duration'], linestyle='--')
# Label the reference line explicitly: legend(['...']) with a bare list of labels
# attaches the text to the first artist on the axes (a bar), not to this line.
ax.axhline(
    data['Sleep Duration'].mean(),
    ls='--', color='black', alpha=0.3,
    label='Avg Sleep Duration',
)
ax.set_title('Avg Sleep Duration of Different Occupation')
ax.legend();

In [None]:
# Mean sleep duration and mean quality of sleep for every age.
# 'Age' is kept as a regular column so it can be referenced by name when plotting.
df = (
    data.groupby('Age')[['Sleep Duration', 'Quality of Sleep']]
    .mean()
    .reset_index()
)

fig, ax = plt.subplots(figsize=(16, 8))
# Labels are given per line: a positional legend(['a', 'b']) can bind the second
# label to the (empty) error band seaborn draws for the first line.
sns.lineplot(data=df, x='Age', y='Sleep Duration', label='Sleep Duration', ax=ax)
sns.lineplot(data=df, x='Age', y='Quality of Sleep', alpha=0.5,
             label='Quality of Sleep', ax=ax)
ax.set_ylabel('Average value')  # both series share this axis
ax.legend()
ax.set_title('Avg Sleep Duration vs. Avg Quality of Sleep');

# Observation: sleep duration and quality of sleep follow a similar trend.

In [None]:
# One horizontal bar chart per grouping column: mean sleep duration per group,
# with the overall mean drawn as a dashed reference line.
overall_sleep_mean = data['Sleep Duration'].mean()  # loop-invariant
for x in ['BMI Category', 'Sleep Disorder']:
    df = (
        data.groupby(x)['Sleep Duration']
        .mean()
        .sort_values(ascending=False)
        .reset_index()
    )
    fig, ax = plt.subplots(figsize=(18, 2))
    sns.barplot(data=df, x='Sleep Duration', y=x, ax=ax)
    ax.axvline(overall_sleep_mean, ls='--', alpha=0.3, color='black')
    ax.set_title(f'Avg Sleep Duration for Different {x}')

In [None]:
# Quality of Sleep Analysis 

In [None]:
# Column reference for the quality-of-sleep section.
data.head(n=5)

In [None]:
# Distinct quality-of-sleep scores, in order of first appearance.
pd.unique(data['Quality of Sleep'])

In [None]:
# How many people reported each quality-of-sleep score.
quality_scores = data['Quality of Sleep']
quality_scores.value_counts()

In [None]:
# Mean quality of sleep per occupation, best first.
df = (
    data.groupby('Occupation')['Quality of Sleep']
    .mean()
    .sort_values(ascending=False)
    .reset_index()
)

fig, ax = plt.subplots(figsize=(16, 8))
sns.barplot(data=df, x='Quality of Sleep', y='Occupation', ax=ax)
# Label the reference line itself: legend(['...']) with a bare list of labels
# attaches the text to the first artist on the axes (a bar), not to this line.
ax.axvline(
    data['Quality of Sleep'].mean(),
    color='black', ls='--', alpha=0.3,
    label='Avg Quality of Sleep',
)
ax.legend()
ax.set_title('Avg Quality of Sleep of Different Occupations');

In [None]:
# One horizontal bar chart per grouping column: mean quality of sleep per group,
# with the overall mean drawn as a dashed reference line.
overall_quality_mean = data['Quality of Sleep'].mean()  # loop-invariant
for x in ['BMI Category', 'Sleep Disorder']:
    df = (
        data.groupby(x)['Quality of Sleep']
        .mean()
        .sort_values(ascending=False)
        .reset_index()
    )
    fig, ax = plt.subplots(figsize=(18, 2))
    sns.barplot(data=df, x='Quality of Sleep', y=x, ax=ax)
    ax.axvline(overall_quality_mean, ls='--', alpha=0.3, color='black')
    ax.set_title(f'Avg Quality of Sleep for Different {x}')

In [None]:
# Share of each quality-of-sleep score within every gender.
quality_by_gender = data.groupby('Gender')['Quality of Sleep']
quality_by_gender.value_counts(normalize=True)

In [None]:
# Share of each quality-of-sleep score within every occupation.
quality_by_occupation = data.groupby('Occupation')['Quality of Sleep']
quality_by_occupation.value_counts(normalize=True)

In [None]:
#Physical Activity Level

In [None]:
# One horizontal bar chart per grouping column: mean physical activity level per
# group, with the overall mean drawn as a dashed reference line.
overall_activity_mean = data['Physical Activity Level'].mean()  # loop-invariant
for x in ['Occupation', 'BMI Category', 'Sleep Disorder']:
    df = (
        data.groupby(x)['Physical Activity Level']
        .mean()
        .sort_values(ascending=False)
        .reset_index()
    )
    fig, ax = plt.subplots(figsize=(16, 4))
    sns.barplot(data=df, x='Physical Activity Level', y=x, ax=ax)
    ax.axvline(overall_activity_mean, ls='--', alpha=0.3, color='black')
    ax.set_title(f'Avg Physical Activity Level for Different {x}')

In [None]:
# Pairwise correlation between the activity-related measurements.
activity_columns = ['Physical Activity Level', 'Heart Rate', 'Daily Steps']
data[activity_columns].corr()

In [None]:
# BMI Category and Sleep Disorder analysis by Occupation and Gender, using dummy (one-hot) counts

In [None]:
# Column reference for the BMI / sleep-disorder section.
data.head(n=5)

In [None]:
# Number of people in each BMI category.
bmi_category = data['BMI Category']
bmi_category.value_counts()

In [None]:
# BMI category counts within every occupation.
bmi_by_occupation = data.groupby('Occupation')['BMI Category']
bmi_by_occupation.value_counts()

In [None]:
# Count people per (BMI category, occupation): one-hot encode the BMI category,
# sum the indicators per occupation, then transpose so rows are BMI categories
# and columns are occupations.
# The indicator frame is named `bmi_dummies` rather than `dummy` so it cannot
# clobber the `dummy()` helper function this notebook defines further down when
# cells are re-run out of order.
bmi_dummies = pd.get_dummies(data['BMI Category'])
df = pd.concat([bmi_dummies, data['Occupation']], axis=1)
dfm = df.groupby('Occupation').sum().T  # reused by the pie-chart cell that follows
dfm

In [None]:
# One pie chart per column of `dfm`, sliced by the row labels of `dfm`.
pie_style = dict(startangle=90, autopct="%.0f%%", shadow=True)
for x in dfm.columns:
    fig, ax = plt.subplots()
    ax.pie(x=dfm[x], labels=dfm.index, **pie_style)
    ax.set_title(f"{x} of BMI Category Distribution", fontsize=12)

In [None]:
# Count people per (sleep disorder, occupation): one-hot encode the disorder,
# sum the indicators per occupation, then transpose so rows are disorders.
# Named `disorder_dummies` rather than `dummy` so it cannot clobber the `dummy()`
# helper function this notebook defines further down on out-of-order re-runs.
disorder_dummies = pd.get_dummies(data['Sleep Disorder'])
df = pd.concat([disorder_dummies, data['Occupation']], axis=1)
df.groupby('Occupation').sum().T

In [None]:
# Count people per (sleep disorder, gender): one-hot encode the disorder,
# sum the indicators per gender, then transpose so rows are disorders.
# Named `disorder_dummies` rather than `dummy` so it cannot clobber the `dummy()`
# helper function this notebook defines further down on out-of-order re-runs.
disorder_dummies = pd.get_dummies(data['Sleep Disorder'])
df = pd.concat([disorder_dummies, data["Gender"]], axis=1)
dfm = df.groupby('Gender').sum().T
dfm

In [None]:
# Sleep-disorder counts within every gender.
disorder_by_gender = data.groupby('Gender')['Sleep Disorder']
disorder_by_gender.value_counts()

In [None]:
def dummy(data, feature, variable):
    """Cross-tabulate ``feature`` against ``variable`` via one-hot counts.

    Parameters
    ----------
    data : DataFrame holding both columns.
    feature : name of the column to one-hot encode; its distinct values
        become the rows of the result.
    variable : name of the grouping column; its distinct values become
        the columns of the result.

    Returns
    -------
    DataFrame of counts: one row per ``feature`` value, one column per
    ``variable`` value.
    """
    counts_per_group = (
        pd.concat([pd.get_dummies(data[feature]), data[variable]], axis=1)
        .groupby(variable)
        .sum()
    )
    # Transpose so the encoded feature values run down the rows.
    return counts_per_group.T

In [None]:
# Sleep-disorder counts split by gender.
dummy(data, feature='Sleep Disorder', variable='Gender')

In [None]:
# BMI-category counts split by gender.
dummy(data, feature='BMI Category', variable='Gender')

In [None]:
# For each feature, draw one pie chart per gender showing how that gender is
# distributed over the feature's categories.
pie_style = dict(startangle=90, autopct="%.0f%%", shadow=True)
for x in ['Sleep Disorder', 'BMI Category']:
    dfm = dummy(data, x, 'Gender')  # rows: feature categories, columns: genders
    for y in dfm.columns:
        fig, ax = plt.subplots()
        ax.pie(x=dfm[y], labels=dfm.index, **pie_style)
        ax.set_title(f'{x} Distribution among {y}', fontsize=12)

In [None]:
#Correlation

In [None]:
# Column reference for the correlation section.
data.head(n=5)

In [None]:
# Pairwise correlation of the numeric columns only. The frame also holds text
# columns (e.g. Gender, Occupation); on pandas >= 2.0 calling corr() on them
# raises instead of silently skipping them, so select numeric columns first.
data.select_dtypes(include='number').corr()

In [None]:
# Scatter-matrix of the frame's columns against each other.
sns.pairplot(data=data)

In [None]:
# Annotated heatmap of the numeric-column correlations. Text columns are
# filtered out first because corr() raises on them with pandas >= 2.0.
numeric_corr = data.select_dtypes(include='number').corr()
sns.heatmap(numeric_corr, annot=True)

In [None]:
# Linear fit of stress level against sleep duration.
sns.lmplot(data=data, x='Sleep Duration', y='Stress Level')

In [None]:
# Linear fit of stress level against heart rate.
sns.lmplot(data=data, x='Heart Rate', y='Stress Level')