In this notebook, I analyze the Fitbit dataset for the Bellabeat Capstone project. 
In a previous notebook, I loaded that dataset into my personal BigQuery area. Here I will analyze the data from that BigQuery dataset.

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt 
from google.cloud import bigquery

# BigQuery project and dataset that hold the Fitabase tables queried below.
projectID = 'test-project-306614'
datasetID = 'Fitabase'

# One shared client, reused by every query cell in this notebook.
client = bigquery.Client(project=projectID, location='US')  ## not sure if the location is needed
dataset = client.get_dataset(datasetID)

# Print the name of every table in the dataset as a quick sanity check.
tables = list(client.list_tables(dataset))
for table in tables:
    print(table.table_id)

## Daily Activity

I'll first concentrate on daily activity. 

## Daily active 

In [2]:
# Per-participant totals of every dailyActivity metric inside the date window.
# The HAVING clause keeps only participants with more than 20 counted days,
# and rows come back ordered by their total step count.
query = f""" 
SELECT 
    CAST(Id AS STRING) AS newId,
    COUNT(ActivityDate) AS countDay,
    SUM(TotalSteps) AS sumTotalSteps,
    SUM(TotalDistance) AS sumTotalDistance,
    SUM(TrackerDistance) AS sumTrackerDistance,
    SUM(LoggedActivitiesDistance) AS sumLoggedActivitiesDistance,
    SUM(VeryActiveDistance) AS sumVeryActiveDistance,
    SUM(ModeratelyActiveDistance) AS sumModeratelyActiveDistance,
    SUM(LightActiveDistance) AS sumLightActiveDistance,
    SUM(SedentaryActiveDistance) AS sumSedentaryActiveDistance,
    SUM(VeryActiveMinutes) AS sumVeryActiveMinutes,
    SUM(FairlyActiveMinutes) AS sumFairlyActiveMinutes,
    SUM(LightlyActiveMinutes) AS sumLightlyActiveMinutes,
    SUM(SedentaryMinutes) AS sumSedentaryMinutes,
    SUM(Calories) AS sumCalories
FROM `{projectID}.{datasetID}.dailyActivity`
--WHERE Id <> 4057192912                      -- removing dirty data
WHERE ActivityDate BETWEEN '2016-04-12' AND '2016-05-07'
GROUP BY Id
HAVING countDay > 20 
ORDER BY sumTotalSteps
"""

query_job = client.query(query)
sumDailyActivity = query_job.to_dataframe()

# One horizontal bar per participant (newId is a string, so it is categorical).
plt.figure(figsize=(10, 10))
sns.barplot(
    data=sumDailyActivity,
    x='sumTotalSteps',
    y='newId',
).set(
    title='Overall total steps per individual',
    xlabel='Sum of total steps',
    ylabel='Person ID',
)

In [3]:
# Distribution of the per-participant step totals; bin edges run from
# 10,000 in steps of 50,000 and stop below 500,000.
plt.figure(figsize=(10, 10))
sns.histplot(
    data=sumDailyActivity,
    x='sumTotalSteps',
    bins=range(10000, 500000, 50000),
).set(
    title='Histogram of total steps for all the individuals',
    xlabel='Sum of total steps',
    ylabel='Participants',
)

In [4]:
# Total calories against total distance, one point per participant, with the
# linear regression fit that regplot draws on top of the scatter.
plt.figure(figsize=(10, 5))
sns.regplot(
    data=sumDailyActivity,
    x='sumTotalDistance',
    y='sumCalories',
).set(
    # Title and axis labels name the columns that are actually plotted.
    title='Total calories against total distance for all the individuals',
    xlabel='Sum of total distance',
    ylabel='Sum of calories',
)

In [5]:
# Totals per participant and weekday inside the date window, with four
# participant Ids excluded. numDayOfWeek is the number of rows that went
# into each (Id, DayOfWeek) total.
query = f""" 
SELECT 
    Id,
    FORMAT_DATE('%A', ActivityDate) AS DayOfWeek,
    COUNT(Id) AS numDayOfWeek,
    SUM(TotalSteps) AS sumTotalSteps,
    SUM(TotalDistance) AS sumTotalDistance,
    SUM(TrackerDistance) AS sumTrackerDistance,
    SUM(LoggedActivitiesDistance) AS sumLoggedActivitiesDistance,
    SUM(VeryActiveDistance) AS sumVeryActiveDistance,
    SUM(ModeratelyActiveDistance) AS sumModeratelyActiveDistance,
    SUM(LightActiveDistance) AS sumLightActiveDistance,
    SUM(SedentaryActiveDistance) AS sumSedentaryActiveDistance,
    SUM(VeryActiveMinutes) AS sumVeryActiveMinutes,
    SUM(FairlyActiveMinutes) AS sumFairlyActiveMinutes,
    SUM(LightlyActiveMinutes) AS sumLightlyActiveMinutes,
    SUM(SedentaryMinutes) AS sumSedentaryMinutes,    
    SUM(Calories) AS sumCalories
FROM `{projectID}.{datasetID}.dailyActivity`
WHERE (ActivityDate BETWEEN '2016-04-12' AND '2016-05-07') 
    AND (Id NOT IN (4057192912, 3372868164, 8253242879, 2347167796))
GROUP BY Id, DayOfWeek
ORDER BY Id, DayOfWeek
"""

query_job = client.query(query)
dataWeekly = query_job.to_dataframe()
display(dataWeekly.head())

# Bars are drawn in calendar order rather than the alphabetical order
# in which the query returns the weekday names.
plt.figure(figsize=(10, 5))
sns.barplot(
    data=dataWeekly,
    x='DayOfWeek',
    y='sumTotalSteps',
    order=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
).set(
    title='Number of total steps for all the participants for each day of the week',
    xlabel="Day of the Week",
    ylabel='Sum of total steps',
)

In [6]:
# Spot check for a single participant (Id 1503960366): how many rows fall
# on each weekday inside the date window.
query = f""" 
SELECT 
--    *,
    FORMAT_DATE('%A', ActivityDate) AS DayOfWeek,
    COUNT(*)
FROM `{projectID}.{datasetID}.dailyActivity`
WHERE (ActivityDate BETWEEN '2016-04-12' AND '2016-05-07') 
    AND (Id NOT IN (4057192912, 3372868164, 8253242879, 2347167796))
    AND Id = 1503960366
GROUP BY DayOfWeek
"""

query_job = client.query(query)
data = query_job.to_dataframe()
display(data)

In [7]:
# Name of the dailyActivity metric to plot; `dataWeekly` must contain the
# matching `sum<quantity>` column (built by the per-weekday totals query).
quantity = 'SedentaryActiveDistance'

# Turn each (participant, weekday) total into a per-day average by dividing
# by the number of rows that were summed for that participant and weekday.
dataWeekly[f'weighted{quantity}'] = dataWeekly[f'sum{quantity}'] / dataWeekly['numDayOfWeek']

plt.figure(figsize=(10, 5))
sns.barplot(
    data=dataWeekly,
    x='DayOfWeek',
    y=f'weighted{quantity}',
    order=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
).set(
    # Labels are derived from `quantity` so they always describe the plotted column.
    title=f'Average {quantity} per recorded day for each day of the week',
    xlabel="Day of the Week",
    ylabel=f'Average {quantity} per recorded day',
)

In [8]:
# Mean and standard deviation of every dailyActivity metric over all rows
# inside the date window (four participant Ids excluded). The query has no
# GROUP BY, so it returns a single row.
query = f""" 
SELECT 
--    CAST(Id AS STRING) AS newId,
    AVG(TotalSteps) AS avgTotalSteps,
    STDDEV(TotalSteps) AS stddevTotalSteps,
    AVG(TotalDistance) AS avgTotalDistance,
    STDDEV(TotalDistance) AS stddevTotalDistance,
    AVG(TrackerDistance) AS avgTrackerDistance,
    STDDEV(TrackerDistance) AS stddevTrackerDistance,
    AVG(LoggedActivitiesDistance) AS avgLoggedActivitiesDistance,
    STDDEV(LoggedActivitiesDistance) AS stddevLoggedActivitiesDistance,
    AVG(VeryActiveDistance) AS avgVeryActiveDistance,
    STDDEV(VeryActiveDistance) AS stddevVeryActiveDistance,
    AVG(ModeratelyActiveDistance) AS avgModeratelyActiveDistance,
    STDDEV(ModeratelyActiveDistance) AS stddevModeratelyActiveDistance,
    AVG(LightActiveDistance) AS avgLightActiveDistance,
    STDDEV(LightActiveDistance) AS stddevLightActiveDistance,
    AVG(SedentaryActiveDistance) AS avgSedentaryActiveDistance,
    STDDEV(SedentaryActiveDistance) AS stddevSedentaryActiveDistance,
    AVG(VeryActiveMinutes) AS avgVeryActiveMinutes,
    STDDEV(VeryActiveMinutes) AS stddevVeryActiveMinutes,
    AVG(FairlyActiveMinutes) AS avgFairlyActiveMinutes,
    STDDEV(FairlyActiveMinutes) AS stddevFairlyActiveMinutes,
    AVG(LightlyActiveMinutes) AS avgLightlyActiveMinutes,
    STDDEV(LightlyActiveMinutes) AS stddevLightlyActiveMinutes,
    AVG(SedentaryMinutes) AS avgSedentaryMinutes,    
    STDDEV(SedentaryMinutes) AS stddevSedentaryMinutes,    
    AVG(Calories) AS avgCalories,
    STDDEV(Calories) AS stddevCalories
FROM `{projectID}.{datasetID}.dailyActivity`
WHERE (ActivityDate BETWEEN '2016-04-12' AND '2016-05-07') 
    AND (Id NOT IN (4057192912, 3372868164, 8253242879, 2347167796))
--GROUP BY Id
ORDER BY avgTotalSteps
"""

query_job = client.query(query)
# Stored under its own name so the per-weekday `dataWeekly` frame is not
# overwritten by this unrelated one-row summary (cells that read the
# `sum...` / `numDayOfWeek` columns of `dataWeekly` stay re-runnable).
dailyActivityStats = query_job.to_dataframe()
# Transposed so each statistic is shown on its own row.
display(dailyActivityStats.transpose())

## Daily sleep 

In [9]:
# Number of distinct sleepDay rows per participant, smallest first.
# SELECT DISTINCT * removes exact duplicate rows before counting.
query = f"""
WITH cleanedSleepDay AS(
    SELECT DISTINCT *
    FROM `{projectID}.{datasetID}.sleepDay`
)
SELECT 
    Id,
    COUNT(*) AS count
FROM cleanedSleepDay
GROUP BY Id
ORDER BY count
"""

query_job = client.query(query)
data = query_job.to_dataframe()
display(data)

In [10]:
# Per-participant sleep totals and "weighted" values (total divided by the
# number of de-duplicated sleepDay rows), with nine participant Ids excluded.
query = f""" 
WITH cleanedSleepDay AS(
    SELECT DISTINCT *
    FROM `{projectID}.{datasetID}.sleepDay`
)
SELECT 
    CAST(Id AS STRING) as newId,
    SUM(TotalMinutesAsleep) as sumTotalMinutesAsleep,
    SUM(TotalMinutesAsleep)/COUNT(*) as weightedSumTotalMinutesAsleep,
    SUM(TotalTimeInBed) as sumTotalTimeInBed,
    SUM(TotalTimeInBed)/COUNT(*) as weightedSumTotalTimeInBed,
    (SUM(TotalTimeInBed) - SUM(TotalMinutesAsleep)) as sumTotalTimeAwakeInBed,
    (SUM(TotalTimeInBed) - SUM(TotalMinutesAsleep))/COUNT(*) as weightedSumTotalTimeAwakeInBed,
FROM cleanedSleepDay
WHERE (Id NOT IN (2320127002, 7007744171, 1844505072, 6775888955, 8053475328, 1644430081, 1927972279, 4558609924, 4020332650))
GROUP BY Id --, TotalSleepRecords
ORDER BY weightedSumTotalMinutesAsleep
"""

query_job = client.query(query)
data = query_job.to_dataframe()
display(data)

# One horizontal bar per participant, in the order returned by the query.
plt.figure(figsize=(10, 5))
sns.barplot(
    data=data,
    y='newId',
    x='weightedSumTotalMinutesAsleep',
).set(
    title='Weighted sum of the total minutes asleep of the participants',
    ylabel='Participant Id',
    xlabel='Weighted sum of total minutes asleep',
)

In [11]:
# Per-participant time in bed that was not spent asleep
# (TotalTimeInBed - TotalMinutesAsleep, divided by the number of sleep rows).
# Reads `data` as produced by the per-participant sleep query.
plt.figure(figsize=(10, 5))
sns.barplot(
    data=data,
    y='newId',
    x='weightedSumTotalTimeAwakeInBed',
).set(
    # Explicit title and labels so the figure can be read on its own.
    title='Weighted sum of the total time awake in bed of the participants',
    ylabel='Participant Id',
    xlabel='Weighted sum of total time awake in bed',
)

In [12]:
# Mean and standard deviation, across participants, of the per-participant
# sleep values (total divided by number of de-duplicated sleepDay rows).
# The output aliases are spelled "Minutes" so the displayed row labels read
# correctly.
query = f""" 
WITH cleanedSleepDay AS(
    SELECT DISTINCT *
    FROM `{projectID}.{datasetID}.sleepDay`
),
weightedTable AS (
    SELECT 
        CAST(Id AS STRING) as newId,
        SUM(TotalMinutesAsleep)/COUNT(*) as weightedSumTotalMinutesAsleep,
        SUM(TotalTimeInBed)/COUNT(*) as weightedSumTotalTimeInBed,
        (SUM(TotalTimeInBed) - SUM(TotalMinutesAsleep))/COUNT(*) as weightedSumTotalTimeAwakeInBed,
    FROM cleanedSleepDay
    WHERE (Id NOT IN (2320127002, 7007744171, 1844505072, 6775888955, 8053475328, 1644430081, 1927972279, 4558609924, 4020332650))
    GROUP BY Id 
    ORDER BY weightedSumTotalMinutesAsleep
)
SELECT 
    AVG(weightedSumTotalMinutesAsleep) AS avgWeightedSumTotalMinutesAsleep,
    STDDEV(weightedSumTotalMinutesAsleep) AS stddevWeightedSumTotalMinutesAsleep,
    AVG(weightedSumTotalTimeInBed) AS avgWeightedSumTotalTimeInBed,
    STDDEV(weightedSumTotalTimeInBed) AS stddevWeightedSumTotalTimeInBed,
    AVG(weightedSumTotalTimeAwakeInBed) AS avgWeightedSumTotalTimeAwakeInBed,
    STDDEV(weightedSumTotalTimeAwakeInBed) AS stddevWeightedSumTotalTimeAwakeInBed,
FROM weightedTable

"""

query_job = client.query(query)
data = query_job.to_dataframe()
# Transposed so each statistic is shown on its own row.
display(data.transpose())

## Weight Information

In [13]:
# Number of distinct weightLogInfo rows per participant, smallest first.
query = f""" 
WITH cleanedWeightLogInfo AS(
    SELECT DISTINCT *
    FROM `{projectID}.{datasetID}.weightLogInfo`
)
SELECT 
    CAST(Id AS STRING) as newId,
    COUNT(*) as count
--    AVG(WeightKg) AS avgWeightKg,
--    MIN(WeightKg) AS minWeightKg,
--    MAX(WeightKg) AS maxWeightKg,
FROM cleanedWeightLogInfo
GROUP BY Id 
ORDER BY count
"""

query_job = client.query(query)
data = query_job.to_dataframe()
display(data)

## Correlations between Sleep and Activities

In [14]:
# Join each de-duplicated sleepDay row to the dailyActivity row of the same
# participant and date, then exclude nine participant Ids.
# NOTE(review): Id is taken from dailyActivity.*, so sleep rows without a
# matching activity row get a NULL Id and are removed by the NOT IN filter;
# the RIGHT JOIN therefore behaves like an INNER JOIN - confirm this is intended.
query = f""" 
WITH dailyActivity AS (
    SELECT 
        * EXCEPT(ActivityDate),
        CAST(ActivityDate AS DATE) as Date
    FROM `{projectID}.{datasetID}.dailyActivity`    
),
cleanSleepDay AS (
    SELECT DISTINCT *
    FROM `{projectID}.{datasetID}.sleepDay`    
),
sleepDay AS (
    SELECT 
        * EXCEPT(sleepDay),
        CAST(sleepDay AS DATE) as Date
    FROM cleanSleepDay
),
finalTable AS (
    SELECT 
        dailyActivity.*,
        sleepDay.TotalSleepRecords,
        sleepDay.TotalTimeInBed,
        sleepDay.TotalMinutesAsleep,
    FROM dailyActivity
    RIGHT JOIN sleepDay
        ON dailyActivity.Id = sleepDay.Id AND dailyActivity.Date = sleepDay.Date
)
SELECT
    * EXCEPT(Id),
    CAST(Id AS STRING) AS newId,
--    COUNT(*) AS count
FROM finalTable
WHERE (Id NOT IN (2320127002, 7007744171, 1844505072, 6775888955, 8053475328, 1644430081, 1927972279, 4558609924, 4020332650))
--GROUP BY Id
--ORDER BY count
"""

query_job = client.query(query)
finalDaylyTable = query_job.to_dataframe()
display(finalDaylyTable.head())

plt.figure(figsize=(15, 15))
# Correlate the numeric columns only: the frame also holds the string newId
# and the Date column, and DataFrame.corr() raises on non-numeric columns in
# pandas >= 2.0 instead of silently dropping them. select_dtypes works on
# older pandas versions as well.
sns.heatmap(
    data=finalDaylyTable.select_dtypes(include='number').corr(),
    linewidths=.5,
    annot=True,
).set(
    title='Correlation matrix of the daily features for 15 participants',
)

In [15]:
# One point per joined (participant, date) row, coloured by participant.
plt.figure(figsize=(10, 5))
sns.scatterplot(
    data=finalDaylyTable,
    x='SedentaryMinutes',
    y='TotalMinutesAsleep',
    hue='newId',
).set(
    title='Total minutes asleep against sedentary minutes for 15 participants',
    xlabel='Sedentary Minutes',
    ylabel='Total Minutes Asleep',
)
# Anchor the legend just outside the right edge of the axes (loc=2 is 'upper left').
plt.legend(
    title='Participant Id',
    bbox_to_anchor=(1.05, 1),
    loc=2,
    borderaxespad=0.,
)

In [16]:
# One point per joined (participant, date) row, coloured by participant.
plt.figure(figsize=(10, 5))
sns.scatterplot(
    data=finalDaylyTable,
    x='FairlyActiveMinutes',
    y='TotalMinutesAsleep',
    hue='newId',
).set(
    title='Total minutes asleep against fairly active minutes for 15 participants',
    xlabel='Fairly Active Minutes',
    ylabel='Total Minutes Asleep',
)
# Anchor the legend just outside the right edge of the axes (loc=2 is 'upper left').
plt.legend(
    title='Participant Id',
    bbox_to_anchor=(1.05, 1),
    loc=2,
    borderaxespad=0.,
)

# Hourly Analysis

## Hourly Steps

In [17]:

# For each participant, count the dates inside the window that have exactly
# 24 hourly step rows (after removing exact duplicate rows).
query = f""" 
WITH cleanhourlySteps AS ( SELECT DISTINCT * FROM `{projectID}.{datasetID}.hourlySteps` ),
hourlySteps AS (
    SELECT --*
        CAST(Id AS STRING) AS newId,
        CAST(ActivityHour AS DATE) AS Date,
        CAST(ActivityHour AS TIME) AS Time, 
        StepTotal
    FROM cleanhourlySteps
    WHERE (Id NOT IN (4057192912, 3372868164, 8253242879, 2347167796))
),
tmpTable AS (
    SELECT 
        newId,
        Date,
        COUNT(Time) As count,
    FROM hourlySteps
    WHERE (Date BETWEEN '2016-04-12' AND '2016-05-07') 
    GROUP BY newId, Date
    HAVING count = 24
    --ORDER BY count
)
SELECT 
    newId, 
    COUNT(*) as count
FROM tmpTable
GROUP BY newId
ORDER BY count
"""

query_job = client.query(query)
data = query_job.to_dataframe()
# Bare last expression: the frame is rendered as the cell output.
data

In [18]:

# Steps per participant, weekday and time of day inside the date window.
# weightedSumStepTotal is the total divided by the number of rows in each
# (newId, DayOfWeek, Time) group.
query = f""" 
WITH hourlySteps AS (
    SELECT --*
        CAST(Id AS STRING) AS newId,
        CAST(ActivityHour AS DATE) AS Date,
        CAST(ActivityHour AS TIME) AS Time, 
        FORMAT_DATE('%A', ActivityHour) AS DayOfWeek,
        StepTotal
    FROM `{projectID}.{datasetID}.hourlySteps`
    WHERE (Id NOT IN (4057192912, 3372868164, 8253242879, 2347167796))
)
SELECT --*
    newId, 
    DayOfWeek,
    Time,
    SUM(StepTotal) AS sumStepTotal,
    COUNT(newId) AS numDayOfWeek,
    SUM(StepTotal)/COUNT(newId) AS weightedSumStepTotal,
FROM hourlySteps
WHERE (Date BETWEEN '2016-04-12' AND '2016-05-07') 
GROUP BY newId, DayOfWeek, Time
ORDER BY Time
"""

query_job = client.query(query)
hourlyStepData = query_job.to_dataframe()
display(hourlyStepData.head())

In [19]:
pd.plotting.register_matplotlib_converters()
# sns.catplot is a figure-level function that builds its own figure, so a
# preceding plt.figure(figsize=...) has no effect on it; the size is set
# through the height and aspect arguments instead.
sns.catplot(
    data=hourlyStepData,
    x='Time',
    y='weightedSumStepTotal',
    hue='DayOfWeek',
    hue_order=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
    kind='point',
    aspect=4,
).set(
    title='Weighted sum of total steps per hour for different days of the week',
    ylabel='Sum of total steps',
    xlabel='Time of the day',
)

## Hourly Calories

In [20]:

# Calories per participant, weekday and time of day inside the date window.
# weightedSumCalories is the total divided by the number of rows in each
# (newId, DayOfWeek, Time) group.
query = f""" 
WITH hourlyCalories AS (
    SELECT --*
        CAST(Id AS STRING) AS newId,
        CAST(ActivityHour AS DATE) AS Date,
        CAST(ActivityHour AS TIME) AS Time, 
        FORMAT_DATE('%A', ActivityHour) AS DayOfWeek,
        Calories
    FROM `{projectID}.{datasetID}.hourlyCalories`
    WHERE (Id NOT IN (4057192912, 3372868164, 8253242879, 2347167796))
)
SELECT --*
    newId, 
    DayOfWeek,
    Time,
    SUM(Calories) AS sumCalories,
    COUNT(newId) AS numDayOfWeek,
    SUM(Calories)/COUNT(newId) AS weightedSumCalories,
FROM hourlyCalories
WHERE (Date BETWEEN '2016-04-12' AND '2016-05-07') 
GROUP BY newId, DayOfWeek, Time
ORDER BY Time
"""

query_job = client.query(query)
hourlyCaloriesData = query_job.to_dataframe()
display(hourlyCaloriesData.head())

pd.plotting.register_matplotlib_converters()
# One line per weekday across the hours of the day; figure size is set via aspect.
sns.catplot(
    data=hourlyCaloriesData,
    x='Time',
    y='weightedSumCalories',
    hue='DayOfWeek',
    hue_order=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
    kind='point',
    aspect=4,
).set(
    title='Weighted sum of calories per hour for different days of the week',
    ylabel='Sum of calories',
    xlabel='Time of the day',
)

## Hourly Intensities

In [21]:

# Total and average intensity per participant, weekday and time of day inside
# the date window. The weighted columns are the totals divided by the number
# of rows in each (newId, DayOfWeek, Time) group.
query = f""" 
-- SELECT DISTINCT * FROM `{projectID}.{datasetID}.hourlyIntensities`
WITH hourlyIntensities AS (
    SELECT --*
        CAST(Id AS STRING) AS newId,
        CAST(ActivityHour AS DATE) AS Date,
        CAST(ActivityHour AS TIME) AS Time, 
        FORMAT_DATE('%A', ActivityHour) AS DayOfWeek,
        TotalIntensity,
        AverageIntensity,
    FROM `{projectID}.{datasetID}.hourlyIntensities`
    WHERE (Id NOT IN (4057192912, 3372868164, 8253242879, 2347167796))
)
SELECT --*
    newId, 
    DayOfWeek,
    Time,
    COUNT(newId) AS numDayOfWeek,
    SUM(TotalIntensity) AS sumTotalIntensity,
    SUM(TotalIntensity)/COUNT(newId) AS weightedSumTotalIntensity,
    SUM(AverageIntensity) AS sumAverageIntensity,
    SUM(AverageIntensity)/COUNT(newId) AS weightedSumAverageIntensity,
FROM hourlyIntensities
WHERE (Date BETWEEN '2016-04-12' AND '2016-05-07') 
GROUP BY newId, DayOfWeek, Time
ORDER BY Time
"""

query_job = client.query(query)
hourlyIntensitiesData = query_job.to_dataframe()
display(hourlyIntensitiesData.head())

pd.plotting.register_matplotlib_converters()
# One line per weekday across the hours of the day; figure size is set via aspect.
sns.catplot(
    data=hourlyIntensitiesData,
    x='Time',
    y='weightedSumTotalIntensity',
    hue='DayOfWeek',
    hue_order=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
    kind='point',
    aspect=4,
).set(
    title='Weighted sum of total intensities per hour for different days of the week',
    ylabel='Sum of total intensities',
    xlabel='Time of the day',
)

In [22]:
pd.plotting.register_matplotlib_converters()
# Same layout as the total-intensity plot, for the weightedSumAverageIntensity column.
sns.catplot(
    data=hourlyIntensitiesData,
    x='Time',
    y='weightedSumAverageIntensity',
    hue='DayOfWeek',
    hue_order=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
    kind='point',
    aspect=4,
).set(
    title='Weighted sum of average intensities per hour for different days of the week',
    ylabel='Sum of average intensities',
    xlabel='Time of the day',
)

# Minute analysis

In [40]:
# Count minuteSleep rows per (participant, date, value), then report the mean
# and standard deviation of those counts for each distinct `value`.
# Exact duplicate rows are removed first; two participant Ids are excluded.
query = f""" 
WITH cleanMinuteSleep AS (SELECT DISTINCT * FROM `{projectID}.{datasetID}.minuteSleep`),
minuteSleep AS (
    SELECT --*
        CAST(Id AS STRING) AS newId,
        CAST(logId AS STRING) AS newlogId,
        CAST(date AS DATE) AS Date,
        CAST(date AS TIME) AS Time, 
        FORMAT_DATE('%A', date) AS DayOfWeek,
        value,
    FROM cleanMinuteSleep
    WHERE id NOT IN (2320127002, 7007744171)
)
, tmpdata AS (
    SELECT 
        newId,
        Date,
        value,
        COUNT(*) as count,
        --AVG(time) as avgTime
    from minuteSleep
    GROUP BY newId, Date, value
    -- HAVING count<120
    ORDER BY count
)
SELECT 
    value,
    AVG(count) AS avgValue,
    STDDEV(count) AS stddevValue
FROM tmpdata
--WHERE count>10
GROUP BY value
"""

query_job = client.query(query)
data = query_job.to_dataframe()
# Bare last expression: the frame is rendered as the cell output.
data

In [96]:
# Count minuteSleep rows per (weekday, sleep log, sleep state), then report
# the mean and standard deviation of those counts per weekday and state.
# The CASE expression labels value 1/2/3 as Asleep/Restless/Awake.
query = f""" 
WITH cleanMinuteSleep AS (SELECT DISTINCT * FROM `{projectID}.{datasetID}.minuteSleep`),
minuteSleep AS (
    SELECT --*
        CAST(Id AS STRING) AS newId,
        CAST(logId AS STRING) AS newlogId,
        CAST(date AS DATE) AS Date,
        CAST(date AS TIME) AS Time, 
        FORMAT_DATE('%A', date) AS DayOfWeek,
        value,
    FROM cleanMinuteSleep
    WHERE id NOT IN (2320127002, 7007744171)
)
, tmpdata AS (
    SELECT 
        DayOfWeek,
        --value,
        newlogId,
        COUNT(*) as count,
        --AVG(value) as avgTime
        CASE
            WHEN (value = 1) THEN 'Asleep'
            WHEN (value = 2) THEN 'Restless'
            WHEN (value = 3) THEN 'Awake'
        END AS sleepState
    from minuteSleep
    GROUP BY DayOfWeek, newlogId, sleepState
)
SELECT 
    DayOfWeek,
    sleepState,
    AVG(count) AS avgValue,
    STDDEV(count) AS stddevValue
FROM tmpdata
--WHERE count>10
GROUP BY DayOfWeek, sleepState
"""

query_job = client.query(query)
dataMinuteSleep = query_job.to_dataframe()
# Bare last expression: the frame is rendered as the cell output.
dataMinuteSleep

In [101]:
pd.plotting.register_matplotlib_converters()
# sns.catplot is figure-level and creates its own figure, so no plt.figure()
# call is made here: it would only add an empty extra figure to the output.
# The figure size is controlled by catplot's height (default) and aspect.
g = sns.catplot(
    data=dataMinuteSleep,
    x='DayOfWeek',
    y='avgValue',
    hue='sleepState',
    order=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
    kind='bar',
    aspect=2,
).set(
    title='Average time in sleep state per day of the week',
    ylabel='Average time in each sleep state [min]',
    xlabel='Day of the week',
)
# Log scale on y so the sleep states remain readable on one shared axis.
g.set(yscale="log")
