## Import packages

In [1]:
import pandas as pd
import numpy as np
import datetime

## Load data

In [2]:
# Daily activity export (steps, distance, calories), one row per date.
activity_csv_path = '../Raw Data/ACTIVITY.csv'
activity_data = pd.read_csv(activity_csv_path)

In [3]:
# Check the dimensions (rows, columns) of the raw activity table.
activity_data.shape

(66, 6)

In [4]:
# Peek at the last rows to see the column layout and the most recent dates.
activity_data.tail()

Unnamed: 0,date,lastSyncTime,steps,distance,runDistance,calories
61,2019-06-12,1560540959,5274,3750,227,146
62,2019-06-13,1560540959,10912,8100,462,312
63,2019-06-14,1560672639,15387,11922,2191,510
64,2019-06-15,1560672640,14261,9890,456,364
65,2019-06-16,1560692298,2026,1378,214,58


In [5]:
# Daily sleep export (sleep-phase durations plus start/stop epoch timestamps).
sleep_csv_path = '../Raw Data/SLEEP.csv'
sleep_data = pd.read_csv(sleep_csv_path)

In [6]:
# Check the dimensions (rows, columns) of the raw sleep table.
sleep_data.shape

(66, 7)

In [7]:
# Peek at the first rows to see the column layout of the sleep table.
sleep_data.head()

Unnamed: 0,date,lastSyncTime,deepSleepTime,shallowSleepTime,wakeTime,start,stop
0,2019-04-12,1555128688,0,0,0,1554930000,1554930000
1,2019-04-13,1555256891,0,0,0,1555016400,1555016400
2,2019-04-14,1555302659,0,0,0,1555102800,1555102800
3,2019-04-15,1555384520,156,287,0,1555273620,1555300200
4,2019-04-16,1555473482,203,307,2,1555353480,1555384200


In [8]:
# Daily totals from the Huawei workbook, read from its 'Sheet1' worksheet.
huawei_xlsx_path = '../Raw Data/HUAWEI.xlsx'
huawei_data = pd.read_excel(huawei_xlsx_path, sheet_name='Sheet1')

In [9]:
# Check the dimensions (rows, columns) of the raw Huawei table.
huawei_data.shape

(66, 4)

In [10]:
# Peek at the first rows to see the column layout of the Huawei table.
huawei_data.head()

Unnamed: 0,date,steps,distance,calories
0,2019-04-12,3672,2.78,119
1,2019-04-13,5223,3.95,170
2,2019-04-14,1682,1.27,55
3,2019-04-15,2713,2.05,92
4,2019-04-16,16007,12.1,545


## Data preparation

### Merging data

In [11]:
# Normalise the 'date' column of every source table to datetime64 so the
# merge keys share one dtype.  The Huawei frame is included as well: if
# read_excel already parsed its dates this is a no-op, and if it returned
# strings the later merge on 'date' would otherwise fail on mismatched dtypes.
for source_frame in (activity_data, sleep_data, huawei_data):
    source_frame['date'] = pd.to_datetime(source_frame['date'])

In [12]:
# Join the three daily tables on their shared 'date' key, keeping only dates
# present in all of them (inner join).
# validate='one_to_one' makes pandas raise a MergeError if any table holds a
# duplicated date, instead of silently multiplying rows.
merged_data_temp = pd.merge(activity_data, sleep_data, on='date', how='inner',
                            validate='one_to_one')
# steps/distance/calories exist on both sides of this second join; spell out
# the suffixes so the '_x' (activity export) / '_y' (Huawei) column names used
# afterwards do not depend on an implicit default.
merged_data = pd.merge(merged_data_temp, huawei_data, on='date', how='inner',
                       suffixes=('_x', '_y'), validate='one_to_one')

In [13]:
# Check the dimensions (rows, columns) of the merged table.
merged_data.shape

(66, 15)

### Selecting and renaming columns

In [14]:
# Keep only the columns used downstream.  After the merges, the '_x' columns
# come from the activity export and the '_y' columns from the Huawei table.
kept_columns = [
    'date',
    'start', 'stop', 'deepSleepTime', 'shallowSleepTime', 'wakeTime',
    'steps_x', 'distance_x', 'calories_x',
    'steps_y', 'distance_y', 'calories_y',
]
merged_data = merged_data[kept_columns]

In [15]:
# Replace the generic merge suffixes with the name of the device each metric
# came from, and make the sleep boundary columns self-describing.
column_renames = {
    'start': 'startSleep',
    'stop': 'stopSleep',
    'steps_x': 'stepsMiFit',
    'distance_x': 'distanceMiFit',
    'calories_x': 'caloriesMiFit',
    'steps_y': 'stepsHuawei',
    'distance_y': 'distanceHuawei',
    'calories_y': 'caloriesHuawei',
}
merged_data = merged_data.rename(columns=column_renames)

### Filtering and indexing data

In [16]:
# Keep rows strictly between the two boundary dates (both bounds exclusive).
# NOTE(review): presumably this drops the first days, which have no sleep
# recorded, and the final partially-synced day — confirm.
is_after_start = merged_data['date'] > '2019-04-14'
is_before_end = merged_data['date'] < '2019-06-16'
merged_data = merged_data[is_after_start & is_before_end]

In [17]:
# Index the table by calendar date.  Rebind to the returned frame instead of
# mutating in place: the result is a fresh object rather than the filtered
# slice, so later column assignments operate on an independent frame.
merged_data = merged_data.set_index('date')

In [18]:
# Check the dimensions after filtering and moving 'date' into the index.
merged_data.shape

(62, 11)

### Fixing columns types

In [19]:
# The sleep boundaries are stored as epoch seconds; turn both into datetimes.
for sleep_col in ('startSleep', 'stopSleep'):
    merged_data[sleep_col] = pd.to_datetime(merged_data[sleep_col], unit='s')

In [20]:
# Shift both sleep timestamps forward by a fixed three hours.
# NOTE(review): presumably converts UTC to the wearer's local time (UTC+3) —
# confirm.  Re-running this cell shifts the values again, so run it only once
# after the epoch conversion.
three_hours = datetime.timedelta(hours=3)
for boundary_col in ('startSleep', 'stopSleep'):
    merged_data[boundary_col] = merged_data[boundary_col] + three_hours

In [21]:
# Scale the Huawei distance by 1000 and store it as an integer
# (presumably kilometres -> metres, to match the Mi Fit distance — confirm).
# Round before casting: astype(int) truncates toward zero, and a float product
# can land just below the intended whole number (e.g. 1.005 * 1000 evaluates
# to 1004.9999999999999), which would otherwise lose a unit.
merged_data['distanceHuawei'] = (merged_data['distanceHuawei'] * 1000).round().astype(int)

### Creating new features

In [22]:
# Day-of-week number derived from the date index (Monday is 0).
day_numbers = merged_data.index.dayofweek
merged_data['weekday'] = day_numbers

In [23]:
# Total time between sleep start and stop, expressed in whole minutes.
sleep_span = merged_data['stopSleep'] - merged_data['startSleep']
sleep_minutes = sleep_span.dt.total_seconds() / 60
merged_data['totalSleepTime'] = sleep_minutes.astype(int)

In [24]:
# Check the dimensions after adding the weekday and totalSleepTime columns.
merged_data.shape

(62, 13)

In [25]:
# Preview the prepared table, including the converted timestamps and the
# derived weekday and totalSleepTime columns.
merged_data.head()

Unnamed: 0_level_0,startSleep,stopSleep,deepSleepTime,shallowSleepTime,wakeTime,stepsMiFit,distanceMiFit,caloriesMiFit,stepsHuawei,distanceHuawei,caloriesHuawei,weekday,totalSleepTime
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2019-04-15,2019-04-14 23:27:00,2019-04-15 06:50:00,156,287,0,6158,4287,167,2713,2050,92,0,443
2019-04-16,2019-04-15 21:38:00,2019-04-16 06:10:00,203,307,2,17791,13677,561,16007,12100,545,1,512
2019-04-17,2019-04-16 22:53:00,2019-04-17 06:57:00,195,289,0,4889,3513,146,3164,2390,108,2,484
2019-04-18,2019-04-17 21:06:00,2019-04-18 06:41:00,277,298,0,13788,10476,429,10588,8000,360,3,575
2019-04-19,2019-04-18 23:58:00,2019-04-19 06:48:00,182,163,65,6303,4437,170,2967,2240,101,4,410


## Data analysis

- Understand what share of the activity recorded by the Mi Fit band is also captured by the Huawei phone (the remainder approximates the time the phone is idle)

In [26]:
# Phone's total step count as a fraction of the band's total step count.
huawei_steps_total = merged_data['stepsHuawei'].sum()
mifit_steps_total = merged_data['stepsMiFit'].sum()
huawei_steps_total / mifit_steps_total

0.725455786118589

In [27]:
# Phone's total distance as a fraction of the band's total distance.
huawei_distance_total = merged_data['distanceHuawei'].sum()
mifit_distance_total = merged_data['distanceMiFit'].sum()
huawei_distance_total / mifit_distance_total

0.7644052679567414

In [28]:
# Phone's total calories as a fraction of the band's total calories.
huawei_calories_total = merged_data['caloriesHuawei'].sum()
mifit_calories_total = merged_data['caloriesMiFit'].sum()
huawei_calories_total / mifit_calories_total

0.8993223620522749

- Understand whether the ratios between steps, distance, and calories are the same for the phone and the band

In [29]:
# Average distance per step according to the phone.
phone_distance = merged_data['distanceHuawei'].sum()
phone_steps = merged_data['stepsHuawei'].sum()
phone_distance / phone_steps

0.755625193400559

In [30]:
# Average distance per step according to the band.
band_distance = merged_data['distanceMiFit'].sum()
band_steps = merged_data['stepsMiFit'].sum()
band_distance / band_steps

0.7171230912035461

In [31]:
# Average distance per calorie according to the phone.
phone_distance_sum = merged_data['distanceHuawei'].sum()
phone_calories_sum = merged_data['caloriesHuawei'].sum()
phone_distance_sum / phone_calories_sum

22.079655543595265

In [32]:
# Average distance per calorie according to the band.
band_distance_sum = merged_data['distanceMiFit'].sum()
band_calories_sum = merged_data['caloriesMiFit'].sum()
band_distance_sum / band_calories_sum

25.97670216198774