In [None]:
from bs4 import BeautifulSoup
import os
from time import sleep, mktime, time
import datetime
from collections import OrderedDict 
import numpy as np
import re
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import HTML

In [None]:
# Render a snippet that hides every notebook input cell (`div.input`) once the page is ready
# and shows a link that toggles the code cells back on/off.
# The script calls `$`, so it relies on jQuery being available in the page that displays the notebook.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
The raw code for this notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.''') 

## Guide
- An activity should last at least 5 minutes to be recorded
- If the time to reach a place to perform an activity is below 15 minutes, I consider it as part of that activity.
- If an activity is not performed alone, it must contain an element like mom, dad, family, girlfriend, friends, colleagues, flatmates... (write them in order of importance during that activity)

#### Activities importance
- 1 passive activities that run in the background, like listening to music (commute is an exception: it must be followed by the means of transport used)
- 1.5 eating activities
- 2 activities that require action, like cooking, time given to people
- 3 special activities that are more active than the previous type, activities that can be performed while listening to music and eating
- 0 activities less important than passive ones, like simply being with someone or sitting on a train for many hours


In [None]:
# Read every exported chat page (messages.html, messages2.html, ...) and parse it into a soup.
data_folder = '../data/'
soups = []
for suffix in ['', '2', '3', '4']:
    # os.path.join avoids the doubled slash of plain concatenation; the encoding is pinned so the
    # result does not depend on the platform's locale default.
    # NOTE(review): assumes the exported HTML is UTF-8 - confirm against the export.
    with open(os.path.join(data_folder, 'messages' + suffix + '.html'), encoding='utf-8') as fp:
        html_doc = fp.read()
    soups.append(BeautifulSoup(html_doc, 'html.parser'))

In [None]:
# Registry of every activity keyword that may appear in the log.
#   'class'    - importance score; get_main_activity picks the keyword with the highest score
#   'category' - coarse group used for the per-category statistics and colours
# Each keyword appears exactly once ('concert' used to be listed twice with the same value).
activities = {
    'bike': {'class': 2, 'category': 'transport'},
    'breakfast': {'class': 1.5, 'category': 'food'},
    'bureaucracy': {'class': 2, 'category': 'other'},
    'clean': {'class': 2, 'category': 'hygiene'},
    'colleagues': {'class': 0.5, 'category': 'people'},
    'commute': {'class': 1, 'category': 'transport'},
    'plane': {'class': 0, 'category': 'transport'},
    'bus': {'class': 0, 'category': 'transport'},
    'tram': {'class': 0, 'category': 'transport'},
    'cook': {'class': 2, 'category': 'food'},
    'dad': {'class': 0.5, 'category': 'people'},
    'dinner': {'class': 1.5, 'category': 'food'},
    'dress': {'class': 2, 'category': 'other'},
    '???': {'class': 2, 'category': 'other'}, # forgot what it was
    'football': {'class': 2, 'category': 'other'},
    'exercise': {'class': 2, 'category': 'hobbies'},
    'snowboard': {'class': 2, 'category': 'hobbies'},
    'facebook': {'class': 3, 'category': 'other'},
    'internet': {'class': 3, 'category': 'other'},
    'flatmates': {'class': 0.5, 'category': 'people'},
    'friends': {'class': 0, 'category': 'people'},
    'girlfriend': {'class': 0.5, 'category': 'people'},
    'grocery': {'class': 2, 'category': 'food'},
    'hygiene': {'class': 2, 'category': 'hygiene'},
    'lunch': {'class': 1.5, 'category': 'food'},
    'merenda': {'class': 1.5, 'category': 'food'},
    'mom': {'class': 0.5, 'category': 'people'},
    'museum': {'class': 3, 'category': 'hobbies'},
    'movie': {'class': 3, 'category': 'hobbies'},
    'documentary': {'class': 3, 'category': 'hobbies'},
    'modular': {'class': 3, 'category': 'hobbies'},
    'concert': {'class': 3, 'category': 'hobbies'},
    'cinema': {'class': 3, 'category': 'other'},
    'zoo': {'class': 3, 'category': 'other'},
    'plants': {'class': 3, 'category': 'hobbies'},
    'hike': {'class': 3, 'category': 'other'},
    'music_discover': {'class': 3, 'category': 'hobbies'},
    'music_listen': {'class': 0.9, 'category': 'hobbies'},
    'music_make': {'class': 2, 'category': 'hobbies'},
    'music_play': {'class': 2, 'category': 'hobbies'},
    'nap': {'class': 2, 'category': 'sleep'},
    'news': {'class': 1, 'category': 'internet'},
    'pack': {'class': 2, 'category': 'other'},
    'flat_search': {'class': 2, 'category': 'other'},
    'piano_play': {'class': 2, 'category': 'hobbies'},
    'piano_practice': {'class': 2, 'category': 'hobbies'},
    'music_theory': {'class': 2, 'category': 'hobbies'},
    'plan': {'class': 2, 'category': 'other'},
    'read': {'class': 3, 'category': 'hobbies'},
    'code': {'class': 3, 'category': 'hobbies'},
    'relatives': {'class': 0.5, 'category': 'people'},
    'relax': {'class': 2, 'category': 'other'},
    'side_project': {'class': 3, 'category': 'hobbies'},
    'sleep': {'class': 2, 'category': 'sleep'},
    'strangers': {'class': 2, 'category': 'other'},
    'police': {'class': 2, 'category': 'other'},
    'stretch': {'class': 2, 'category': 'hobbies'},
    'text': {'class': 3, 'category': 'other'},
    'toilet': {'class': 1.5, 'category': 'toilet'},
    'train': {'class': 0, 'category': 'transport'},
    'metro': {'class': 0, 'category': 'transport'},
    'walk': {'class': 3, 'category': 'other'},
    'work': {'class': 3, 'category': 'work'},
    'invest': {'class': 3, 'category': 'hobbies'},
    'invest_house': {'class': 3, 'category': 'hobbies'},
    'write': {'class': 2, 'category': 'hobbies'},
    'bed': {'class': 2, 'category': 'other'},
    'car': {'class': 2, 'category': 'transport'},
    'boat': {'class': 0, 'category': 'transport'},
    'motorbike': {'class': 2, 'category': 'transport'},
    'cat': {'class': 3, 'category': 'other'},
    'doctor': {'class': 2, 'category': 'other'},
    'family': {'class': 0.5, 'category': 'people'},
    'shop': {'class': 2, 'category': 'other'},
    'piano_shop': {'class': 2, 'category': 'other'},
    'sister': {'class': 0.5, 'category': 'people'},
    'armonica': {'class': 2, 'category': 'hobbies'},
    'violin': {'class': 2, 'category': 'hobbies'},
}


In [None]:
# sorted_ma_t = sorted(multiactivity_time.items(), key=lambda kv: -kv[1])
# sorted_ma_t = OrderedDict(sorted_ma_t)
# Unix timestamp (seconds) used as the start of the very first logged interval:
# 1555390800 == 2019-04-16 05:00:00 UTC. It is also shown in the title of the category chart.
genesis_ts = 1555390800

def dt2ts(d):
    """Convert a 'YYYYMMDDHHMM' value to an integer Unix timestamp (local time).

    `d` is stringified first, so ints are accepted as well. If the value cannot be
    converted, the exception is printed and 0 is returned instead of raising.
    """
    stamp = str(d)
    try:
        parsed = datetime.datetime.strptime(stamp, "%Y%m%d%H%M")
        return int(mktime(parsed.timetuple()))
    except Exception as e:
        # best effort: report the problem and fall back to the 0 sentinel
        print(e)
        return 0

def get_main_activity(acts):
    """Return the activity with the highest 'class' score among `acts`.

    `acts` is either a '|'-separated string or an iterable of activity names.
    On a tie the earlier activity wins; a warning is printed for a tie unless one of
    the two tied activities is in the 'people' category. Returns '' for no activities.
    """
    names = acts.split('|') if isinstance(acts, str) else acts
    best_name, best_score = '', -1
    for name in names:
        weight = activities[name]['class']
        if weight > best_score:
            best_name, best_score = name, weight
        elif weight == best_score:
            # ties between people are expected: the first listed person is kept silently
            if get_category(best_name) != 'people' and get_category(name) != 'people':
                print('**Warning**', best_name, name)
    return best_name

def get_category(act):
    """Look up the category of activity `act` in the global `activities` table (KeyError if unknown)."""
    return activities[act]['category']

def get_multiactivity_time(day_activities):
    """Total time per exact activity combination.

    day_activities maps a day to a list of [start_timestamp, [activity, ...]] entries.
    Each entry ends the interval opened by the previous one; the very first interval
    starts at `genesis_ts` and is booked on 'write'.

    Returns a dict {'act1|act2|...': seconds}. The last logged combination gets a key,
    but its still-open interval is not counted.
    """
    prev_start = genesis_ts
    prev_acts = 'write'

    acts_time = dict()
    for day in day_activities:
        for t_a in day_activities[day]:
            acts = '|'.join(t_a[1]) # get multiactivity
            start = t_a[0] # multiactivity start time
            acts_time.setdefault(acts, 0)
            # .get() keeps this working when the log does not literally start with 'write'
            # (the plain `+=` raised KeyError in that case)
            acts_time[prev_acts] = acts_time.get(prev_acts, 0) + (start - prev_start)
            prev_start = start
            prev_acts = acts
    return acts_time

def get_mainactivity_time(day_activities):
    """Total time per main activity (see get_main_activity).

    day_activities maps a day to a list of [start_timestamp, [activity, ...]] entries.
    Each entry ends the interval opened by the previous one; the first interval starts
    at `genesis_ts` and is booked on the main activity of 'write'.

    Entries that cannot be processed (e.g. an unknown activity name) are printed and
    skipped, so their time stays with the previous main activity.
    Returns a dict {main_activity: seconds}.
    """
    prev_start = genesis_ts
    prev_acts = 'write'
    prev_act = get_main_activity(prev_acts)

    act_time = dict()
    for day in day_activities:
        for t_a in day_activities[day]:
            try:
                acts = '|'.join(t_a[1]) # get multiactivity
                main_act = get_main_activity(acts)
                start = t_a[0] # multiactivity start time
                act_time.setdefault(main_act, 0)
                # .get(): the seed activity has no key yet unless the log starts with it;
                # the plain `+=` raised KeyError on every entry until it appeared
                act_time[prev_act] = act_time.get(prev_act, 0) + (start - prev_start)
                prev_start = start
                prev_act = main_act
            except Exception as e:
                # deliberate best effort: report the broken entry and carry on
                print(e)
                print(t_a, day)
    return act_time

def get_category_time(day_activities):
    """Total time per category of the main activity.

    day_activities maps a day to a list of [start_timestamp, [activity, ...]] entries.
    Each entry ends the interval opened by the previous one; the first interval starts
    at `genesis_ts` and is booked on the category of 'write'.
    Returns a dict {category: seconds}.
    """
    prev_start = genesis_ts
    prev_acts = 'write'
    prev_act = get_main_activity(prev_acts)
    prev_cat = get_category(prev_act)

    cat_time = dict()
    for day in day_activities:
        for t_a in day_activities[day]:
            acts = '|'.join(t_a[1]) # get multiactivity
            cat = get_category(get_main_activity(acts))
            start = t_a[0] # multiactivity start time
            cat_time.setdefault(cat, 0)
            # .get(): the seed category has no key yet unless the first entry shares it
            # (the plain `+=` raised KeyError otherwise)
            cat_time[prev_cat] = cat_time.get(prev_cat, 0) + (start - prev_start)
            prev_start = start
            prev_cat = cat
    return cat_time

def get_people_time(day_activities):
    """Total time spent with each person ('alone' when nobody is listed).

    day_activities maps a day to a list of [start_timestamp, [activity, ...]] entries.
    Each entry ends the interval opened by the previous one; the first interval starts
    at `genesis_ts` with the people of 'write'.

    Every person present during an interval is credited with the full interval, so the
    totals can add up to more than the wall-clock time.
    Returns a dict {person: seconds}.
    """
    prev_start = genesis_ts
    prev_acts = 'write'
    prev_people = get_people(prev_acts)

    people_time = dict()
    for day in day_activities:
        for t_a in day_activities[day]:
            acts = '|'.join(t_a[1]) # get multiactivity
            people = get_people(acts)
            start = t_a[0] # multiactivity start time
            delta = start - prev_start
            for prev_person in prev_people:
                people_time[prev_person] = people_time.get(prev_person, 0) + delta
            # advance only after everybody was credited; doing it inside the loop above
            # left every person after the first one with a zero-length interval
            prev_start = start
            prev_people = people
    return people_time

def _time_by_picked_activity(day_activities, pick):
    """Shared accumulator behind get_hobby_time / get_other_time / get_food_time.

    `pick` maps a '|'-joined activity string to the one activity of interest, or ''
    when the entry has none. Each entry ends the interval opened by the previous one;
    the first interval starts at `genesis_ts` with pick('write').
    Returns {activity: seconds}; time without a picked activity is dropped.
    """
    prev_start = genesis_ts
    prev_key = pick('write')

    totals = dict()
    for day in day_activities:
        for t_a in day_activities[day]:
            key = pick('|'.join(t_a[1]))
            start = t_a[0]
            totals.setdefault(key, 0)
            # .get(): the seed key may not exist yet (the plain `+=` raised KeyError when
            # the first entry did not yield the same key as 'write')
            totals[prev_key] = totals.get(prev_key, 0) + (start - prev_start)
            prev_start = start
            prev_key = key
    totals.pop('', None)
    return totals

def get_hobby_time(day_activities):
    """Total seconds per hobby activity; see _time_by_picked_activity and get_hobby."""
    return _time_by_picked_activity(day_activities, get_hobby)

def get_other_time(day_activities):
    """Total seconds per 'other' activity; see _time_by_picked_activity and get_other."""
    return _time_by_picked_activity(day_activities, get_other)

def get_food_time(day_activities):
    """Total seconds per food activity; see _time_by_picked_activity and get_food."""
    return _time_by_picked_activity(day_activities, get_food)


def ts2date(ts):
    """Format Unix timestamp `ts` (number or numeric string) as local-time 'YYYY-MM-DD HH:MM:SS'."""
    moment = datetime.datetime.fromtimestamp(int(ts))
    return moment.strftime('%Y-%m-%d %H:%M:%S')

def contains_activity(acts, act):
    """Tell whether log entry `acts` ([timestamp, [activity, ...]]) lists activity `act`."""
    return act in acts[1]

def get_day_activity_time(day_activities, act):
    """Seconds spent per day on activity `act`.

    day_activities maps a day key ('YYYYMMDD') to a list of [start_timestamp, [activity, ...]]
    entries; days are processed in ascending numeric order. An interval opens at the first
    entry that lists `act` and closes at the first later entry that does not. Its whole
    duration is booked on the day of the closing entry, so an activity running past
    midnight counts for the next day.

    Returns a dict {day: seconds} with a key for every day.
    """
    # 0 doubles as the "no interval open" sentinel and deliberately survives the day change
    start_activity_ts = 0
    day_activity = dict()
    for day in sorted(day_activities, key=int):
        day_activity.setdefault(day, 0)
        for entry in day_activities[day]:
            if act in entry[1]:
                # Open an interval only when none is running. Consecutive entries listing
                # `act` extend it, also across midnight (the per-day flag used before
                # overwrote the start and lost the time logged on the previous day).
                if not start_activity_ts:
                    start_activity_ts = entry[0]
            elif start_activity_ts:
                day_activity[day] += entry[0] - start_activity_ts
                start_activity_ts = 0
    return day_activity

def get_day_category_time(day_activities, cat):
    """Seconds spent per day in category `cat` (category of each entry's main activity).

    day_activities maps a day key ('YYYYMMDD') to a list of [start_timestamp, [activity, ...]]
    entries; days are processed in ascending numeric order. An interval opens at the first
    entry whose main activity is in `cat` and closes at the first later entry of another
    category. Its whole duration is booked on the day of the closing entry, so a category
    running past midnight counts for the next day.

    Returns a dict {day: seconds} with a key for every day.
    """
    # 0 doubles as the "no interval open" sentinel and deliberately survives the day change
    start_category_ts = 0
    day_category = dict()
    for day in sorted(day_activities, key=int):
        day_category.setdefault(day, 0)
        for acts in day_activities[day]:
            category = get_category(get_main_activity(acts[1]))
            if cat == category:
                # Keep the start of the running interval: overwriting it on every
                # consecutive same-category entry counted only the last segment.
                if not start_category_ts:
                    start_category_ts = acts[0]
            elif start_category_ts:
                day_category[day] += acts[0] - start_category_ts
                start_category_ts = 0
    return day_category

def get_people(acts):
    """Return the 'people'-category activities of the '|'-separated string `acts`, in order.

    Falls back to ['alone'] when no person is listed.
    """
    companions = [name for name in acts.split('|') if get_category(name) == 'people']
    return companions if companions else ['alone']

def get_hobby(acts):
    """Return the last 'hobbies'-category activity in the '|'-separated string `acts`, or ''."""
    matches = [name for name in acts.split('|') if get_category(name) == 'hobbies']
    return matches[-1] if matches else ''

def get_other(acts):
    """Return the last 'other'-category activity in the '|'-separated string `acts`, or ''."""
    matches = [name for name in acts.split('|') if get_category(name) == 'other']
    return matches[-1] if matches else ''

def get_food(acts):
    """Return the last 'food'-category activity in the '|'-separated string `acts`, or ''."""
    matches = [name for name in acts.split('|') if get_category(name) == 'food']
    return matches[-1] if matches else ''


In [None]:
def date2ts(dt):
    """Return the Unix timestamp (float, local time) of midnight on the 'YYYYMMDD' day `dt`."""
    parsed = datetime.datetime.strptime(dt, "%Y%m%d")
    return mktime(parsed.timetuple())

In [None]:
# tlgrm_activities = set()
# day_activities = dict()

# for soup in soups:
#     text = soup.find_all("div", {"class":"text"})
#     for tag in text:
#         line = tag.get_text().strip()
#         if line[0] == '+':
#             day = line[2:]
#             print('new day', day)
#             day_activities[day] = []
#         elif line[0] == '-':
#             if line[1] != ' ':
#                 print('Put space, stupid!', line)
#             else:
# #                 try:
#                 time_acts_list = re.split('- |\+ ', line)[1:] # exclude first empty element
#                 for ta in time_acts_list: 
#                     # ta is 2204 relax friends
#                     # or 20190708
#                     try:
#                         day = str(int(ta))
#                         print('new day', day)
#                         day_activities[day] = []
#                     except Exception as e: # not an int
# #                         print(e)
#                         single_time_acts = ta.split(' ')
#         #                     splt = line[2:].split(' ')
#                         for act in single_time_acts[1:]: # select acts only, splt[0] is time
                            
#                             tlgrm_activities.add(act.lower())
#                             print('day + single_time_acts[0]', day + single_time_acts[0])
#                         ts = dt2ts(day + single_time_acts[0])
#                         day_activities[day].append([ts, sorted([el.lower() for el in single_time_acts[1:]])])
#         #                 except Exception as e:
#         #                     print(e)

In [None]:
def is_time(word):
    """True when `word` consists of exactly four digit characters (an 'HHMM' time token)."""
    return len(word) == 4 and all(ch.isdigit() for ch in word)
def is_date(word):
    """True when `word` converts to an integer whose decimal form has 8 characters ('YYYYMMDD').

    Only the length is checked, not whether the digits form a real calendar date.
    Values that cannot be converted to an int are simply not dates.
    """
    try:
        return len(str(int(word))) == 8
    except (ValueError, TypeError, OverflowError):
        # only conversion failures are swallowed; a bare `except:` would also have hidden
        # KeyboardInterrupt and genuine programming errors
        return False

def is_act(word):
    """True when `word` is a non-empty token that is neither a date nor a time."""
    return not (is_date(word) or is_time(word)) and bool(word)

In [None]:
# Parse the chat messages into day_activities: {'YYYYMMDD': [[start_timestamp, [activity, ...]], ...]}.
# Only messages starting with '-' or '+' are log lines. Inside a line, an 8-digit token opens a
# new day, a 4-digit token opens a new entry on the current day, and every other token is an
# activity of the most recent entry.
tlgrm_activities = set()
day_activities = dict()

time_acts = []
day = ''
for soup in soups:
    for tag in soup.find_all("div", {"class":"text"}):
        line = tag.get_text().strip()
        # an empty message has no first character to inspect
        if not line or line[0] not in ('-', '+'):
            continue
        # raw string: '\+' in a normal string literal is an invalid escape sequence
        words = re.split(r'- |\+ | ', line)[1:]

        for word in words:
            # we need the first day
            if is_date(word):
                day = word
                day_activities[day] = []
            if not day:
                continue # keep looking for the first day
            if is_time(word):
                # named start_ts rather than `time`, which would clobber the imported time()
                start_ts = dt2ts(day + word)
                time_acts = [start_ts, []]
                day_activities[day].append(time_acts)
            elif is_act(word):
                if not day_activities[day]:
                    # an activity listed before any time of the day has no entry to join
                    print('Activity without a start time, skipped:', day, word)
                    continue
                day_activities[day][-1][1].append(word.lower())
                # else go to next word and look for day

In [None]:
# when an activity starts before midnight and ends after midnight, it belongs to the previous day

In [None]:
# multiactivity_time = get_multiactivity_time(day_activities)
# Aggregate the parsed log into total-time dictionaries; the values are differences of
# Unix timestamps, i.e. seconds. One dictionary per grouping used by the charts below.
mainactivity_time = get_mainactivity_time(day_activities)  # per main activity
category_time = get_category_time(day_activities)  # per category of the main activity
people_time = get_people_time(day_activities)  # per person ('alone' when nobody is listed)
hobby_time = get_hobby_time(day_activities)  # per 'hobbies' activity
other_time = get_other_time(day_activities)  # per 'other' activity
food_time = get_food_time(day_activities)  # per 'food' activity

In [None]:
# acts_list = []
# for ma in multiactivity_time:
#     mins = multiactivity_time[ma]/60
#     hours = round(mins/60, 2)
#     acts_list.append([ma, mins, hours])
# acts_df = pd.DataFrame(acts_list, columns=['activities', 'minutes', 'hours'])
# tot_minutes = sum(acts_df.minutes.values)
# acts_df['perc'] = acts_df.apply(lambda x: 100*x[1]/tot_minutes, axis=1)
# acts_df.sort_values('perc', ascending=False)
# acts_df = acts_df.sort_values('perc', ascending=False)

In [None]:
# Table of main activities: minutes, hours (2 decimals) and share of the total, largest first.
act_list = [[ma, secs/60, round(secs/60/60, 2)] for ma, secs in mainactivity_time.items()]
act_df = pd.DataFrame(act_list, columns=['activity', 'minutes', 'hours'])
tot_minutes = sum(act_df.minutes.values)
# column arithmetic instead of a row-wise apply with x[1]: positional integer lookups on a
# label-indexed Series are deprecated in recent pandas
act_df['perc'] = 100 * act_df['minutes'] / tot_minutes
act_df = act_df.sort_values('perc', ascending=False)

In [None]:
# Table of categories: minutes, hours (2 decimals) and share of the total, largest first.
cat_list = [[name, secs/60, round(secs/60/60, 2)] for name, secs in category_time.items()]
cat_df = pd.DataFrame(cat_list, columns=['category', 'minutes', 'hours'])
tot_minutes = sum(cat_df.minutes.values)
# column arithmetic instead of a row-wise apply with x[1]: positional integer lookups on a
# label-indexed Series are deprecated in recent pandas
cat_df['perc'] = 100 * cat_df['minutes'] / tot_minutes
cat_df = cat_df.sort_values('perc', ascending=False)

In [None]:
# Table of people: minutes, hours (2 decimals) and share of the summed people time, largest first.
people_list = [[name, secs/60, round(secs/60/60, 2)] for name, secs in people_time.items()]
people_df = pd.DataFrame(people_list, columns=['person', 'minutes', 'hours'])
tot_minutes = sum(people_df.minutes.values)
# column arithmetic instead of a row-wise apply with x[1]: positional integer lookups on a
# label-indexed Series are deprecated in recent pandas
people_df['perc'] = 100 * people_df['minutes'] / tot_minutes
people_df = people_df.sort_values('perc', ascending=False)

In [None]:
# Table of hobbies: minutes, hours (2 decimals) and share of the total hobby time, largest first.
hobby_list = [[name, secs/60, round(secs/60/60, 2)] for name, secs in hobby_time.items()]
hobby_df = pd.DataFrame(hobby_list, columns=['hobby', 'minutes', 'hours'])
tot_minutes = sum(hobby_df.minutes.values)
# column arithmetic instead of a row-wise apply with x[1]: positional integer lookups on a
# label-indexed Series are deprecated in recent pandas
hobby_df['perc'] = 100 * hobby_df['minutes'] / tot_minutes
hobby_df = hobby_df.sort_values('perc', ascending=False)

In [None]:
# Table of 'other' activities: minutes, hours (2 decimals) and share of their total, largest first.
other_list = [[name, secs/60, round(secs/60/60, 2)] for name, secs in other_time.items()]
other_df = pd.DataFrame(other_list, columns=['other', 'minutes', 'hours'])
tot_minutes = sum(other_df.minutes.values)
# column arithmetic instead of a row-wise apply with x[1]: positional integer lookups on a
# label-indexed Series are deprecated in recent pandas
other_df['perc'] = 100 * other_df['minutes'] / tot_minutes
other_df = other_df.sort_values('perc', ascending=False)

In [None]:
# Table of food activities: minutes, hours (2 decimals) and share of their total, largest first.
food_list = [[name, secs/60, round(secs/60/60, 2)] for name, secs in food_time.items()]
food_df = pd.DataFrame(food_list, columns=['food', 'minutes', 'hours'])
tot_minutes = sum(food_df.minutes.values)
# column arithmetic instead of a row-wise apply with x[1]: positional integer lookups on a
# label-indexed Series are deprecated in recent pandas
food_df['perc'] = 100 * food_df['minutes'] / tot_minutes
food_df = food_df.sort_values('perc', ascending=False)

In [None]:
# Side length passed as figsize=(figS, figS) to the square pie/donut figures below.
figS = 8

In [None]:
# Donut chart of the time share of every category, saved as PDF and PNG.
plt.figure(figsize=(figS,figS))
ax = plt.gca()
# The wedge list is called `wedges`, not `patches`: a later cell binds `patches` to the
# matplotlib.patches module, and re-running this cell afterwards would replace that module
# with a list and break the Rectangle drawing.
wedges, texts, autotexts = ax.pie(cat_df.perc, labels=[c.upper() for c in cat_df.category], autopct='%1.1f%%', radius=1.3, pctdistance=.9, labeldistance=1)
plt.setp(wedges, width=0.3, edgecolor='white')  # a width below the radius turns the pie into a ring

plt.title('How I spent my time since\n'.upper() + ts2date(genesis_ts)[:10], y=.45, fontsize=20)
plt.savefig('../images/categories.pdf', bbox_inches='tight')
plt.savefig('../images/categories.png', bbox_inches='tight')
plt.show()



In [None]:
# Four concentric donuts on one axes, from inner to outer: food, company, hobbies, other.
plt.figure(figsize=(figS,figS))
ax = plt.gca()

# (table, label column, outer radius, percentage-label distance, label font size or None for the default)
donut_rings = [
    (food_df, 'food', 1.0, .85, None),
    (people_df, 'person', 1.8, .91, 13),
    (hobby_df, 'hobby', 2.6, .93, 18),
    (other_df, 'other', 3.4, .95, 25),
]
for ring_df, ring_col, ring_radius, ring_pct, ring_font in donut_rings:
    # multi-word names such as 'music_listen' are stacked on separate lines
    ring_labels = ['\n'.join(c.upper().split('_')) for c in ring_df[ring_col]]
    ptchs, texts, autotexts = ax.pie(ring_df.perc, labels=ring_labels, autopct='%1.1f%%', radius=ring_radius, pctdistance=ring_pct, labeldistance=1)
    if ring_font is not None:
        for label_text in texts:
            label_text.set_fontsize(ring_font)
    plt.setp(ptchs, width=0.3, edgecolor='white')

plt.title('From inner to\nouter donut:\n1 - food\n2 - company\n3 - hobbies\n4 - other', y=.35, fontsize='17')
plt.savefig('../images/donuts.pdf', bbox_inches = 'tight')
plt.savefig('../images/donuts.png', bbox_inches = 'tight')
plt.show()

In [None]:
# plt.figure(figsize=(figS,figS))
# ax = plt.gca()
# patches, texts, autotexts = ax.pie(act_df.perc, labels=act_df.activity, autopct='%1.1f%%', radius=0.3)
# plt.axis('equal')
# my_circle=plt.Circle( (0,0), 0.1, color='white')
# p=plt.gcf()
# p.gca().add_artist(my_circle)
# plt.show()

In [None]:
# plt.figure(figsize=(figS,figS))
# ax = plt.gca()
# patches, texts, autotexts = ax.pie(acts_df.perc, labels=acts_df.activities, autopct='%1.1f%%', radius=0.3)
# plt.axis('equal')
# my_circle=plt.Circle( (0,0), 0.1, color='white')
# p=plt.gcf()
# p.gca().add_artist(my_circle)
# plt.show()

In [None]:
# plt.figure(figsize=(figS,figS))
# ax = plt.gca()
# patches, texts, autotexts = ax.pie(people_df.perc, labels=people_df.person, autopct='%1.1f%%', radius=0.3)
# plt.axis('equal')
# my_circle=plt.Circle( (0,0), 0.1, color='white')
# p=plt.gcf()
# p.gca().add_artist(my_circle)
# plt.show()

In [None]:
# Invert the activity table: category -> list of the activities that belong to it.
categories = dict()
for a in activities:
    cat = activities[a]['category']
    categories.setdefault(cat, []).append(a)

In [None]:
# Display the raw totals (in seconds) of the 'other' activities.
other_time

In [None]:
# Colour (hex RGB) used for each category in the timeline chart and its legend.
category_color = {
    'sleep': '#B2B2B2',  # grey
    'toilet': '#A52A2A',  # brown
    'food': '#FF0000',  # red
    'transport': '#FFA500',  # orange
    'work': '#EAEA00',  # yellow
    'other': '#0000FF',  # blue
    'hygiene': '#00FFFF',  # cyan
    'people': '#FF7AE6',  # pink
    'hobbies': '#00C600',  # green
    'internet': '#000000',  # black
}

In [None]:
# for acts in acts_df.activities:
#     print(acts, '--->',get_main_activity(acts))

In [None]:
# for i in range(4):
#     print(i)
#     for act in activities:
#         if i == activities[act]['class']:
#             print(act)

In [None]:
import matplotlib.patches as patches


In [None]:
def get_category_color(cat):
    """Return the hex colour registered for category `cat` in `category_color` (KeyError if unknown)."""
    return category_color[cat]

In [None]:
# for day in day_activities:
#     color = 'k'
#     for el in day_activities[day]:
#         if 'hygiene' in el[1]:
#             color = 'r'
#             break
#     print(day, color)


In [None]:
# Small excerpt of the log (a single hand-picked day) for quick experiments.
# Days missing from the parsed data are skipped instead of raising KeyError.
# for k in list(day_activities.keys())[::-1][:2]:
small_day_activities = {k: day_activities[k] for k in ['20200206'] if k in day_activities}

In [None]:
# weekly_work_time = 0
# n_weeks = 12
# day_activity_time = get_day_activity_time(day_activities, 'work')
# for i, day in enumerate(list(day_activities.keys())[::-1][:7*n_weeks]):
#     if not i%7:
#         if int(i/7):
#             print('.....', int(i/7), 'week:', round(weekly_work_time/3600, 2), '.....')
#         weekly_work_time = 0
#     wt = day_activity_time[day]
#     weekly_work_time += wt
#     print(day, 'daily hours:', wt/3600)    
# print('.....', int(i/7), 'week:', round(weekly_work_time/3600, 2), '.....')
# # 2 days below ..... are weekend

In [None]:
# Hours of work per block of 7 logged days, most recent block first.
# Each block is keyed by its own first (most recent) day. Keying a block by the first day of
# the *next* block let the trailing partial block overwrite the last full one whenever the
# number of days was 7k + 1.
weekly_work_time = 0
day_activity_time = get_day_activity_time(day_activities, 'work')
weekly_hours = dict()
recent_first_days = list(day_activities.keys())[::-1]
for week_offset in range(0, len(recent_first_days), 7):
    week_days = recent_first_days[week_offset:week_offset + 7]
    weekly_work_time = sum(day_activity_time[d] for d in week_days)
    weekly_hours[week_days[0]] = round(weekly_work_time/3600, 2)
#     print(day, 'daily hours:', wt/3600)    
# print('.....', int(i/7), 'week:', round(weekly_work_time/3600, 2), '.....')

In [None]:
# Weekly work hours in the order they were stored.
list(weekly_hours.values())

In [None]:
# Line chart of the weekly work hours; the first three x positions are marked in red.
fs = 20
plt.figure(figsize=(15,7))
marked_weeks = (0, 1, 2)
# only the first marker carries the legend label, so the legend shows a single entry
plt.axvline(x=marked_weeks[0], label='quarantene', color='r', linewidth=4)
for week_idx in marked_weeks[1:]:
    plt.axvline(x=week_idx, color='r', linewidth=4)
plt.plot(list(weekly_hours.values()), linewidth=4)
plt.title('weekly hours of work', fontsize=fs)
plt.xlabel('weeks', fontsize=fs)
plt.legend(fontsize=fs)
plt.yticks(fontsize=fs)
plt.xticks(fontsize=fs)
plt.grid()
plt.show()


In [None]:
# Timeline chart: one horizontal strip per logged day (x = seconds since that day's midnight),
# each stretch coloured by the category of the main activity running during it.
fig1 = plt.figure(figsize=(15,len(day_activities)))
ax1 = fig1.add_subplot(111, aspect='equal')
h = 3000  # strip height in data units
prev_acts = ['sleep']
prev_act = get_main_activity(prev_acts)
prev_cat = get_category(prev_act)  # category assumed to be running before the first entry
origin = (0, -h)
day_colors = []
for day in day_activities:
    origin = (0, origin[1] + h) # move up for new day
    midnight = dt2ts(day + '0000')
    prev_start = midnight
    day_color = 'r'  # the date label is red unless 'hygiene' was logged that day
    for acts in day_activities[day]:
        if 'hygiene' in acts[1]:
            day_color = 'b'
        main_act = get_main_activity(acts[1]) # get next act
        delta = acts[0] - prev_start # compute time delta
        cat = get_category(main_act) # get next cat
        # the stretch ending at this entry belongs to the previous category
        ax1.add_patch(patches.Rectangle(origin, delta, h, color=get_category_color(prev_cat)))
        # update
        prev_start = acts[0]
        prev_act = main_act
        prev_cat = cat
        origin = (origin[0] + delta, origin[1])
    # Close the strip up to the next midnight with the category still running.
    # prev_start / prev_cat equal the last entry's values when the day has entries and stay
    # valid when it has none (the stale `acts` / `cat` of an earlier day were used before).
    delta = midnight + 60*60*24 - prev_start
    ax1.add_patch(patches.Rectangle(origin, delta, h, color=get_category_color(prev_cat)))
    day_colors.append(day_color)

# hour labels under the first strip and a white separator at every full hour
x_d = 1000
plt.text(x_d, -h, '0', fontsize=16)
for i in range(1,24):
    plt.axvline(3600*i, color='white')
    plt.text(3600*i + x_d, -h, str(i), fontsize=16)

# date labels left of the strips; the key list is built once instead of once per row
y_d = 1000
day_keys = list(day_activities.keys())
for i, dt in enumerate(day_keys):
    s = dt[:4] + '-' + dt[4:6] + '-' + dt[6:8]
    plt.text(-10000, h*i + y_d, s, fontsize=16, color=day_colors[i])
    if i == 0:
        continue  # the first strip gets neither a separator nor a weekend marker
    plt.axhline(h*i, color='white')
    if datetime.datetime.fromtimestamp((date2ts(dt))).weekday() > 4:
        # Saturdays and Sundays are marked with a black line through the strip
        plt.axhline(h*i + h/2, color='black')

plt.ylim((0, origin[1] + h))
plt.xlim((0, 60*60*24))
plt.axis('off')
plt.savefig('../images/table.pdf', bbox_inches='tight')
plt.savefig('../images/table.png', bbox_inches='tight')
plt.show()

# Stand-alone legend: one coloured box per category with its upper-cased name next to it.
x_cat = 100   # box width
y_cat = 50    # box height
y_space = 20  # vertical gap between boxes
fig1 = plt.figure()
ax1 = fig1.add_subplot(111, aspect='equal')
row_pitch = y_cat + y_space
for row, cat in enumerate(category_color):
    origin = (0, row * row_pitch)
    plt.text(x_cat + x_cat/3, origin[1], cat.upper(), fontsize=16)
    ax1.add_patch(patches.Rectangle(origin, x_cat, y_cat, color=category_color[cat]))
# one pitch above the last box: used as the upper axis limit
origin = (0, len(category_color) * row_pitch)

plt.ylim((0, origin[1]))
plt.xlim((0, x_cat))
plt.axis('off')
plt.savefig('../images/legend.pdf', bbox_inches='tight')
plt.savefig('../images/legend.png', bbox_inches='tight')
plt.show()


In [None]:
# for el in day_activities['20190816']:
#     print(ts2date(el[0]), el[1])

In [None]:
# Daily hours of the main categories, one line per category.
# max(1, ...) keeps the figure width positive when fewer than four days are logged.
plt.figure(figsize=(max(1, int(len(day_activities)/4)),8))
day_keys = list(day_activities.keys())
x = list(range(len(day_keys)))
for cat in ['sleep', 'work', 'food', 'hobbies']:
    vs = get_day_category_time(day_activities, cat)
    # Look every value up by its day so points and tick labels always refer to the same day:
    # get_day_category_time returns days in ascending order, which need not match the
    # insertion order of day_activities used for the labels.
    y = [vs.get(d, 0)/3600 for d in day_keys]
    plt.plot(x, y, '-o', label=cat, linewidth=3)
plt.xticks(x, [d[:4] + '-' + d[4:6] + '-' + d[6:] for d in day_keys], rotation=90)
plt.legend()
plt.grid()
plt.ylabel('Hours')
plt.savefig('../images/time_series_main_categories.pdf', bbox_inches='tight')
plt.show()


In [None]:
# Total seconds logged for 'side_project' over all days.
tot_act_time = sum(get_day_activity_time(day_activities, 'side_project').values())

In [None]:
# Total 'side_project' time, converted from seconds.
tot_act_time/60 # minutes

In [None]:
# Total 'side_project' time, converted from seconds.
tot_act_time/3600 # hours

In [None]:
# binwidth = 5
# bins = range(int(min(values)), int(max(values)) + binwidth, binwidth)
# plt.figure(figsize=(9,4))
# for i in range(len(values)):
# #     plt.hist(values, facecolor='g')
#     plt.hist(values[:i], bins, align='left', facecolor='g')
#     plt.pause(0.05)
#     plt.show()


In [None]:
# Histogram of the hour of day at which selected activities are logged.
# An occurrence later than the activity's split hour is treated as belonging to the next day,
# i.e. it is shifted back by 24 h and shows up as a negative hour.
split_day_act_hour_dict = dict() # doing activity act after this hour is considered in the next day
split_day_act_hour_dict['breakfast'] = 18
split_day_act_hour_dict['lunch'] = split_day_act_hour_dict['breakfast'] + 5
split_day_act_hour_dict['dinner'] = split_day_act_hour_dict['lunch'] + 8
split_day_act_hour_dict['sleep'] = split_day_act_hour_dict['breakfast'] - 8
# split_day_act_hour_dict['sleep'] = 14
for act in split_day_act_hour_dict:
    seconds_to_midnight_list = [] # act_time - beginning_current_day_timestamp
    for day in day_activities:
        for el in day_activities[day]:
            if act in el[1]:
                seconds_to_midnight = el[0] - date2ts(day)
                if seconds_to_midnight > 60*60*split_day_act_hour_dict[act]:
                    seconds_to_midnight = seconds_to_midnight - 60*60*24
                seconds_to_midnight_list.append(seconds_to_midnight)
    values = [e/3600 for e in seconds_to_midnight_list]
    if not values:
        continue # activity never logged: nothing to plot
    binwidth = 1
    # Floor the lower edge (int() truncates towards zero and cut off negative hours) and
    # extend the upper edge past the largest value, so no occurrence falls outside the bins.
    first_edge = int(np.floor(min(values)))
    last_edge = int(np.floor(max(values))) + binwidth
    bins = range(first_edge, last_edge + binwidth, binwidth)
    plt.figure(figsize=(9,4))
    plt.title(act)
    # plot every value; `values[:i]` depended on a loop index left over from an earlier cell
    plt.hist(values, bins, align='left', facecolor='g')
    plt.show()
