# Retrieve Activities

This Jupyter notebook processes the [UK Time Use study data from 2000](https://discover.ukdataservice.ac.uk/catalogue?sn=4504) and brings it into a format readable by [people](https://github.com/timtroendle/people).

In [None]:
from enum import Enum
from pathlib import Path

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from pytus2000 import read_diary_file, diary, read_individual_file, individual
%matplotlib inline

In [None]:
DATA_FOLDER_PATH = Path('./data/UKDA-4504-tab/')

## Read Data

In [None]:
diary_data = read_diary_file(DATA_FOLDER_PATH / 'tab' / 'diary_data_8.tab')
diary_data.ACT1_001.head()

In [None]:
individual_data = read_individual_file(DATA_FOLDER_PATH / 'tab' / 'Individual_data_5.tab')
individual_data.Q1A.head()

## Helper Functions

In [None]:
def print_full(x):
    """Print ``x`` without pandas truncating its rows.

    The ``display.max_rows`` limit is lifted only for the duration of the
    call. ``pd.option_context`` restores the previously active value even if
    printing raises, so a failure here cannot leak the setting into later
    cells.

    Parameters
    ----------
    x : object
        Anything ``print`` accepts, typically a ``pd.Series`` or a
        ``pd.DataFrame``.
    """
    # ``None`` means "no row limit"; unlike ``len(x)`` it is also well defined
    # for empty inputs.
    with pd.option_context('display.max_rows', None):
        print(x)

In [None]:
from datetime import datetime, timedelta

def time_mapper(time_interval):
    """Translate a diary interval number into the clock time at which it starts.

    Interval 1 starts at 04:00 and every following interval starts ten
    minutes later, wrapping around midnight, so interval 144 starts at 03:50.

    e.g. '001' -> (4, 0)
    e.g. '144' -> (3, 50)
    e.g. 1     -> (4, 0)

    Returns a ``(hour, minute)`` tuple. Raises ``ValueError`` if the interval
    is not within 1..144 or cannot be converted to an integer.
    """
    slot = int(time_interval)
    if not 1 <= slot <= 144:
        raise ValueError('Invalid time invertal {}. Must be between 1 and 144.'.format(slot))
    # Minutes elapsed since midnight, wrapped into a single day.
    minutes_of_day = (4 * 60 + (slot - 1) * 10) % (24 * 60)
    hour, minute = divmod(minutes_of_day, 60)
    return hour, minute


# Self-check: string and integer inputs must map to the same clock times.
assert all(
    time_mapper(raw_interval) == expected_time
    for raw_interval, expected_time in [
        ('001', (4, 0)),
        ('007', (5, 0)),
        ('144', (3, 50)),
        (1, (4, 0)),
        (7, (5, 0)),
        (144, (3, 50)),
    ]
)

## Transforming Activities and Locations into TimeSeries

In [None]:
# Frame for the first diary interval only (columns suffixed ``_001``), one row
# per diary. ``hour`` and ``minute`` are constant columns holding 4 and 0,
# which is what ``time_mapper`` returns for interval 1.
ac_loc = pd.DataFrame(
    {
        'activity': diary_data['ACT1_001'],
        'location': diary_data['WHER_001'],
        'secondary_activity': diary_data['ACT2_001'],
        'hour': pd.Series([4] * len(diary_data), index=diary_data.index),
        'minute': pd.Series([0] * len(diary_data), index=diary_data.index)
    },
    dtype='category'  # requested for every column, including hour and minute
)
ac_loc.info()

In [None]:
ac_loc.head()

In [None]:
# Build one frame per diary interval (1..144) and stack them with a single
# ``pd.concat``. Appending to ``ac_loc`` inside the loop copied the growing
# frame on every iteration (quadratic), relied on ``DataFrame.append`` (removed
# in pandas 2.0) and duplicated rows whenever the cell was run twice. Rebuilding
# from ``diary_data`` makes the cell idempotent and yields the same rows in the
# same order: interval 1 first, interval 144 last.
interval_frames = []
for interval in range(1, 145):
    hour, minute = time_mapper(interval)
    # Defensive sanity check; the comparison is also False for NaN values.
    if not (0 <= hour <= 23 and 0 <= minute <= 59):
        msg = 'Something went wront. Interval {}, hour {}, minute {}'.format(interval, hour, minute)
        raise ValueError(msg)
    # Diary columns are suffixed with the zero-padded interval, e.g. ACT1_007.
    column_suffix = '{:0>3}'.format(interval)
    interval_frames.append(
        pd.DataFrame(
            {
                'activity': diary_data['ACT1_' + column_suffix],
                'location': diary_data['WHER_' + column_suffix],
                'secondary_activity': diary_data['ACT2_' + column_suffix],
                'hour': pd.Series([hour] * len(diary_data), index=diary_data.index),
                'minute': pd.Series([minute] * len(diary_data), index=diary_data.index),
            },
            dtype='category'
        )
    )
# Keep the diary index on every row; hour/minute become index levels later.
ac_loc = pd.concat(interval_frames)
ac_loc.info()

In [None]:
# Give every column its final dtype. The categories are fixed to the full enum
# so that codes which never occur in the data are still known categories.
# ``pd.Categorical(values, categories=...)`` is used because the
# ``astype('category', categories=...)`` keyword was removed from pandas.
ac_loc['activity'] = pd.Categorical(ac_loc['activity'], categories=list(diary.ACT1_001))
ac_loc['secondary_activity'] = pd.Categorical(ac_loc['secondary_activity'], categories=list(diary.ACT2_001))
ac_loc['location'] = pd.Categorical(ac_loc['location'], categories=list(diary.WHER_001))
# Hours (0..23) and minutes (0..50) fit comfortably into 8-bit integers.
ac_loc['hour'] = ac_loc['hour'].astype(np.int8)
ac_loc['minute'] = ac_loc['minute'].astype(np.int8)
ac_loc.info()

In [None]:
# Append hour and minute as additional index levels behind the existing diary
# index. ``drop=True`` removes them from the columns, so this cell cannot be
# run a second time on its own result.
ac_loc = ac_loc.set_index(['hour', 'minute'], drop=True, append=True)
ac_loc.info()

In [None]:
ac_loc = ac_loc.sort_index()

In [None]:
ac_loc.head(15)

### Slicing Example: Choose only adults

In [None]:
mask = diary_data.DTYPE == diary.DTYPE.ADULT_DIARY
mask.head()

In [None]:
ac_loc_mask = mask.reindex(ac_loc.index, method='ffill')
ac_loc_mask.head()

In [None]:
ac_loc[ac_loc_mask].head()

In [None]:
assert len(diary_data[mask]) * 144 == len(ac_loc[ac_loc_mask])

## Analysing Activities and Locations

How important is an activity in the data set? Let's have a look at the average time spent per day on certain activities.

In [None]:
print_full(ac_loc.groupby('activity').location.count().sort_values())

In [None]:
print_full(ac_loc.groupby('location').activity.count().sort_values())

## Defining and Mapping Locations

What we are actually interested in is not so much activities, but mostly locations of people. So let's look at locations first.

In [None]:
class Location(Enum):
    """Simplified locations.

    Coarse categories that the detailed ``diary.WHER_001`` codes are collapsed
    into by ``location_map``.
    """
    HOME = 1
    OTHER_HOME = 2  # second/weekend homes and other people's homes
    WORK_OR_SCHOOL = 3
    RESTO = 4  # restaurant, café or pub
    SPORTS_FACILITY = 5
    ARTS_OR_CULTURAL_CENTRE = 6
    OUTSIDE = 7  # countryside, seaside, beach or coast
    TRAVELLING = 8  # any transport mode, including waiting for public transport
    UNKNOWN = 9  # missing, unspecified, illegible or other-specified locations
    IMPLICIT = 10  # no location code required (main activity is sleep/work/study)

In [None]:
# Lookup table from the detailed location codes (diary.WHER_001) to the
# simplified Location categories. All transport modes, including waiting for
# public transport, collapse into TRAVELLING; second homes and other people's
# homes collapse into OTHER_HOME; missing, unspecified, illegible and
# "other specified" locations become UNKNOWN.
location_map = {
    diary.WHER_001.MAIN_ACTVTY_EQUAL_SLEEPWORKSTUDY___NO_CODE_REQUIRED : Location.IMPLICIT,
    diary.WHER_001._MISSING : Location.UNKNOWN,
    diary.WHER_001._UNSPECIFIED_LOCATION : Location.UNKNOWN,
    diary.WHER_001._UNSPECIFIED_LOCATION_NOT_TRAVELLING : Location.UNKNOWN,
    diary.WHER_001._HOME : Location.HOME,
    diary.WHER_001._SECOND_HOME_OR_WEEKEND_HOUSE : Location.OTHER_HOME,
    diary.WHER_001._WORKING_PLACE_OR_SCHOOL : Location.WORK_OR_SCHOOL,
    diary.WHER_001._OTHER_PEOPLE_S_HOME : Location.OTHER_HOME,
    diary.WHER_001._RESTAURANT__CAFÉ_OR_PUB : Location.RESTO,
    diary.WHER_001._SPORTS_FACILITY : Location.SPORTS_FACILITY,
    diary.WHER_001._WHER_001__ARTS_OR_CULTURAL_CENTRE : Location.ARTS_OR_CULTURAL_CENTRE,
    diary.WHER_001._THE_COUNTRY_COUNTRYSIDE__SEASIDE__BEACH_OR_COAST : Location.OUTSIDE,
    diary.WHER_001._OTHER_SPECIFIED_LOCATION_NOT_TRAVELLING : Location.UNKNOWN,
    diary.WHER_001._UNSPECIFIED_PRIVATE_TRANSPORT_MODE : Location.TRAVELLING,
    diary.WHER_001._TRAVELLING_ON_FOOT : Location.TRAVELLING,
    diary.WHER_001._TRAVELLING_BY_BICYCLE : Location.TRAVELLING,
    diary.WHER_001._TRAVELLING_BY_MOPED__MOTORCYCLE_OR_MOTORBOAT : Location.TRAVELLING,
    diary.WHER_001._TRAVELLING_BY_PASSENGER_CAR_AS_THE_DRIVER : Location.TRAVELLING,
    diary.WHER_001._TRAVELLING_BY_PASSENGER_CAR_AS_A_PASSENGER : Location.TRAVELLING,
    diary.WHER_001._TRAVELLING_BY_PASSENGER_CAR_DRIVER_STATUS_UNSPECIFIED : Location.TRAVELLING,
    diary.WHER_001._TRAVELLING_BY_LORRY__OR_TRACTOR : Location.TRAVELLING,
    diary.WHER_001._TRAVELLING_BY_VAN : Location.TRAVELLING,
    diary.WHER_001._OTHER_SPECIFIED_PRIVATE_TRAVELLING_MODE : Location.TRAVELLING,
    diary.WHER_001._UNSPECIFIED_PUBLIC_TRANSPORT_MODE : Location.TRAVELLING,
    diary.WHER_001._TRAVELLING_BY_TAXI : Location.TRAVELLING,
    diary.WHER_001._TRAVELLING_BY_BUS : Location.TRAVELLING,
    diary.WHER_001._TRAVELLING_BY_TRAM_OR_UNDERGROUND : Location.TRAVELLING,
    diary.WHER_001._TRAVELLING_BY_TRAIN : Location.TRAVELLING,
    diary.WHER_001._TRAVELLING_BY_AEROPLANE : Location.TRAVELLING,
    diary.WHER_001._TRAVELLING_BY_BOAT_OR_SHIP : Location.TRAVELLING,
    diary.WHER_001._WHER_001__TRAVELLING_BY_COACH : Location.TRAVELLING,
    diary.WHER_001._WAITING_FOR_PUBLIC_TRANSPORT : Location.TRAVELLING,
    diary.WHER_001._OTHER_SPECIFIED_PUBLIC_TRANSPORT_MODE : Location.TRAVELLING,
    diary.WHER_001._UNSPECIFIED_TRANSPORT_MODE : Location.TRAVELLING,
    diary.WHER_001._ILLEGIBLE_LOCATION_OR_TRANSPORT_MODE : Location.UNKNOWN
}

Estimate the distribution of locations for a certain activity. How big is the share of the most common location of a certain activity?

In [None]:
def share_of_activity_time_at_most_common_location(group):
    """Return the fraction of a group's records that fall on its most frequent location.

    ``group`` is a DataFrame with ``location`` and ``activity`` columns.
    Records are counted per location, considering only rows with a non-null
    ``activity``; the largest count is divided by the total count.
    """
    records_per_location = group.groupby('location')['activity'].count()
    total_records = records_per_location.sum()
    return records_per_location.max() / total_records

In [None]:
def number_of_locations(group):
    """Return how many distinct values the ``location`` column of ``group`` holds."""
    distinct_locations = group['location'].unique()
    return len(distinct_locations)

In [None]:
ac_loc.groupby('activity').apply(share_of_activity_time_at_most_common_location).hist()
_ = plt.xlabel('Share of most common location for activity.')

In [None]:
ac_loc.groupby('activity').apply(number_of_locations).hist()
_ = plt.xlabel('Number of locations per activity.')

For half of all activities, at least 87% of the activity's time is spent at its most common location. On average, 78% of an activity's time is spent at its most common location.

Half of all activities are performed at 5 or fewer different locations. On average, there are 6.8 different locations per activity.

In [None]:
# Same records as ``ac_loc`` but with the detailed location codes collapsed to
# the simplified ``Location`` categories. Only the ``location`` column needs
# mapping, so it is translated directly instead of running ``applymap`` over
# every cell of the frame. Values that are not ``diary.WHER_001`` members
# (e.g. NaN) are passed through unchanged, as before.
# ``pd.Categorical(values, categories=...)`` replaces the removed
# ``astype('category', categories=...)`` keyword.
ac_simpleloc = ac_loc.copy()
ac_simpleloc['activity'] = pd.Categorical(ac_loc['activity'], categories=list(diary.ACT1_001))
ac_simpleloc['location'] = pd.Categorical(
    [location_map[loc] if isinstance(loc, diary.WHER_001) else loc for loc in ac_loc['location']],
    categories=list(Location),
)

In [None]:
ac_simpleloc.groupby('activity').apply(share_of_activity_time_at_most_common_location).hist()
_ = plt.xlabel('Share of most common location for activity.')

In [None]:
ac_simpleloc.groupby('activity').apply(number_of_locations).hist(bins=len(Location)-1)
_ = plt.xlabel('Number of locations per activity.')

With reduced locations, for half of all activities at least 93% of the activity's time is spent at its most common location. On average, 85% of an activity's time is spent at its most common location.

Half of all activities are performed at 3 or fewer different locations. On average, there are 3.9 different locations per activity.

In [None]:
ac_simpleloc.groupby('location').activity.count().sort_values()

## Defining and Mapping Activities

In [None]:
class Activity(Enum):
    """Simplified activities.

    Coarse categories that the detailed ``diary.ACT1_001`` codes are collapsed
    into by ``activity_map``.
    """
    SLEEP = 1
    WORK_OR_STUDY = 2
    OTHER = 3
    UNKNOWN = 4  # missing, illegible or unspecified main activity

In [None]:
# Lookup table from the detailed main-activity codes (diary.ACT1_001) to the
# simplified Activity categories. Sleep codes (including being sick in bed) map
# to SLEEP; employment and school/university codes map to WORK_OR_STUDY, except
# breaks, job seeking, homework and free-time study, which map to OTHER, as do
# all travel codes; missing, illegible and unspecified codes map to UNKNOWN.
activity_map = {
    diary.ACT1_001.UNSPECIFIED_PERSONAL_CARE: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_SLEEP: Activity.SLEEP,
    diary.ACT1_001.SLEEP: Activity.SLEEP,
    diary.ACT1_001.SICK_IN_BED: Activity.SLEEP,
    diary.ACT1_001.EATING: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_OTHER_PERSONAL_CARE: Activity.OTHER,
    diary.ACT1_001.WASH_AND_DRESS: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_PERSONAL_CARE: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_EMPLOYMENT: Activity.WORK_OR_STUDY,
    diary.ACT1_001.WORKING_TIME_IN_MAIN_JOB: Activity.WORK_OR_STUDY,
    diary.ACT1_001.COFFEE_AND_OTHER_BREAKS_IN_MAIN_JOB: Activity.OTHER,
    diary.ACT1_001.WORKING_TIME_IN_SECOND_JOB: Activity.WORK_OR_STUDY,
    diary.ACT1_001.COFFEE_AND_OTHER_BREAKS_IN_SECOND_JOB: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_ACTIVITIES_RELATED_TO_EMPLOYMENT: Activity.WORK_OR_STUDY,
    diary.ACT1_001.LUNCH_BREAK: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_ACTIVITIES_RELATED_TO_EMPLOYMENT: Activity.WORK_OR_STUDY,
    diary.ACT1_001.ACTIVITIES_RELATED_TO_JOB_SEEKING: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_ACTIVITIES_RELATED_TO_EMPLOYMENT2: Activity.WORK_OR_STUDY,
    diary.ACT1_001.UNSPECIFIED_STUDY: Activity.WORK_OR_STUDY,
    diary.ACT1_001.UNSPECIFIED_ACTIVITIES_RELATED_TO_SCHOOL_OR_UNIVERSITY: Activity.WORK_OR_STUDY,
    diary.ACT1_001.CLASSES_AND_LECTURES: Activity.WORK_OR_STUDY,
    diary.ACT1_001.HOMEWORK: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_ACTIVITIES_RELATED_TO_SCHOOL_OR_UNIVERSITY: Activity.WORK_OR_STUDY,
    diary.ACT1_001.FREE_TIME_STUDY: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_HOUSEHOLD_AND_FAMILY_CARE: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_FOOD_MANAGEMENT: Activity.OTHER,
    diary.ACT1_001.FOOD_PREPARATION: Activity.OTHER,
    diary.ACT1_001.BAKING: Activity.OTHER,
    diary.ACT1_001.DISH_WASHING: Activity.OTHER,
    diary.ACT1_001.PRESERVING: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_FOOD_MANAGEMENT: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_HOUSEHOLD_UPKEEP: Activity.OTHER,
    diary.ACT1_001.CLEANING_DWELLING: Activity.OTHER,
    diary.ACT1_001.CLEANING_YARD: Activity.OTHER,
    diary.ACT1_001.HEATING_AND_WATER: Activity.OTHER,
    diary.ACT1_001.VARIOUS_ARRANGEMENTS: Activity.OTHER,
    diary.ACT1_001.DISPOSAL_OF_WASTE: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_HOUSEHOLD_UPKEEP: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_MAKING_AND_CARE_FOR_TEXTILES: Activity.OTHER,
    diary.ACT1_001.LAUNDRY: Activity.OTHER,
    diary.ACT1_001.IRONING: Activity.OTHER,
    diary.ACT1_001.HANDICRAFT_AND_PRODUCING_TEXTILES: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_MAKING_AND_CARE_FOR_TEXTILES: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_GARDENING_AND_PET_CARE: Activity.OTHER,
    diary.ACT1_001.GARDENING: Activity.OTHER,
    diary.ACT1_001.TENDING_DOMESTIC_ANIMALS: Activity.OTHER,
    diary.ACT1_001.CARING_FOR_PETS: Activity.OTHER,
    diary.ACT1_001.WALKING_THE_DOG: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_GARDENING_AND_PET_CARE: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_CONSTRUCTION_AND_REPAIRS: Activity.OTHER,
    diary.ACT1_001.HOUSE_CONSTRUCTION_AND_RENOVATION: Activity.OTHER,
    diary.ACT1_001.REPAIRS_OF_DWELLING: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_MAKING__REPAIRING_AND_MAINTAINING_EQUIPMENT: Activity.OTHER,
    diary.ACT1_001.WOODCRAFT__METAL_CRAFT__SCULPTURE_AND_POTTERY: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_MAKING__REPAIRING_AND_MAINTAINING_EQUIPMENT: Activity.OTHER,
    diary.ACT1_001.VEHICLE_MAINTENANCE: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_CONSTRUCTION_AND_REPAIRS: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_SHOPPING_AND_SERVICES: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_SHOPPING: Activity.OTHER,
    diary.ACT1_001.SHOPPING_MAINLY_FOR_FOOD: Activity.OTHER,
    diary.ACT1_001.SHOPPING_MAINLY_FOR_CLOTHING: Activity.OTHER,
    diary.ACT1_001.SHOPPING_MAINLY_RELATED_TO_ACCOMMODATION: Activity.OTHER,
    diary.ACT1_001.SHOPPING_OR_BROWSING_AT_CAR_BOOT_SALES_OR_ANTIQUE_FAIRS: Activity.OTHER,
    diary.ACT1_001.WINDOW_SHOPPING_OR_OTHER_SHOPPING_AS_LEISURE: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_SHOPPING: Activity.OTHER,
    diary.ACT1_001.COMMERCIAL_AND_ADMINISTRATIVE_SERVICES: Activity.OTHER,
    diary.ACT1_001.PERSONAL_SERVICES: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_SHOPPING_AND_SERVICES: Activity.OTHER,
    diary.ACT1_001.HOUSEHOLD_MANAGEMENT_NOT_USING_THE_INTERNET: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_HOUSEHOLD_MANAGEMENT_USING_THE_INTERNET: Activity.OTHER,
    diary.ACT1_001.SHPING_FORANDORDRING_UNSPEC_GDSANDSRVS_VIA_INTERNET: Activity.OTHER,
    diary.ACT1_001.SHPING_FORANDORDRING_FOOD_VIA_THE_INTERNET: Activity.OTHER,
    diary.ACT1_001.SHPING_FORANDORDRING_CLOTHING_VIA_THE_INTERNET: Activity.OTHER,
    diary.ACT1_001.SHPING_FORANDORDRING_GDSANDSRV_RELATED_TO_ACC_VIA_INTERNET: Activity.OTHER,
    diary.ACT1_001.SHPING_FORANDORDRING_MASS_MEDIA_VIA_THE_INTERNET: Activity.OTHER,
    diary.ACT1_001.SHPING_FORANDORDRING_ENTERTAINMENT_VIA_THE_INTERNET: Activity.OTHER,
    diary.ACT1_001.BANKING_AND_BILL_PAYING_VIA_THE_INTERNET: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_HOUSEHOLD_MANAGEMENT_USING_THE_INTERNET: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_CHILDCARE: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_PHYSICAL_CARE_AND_SUPERVISION_OF_A_CHILD: Activity.OTHER,
    diary.ACT1_001.FEEDING_THE_CHILD: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_PHYSICAL_CARE_AND_SUPERVISION_OF_A_CHILD: Activity.OTHER,
    diary.ACT1_001.TEACHING_THE_CHILD: Activity.OTHER,
    diary.ACT1_001.READING__PLAYING_AND_TALKING_WITH_CHILD: Activity.OTHER,
    diary.ACT1_001.ACCOMPANYING_CHILD: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_CHILDCARE: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_HELP_TO_AN_ADULT_HOUSEHOLD_MEMBER: Activity.OTHER,
    diary.ACT1_001.PHYSICAL_CARE_AND_SUPERVISION_OF_AN_ADULT_HOUSEHOLD_MEMBER: Activity.OTHER,
    diary.ACT1_001.ACCOMPANYING_AN_ADULT_HOUSEHOLD_MEMBER: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_HELP_TO_AN_ADULT_HOUSEHOLD_MEMBER: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_VOLUNTEER_WORK_AND_MEETINGS: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_ORGANISATIONAL_WORK: Activity.OTHER,
    diary.ACT1_001.WORK_FOR_AN_ORGANISATION: Activity.OTHER,
    diary.ACT1_001.VOLUNTEER_WORK_THROUGH_AN_ORGANISATION: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_ORGANISATIONAL_WORK: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_INFORMAL_HELP: Activity.OTHER,
    diary.ACT1_001.FOOD_MANAGEMENT_AS_HELP: Activity.OTHER,
    diary.ACT1_001.HOUSEHOLD_UPKEEP_AS_HELP: Activity.OTHER,
    diary.ACT1_001.GARDENING_AND_PET_CARE_AS_HELP: Activity.OTHER,
    diary.ACT1_001.CONSTRUCTION_AND_REPAIRS_AS_HELP: Activity.OTHER,
    diary.ACT1_001.SHOPPING_AND_SERVICES_AS_HELP: Activity.OTHER,
    diary.ACT1_001.HELP_IN_EMPLOYMENT_AND_FARMING: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_CHILDCARE_AS_HELP: Activity.OTHER,
    diary.ACT1_001.PHYSICAL_CARE_AND_SUPERVISION_OF_A_CHILD_AS_HELP: Activity.OTHER,
    diary.ACT1_001.TEACHING_THE_CHILD_AS_HELP: Activity.OTHER,
    diary.ACT1_001.READING__PLAYING_AND_TALKING_TO_THE_CHILD_AS_HELP: Activity.OTHER,
    diary.ACT1_001.ACCOMPANYING_THE_CHILD_AS_HELP: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_CHILDCARE_AS_HELP: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_HELP_TO_AN_ADULT_MEMBER_OF_ANOTHER_HOUSEHOLD: Activity.OTHER,
    diary.ACT1_001.PHYSICAL_CARE_AND_SUPERVISION_OF_AN_ADULT_AS_HELP: Activity.OTHER,
    diary.ACT1_001.ACCOMPANYING_AN_ADULT_AS_HELP: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_HELP_TO_AN_ADULT_MEMBER_OF_ANOTHER_HOUSEHOLD: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_INFORMAL_HELP: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_PARTICIPATORY_ACTIVITIES: Activity.OTHER,
    diary.ACT1_001.MEETINGS: Activity.OTHER,
    diary.ACT1_001.RELIGIOUS_ACTIVITIES: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_PARTICIPATORY_ACTIVITIES: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_SOCIAL_LIFE_AND_ENTERTAINMENT: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_SOCIAL_LIFE: Activity.OTHER,
    diary.ACT1_001.SOCIALISING_WITH_HOUSEHOLD_MEMBERS: Activity.OTHER,
    diary.ACT1_001.VISITING_AND_RECEIVING_VISITORS: Activity.OTHER,
    diary.ACT1_001.FEASTS: Activity.OTHER,
    diary.ACT1_001.TELEPHONE_CONVERSATION: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_SOCIAL_LIFE: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_ENTERTAINMENT_AND_CULTURE: Activity.OTHER,
    diary.ACT1_001.CINEMA: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_THEATRE_OR_CONCERTS: Activity.OTHER,
    diary.ACT1_001.PLAYS__MUSICALS_OR_PANTOMIMES: Activity.OTHER,
    diary.ACT1_001.OPERA__OPERETTA_OR_LIGHT_OPERA: Activity.OTHER,
    diary.ACT1_001.CONCERTS_OR_OTHER_PERFORMANCES_OF_CLASSICAL_MUSIC: Activity.OTHER,
    diary.ACT1_001.LIVE_MUSIC_OTHER_THAN_CLASSICAL_CONCERTS__OPERA_AND_MUSICALS: Activity.OTHER,
    diary.ACT1_001.DANCE_PERFORMANCES: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_THEATRE_OR_CONCERTS: Activity.OTHER,
    diary.ACT1_001.ART_EXHIBITIONS_AND_MUSEUMS: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_LIBRARY: Activity.OTHER,
    diary.ACT1_001.BRWING_BKS_RCDS_AUDIO_VIDEO_CDS_VDS_FROM_LIBRARY: Activity.OTHER,
    diary.ACT1_001.REFERENCE_TO_BKS_AND_OTHER_LIBRARY_MATERIALS_WITHIN_LIBRARY: Activity.OTHER,
    diary.ACT1_001.USING_INTERNET_IN_THE_LIBRARY: Activity.OTHER,
    diary.ACT1_001.USING_COMPUTERS_IN_THE_LIBRARY_OTHER_THAN_INTERNET_USE: Activity.OTHER,
    diary.ACT1_001.READING_NEWSPAPERS_IN_A_LIBRARY: Activity.OTHER,
    diary.ACT1_001.LISTENING_TO_MUSIC_IN_A_LIBRARY: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_LIBRARY_ACTIVITIES: Activity.OTHER,
    diary.ACT1_001.SPORTS_EVENTS: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_ENTERTAINMENT_AND_CULTURE: Activity.OTHER,
    diary.ACT1_001.VISITING_A_HISTORICAL_SITE: Activity.OTHER,
    diary.ACT1_001.VISITING_A_WILDLIFE_SITE: Activity.OTHER,
    diary.ACT1_001.VISITING_A_BOTANICAL_SITE: Activity.OTHER,
    diary.ACT1_001.VISITING_A_LEISURE_PARK: Activity.OTHER,
    diary.ACT1_001.VISITING_AN_URBAN_PARK__PLAYGROUND_OR_DESIGNATED_PLAY_AREA: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_ENTERTAINMENT_OR_CULTURE: Activity.OTHER,
    diary.ACT1_001.RESTING_TIME_OUT: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_SPORTS_AND_OUTDOOR_ACTIVITIES: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_PHYSICAL_EXERCISE: Activity.OTHER,
    diary.ACT1_001.WALKING_AND_HIKING: Activity.OTHER,
    diary.ACT1_001.TAKING_A_WALK_OR_HIKE_THAT_LASTS_AT_LEAST_2_MILES_OR_1_HOUR: Activity.OTHER,
    diary.ACT1_001.OTHER_WALK_OR_HIKE: Activity.OTHER,
    diary.ACT1_001.JOGGING_AND_RUNNING: Activity.OTHER,
    diary.ACT1_001.BIKING__SKIING_AND_SKATING: Activity.OTHER,
    diary.ACT1_001.BIKING: Activity.OTHER,
    diary.ACT1_001.SKIING_OR_SKATING: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_BALL_GAMES: Activity.OTHER,
    diary.ACT1_001.INDOOR_PAIRS_OR_DOUBLES_GAMES: Activity.OTHER,
    diary.ACT1_001.INDOOR_TEAM_GAMES: Activity.OTHER,
    diary.ACT1_001.OUTDOOR_PAIRS_OR_DOUBLES_GAMES: Activity.OTHER,
    diary.ACT1_001.OUTDOOR_TEAM_GAMES: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_BALL_GAMES: Activity.OTHER,
    diary.ACT1_001.GYMNASTICS: Activity.OTHER,
    diary.ACT1_001.FITNESS: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_WATER_SPORTS: Activity.OTHER,
    diary.ACT1_001.SWIMMING: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_WATER_SPORTS: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_PHYSICAL_EXERCISE: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_PRODUCTIVE_EXERCISE: Activity.OTHER,
    diary.ACT1_001.HUNTING_AND_FISHING: Activity.OTHER,
    diary.ACT1_001.PICKING_BERRIES__MUSHROOM_AND_HERBS: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_PRODUCTIVE_EXERCISE: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_SPORTS_RELATED_ACTIVITIES: Activity.OTHER,
    diary.ACT1_001.ACTIVITIES_RELATED_TO_SPORTS: Activity.OTHER,
    diary.ACT1_001.ACTIVITIES_RELATED_TO_PRODUCTIVE_EXERCISE: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_HOBBIES_AND_GAMES: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_ARTS: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_VISUAL_ARTS: Activity.OTHER,
    diary.ACT1_001.PAINTING__DRAWING_OR_OTHER_GRAPHIC_ARTS: Activity.OTHER,
    diary.ACT1_001.MAKING_VIDEOS__TAKING_PHOTOS_OR_RELATED_ACTIVITIES: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_VISUAL_ARTS: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_PERFORMING_ARTS: Activity.OTHER,
    diary.ACT1_001.SINGING_OR_OTHER_MUSICAL_ACTIVITIES: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_PERFORMING_ARTS: Activity.OTHER,
    diary.ACT1_001.LITERARY_ARTS: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_ARTS: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_HOBBIES: Activity.OTHER,
    diary.ACT1_001.COLLECTING: Activity.OTHER,
    diary.ACT1_001.COMPUTING_PROGRAMMING: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_INFORMATION_BY_COMPUTING: Activity.OTHER,
    diary.ACT1_001.INFORMATION_SEARCHING_ON_THE_INTERNET: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_INFORMATION_BY_COMPUTING: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_COMMUNICATION_BY_COMPUTER: Activity.OTHER,
    diary.ACT1_001.COMMUNICATION_ON_THE_INTERNET: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_COMMUNICATION_BY_COMPUTING: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_OTHER_COMPUTING: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_INTERNET_USE: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_COMPUTING: Activity.OTHER,
    diary.ACT1_001.CORRESPONDENCE: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_HOBBIES: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_GAMES: Activity.OTHER,
    diary.ACT1_001.SOLO_GAMES_AND_PLAY: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_GAMES_AND_PLAY_WITH_OTHERS: Activity.OTHER,
    diary.ACT1_001.BILLIARDS__POOL__SNOOKER_OR_PETANQUE: Activity.OTHER,
    diary.ACT1_001.CHESS_AND_BRIDGE: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_PARLOUR_GAMES_AND_PLAY: Activity.OTHER,
    diary.ACT1_001.COMPUTER_GAMES: Activity.OTHER,
    diary.ACT1_001.GAMBLING: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_GAMES: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_MASS_MEDIA: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_READING: Activity.OTHER,
    diary.ACT1_001.READING_PERIODICALS: Activity.OTHER,
    diary.ACT1_001.READING_BOOKS: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_READING: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_TV_WATCHING: Activity.OTHER,
    diary.ACT1_001.WATCHING_A_FILM_ON_TV: Activity.OTHER,
    diary.ACT1_001.WATCHING_SPORT_ON_TV: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_TV_WATCHING: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_VIDEO_WATCHING: Activity.OTHER,
    diary.ACT1_001.WATCHING_A_FILM_ON_VIDEO: Activity.OTHER,
    diary.ACT1_001.WATCHING_SPORT_ON_VIDEO: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_VIDEO_WATCHING: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_LISTENING_TO_RADIO_AND_MUSIC: Activity.OTHER,
    diary.ACT1_001.UNSPECIFIED_RADIO_LISTENING: Activity.OTHER,
    diary.ACT1_001.LISTENING_TO_MUSIC_ON_THE_RADIO: Activity.OTHER,
    diary.ACT1_001.LISTENING_TO_SPORT_ON_THE_RADIO: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_RADIO_LISTENING: Activity.OTHER,
    diary.ACT1_001.LISTENING_TO_RECORDINGS: Activity.OTHER,
    diary.ACT1_001.TRAVEL_RELATED_TO_UNSPECIFIED_TIME_USE: Activity.OTHER,
    diary.ACT1_001.TRAVEL_RELATED_TO_PERSONAL_BUSINESS: Activity.OTHER,
    diary.ACT1_001.TRAVEL_IN_THE_COURSE_OF_WORK: Activity.OTHER,
    diary.ACT1_001.TRAVEL_TO_WORK_FROM_HOME_AND_BACK_ONLY: Activity.OTHER,
    diary.ACT1_001.TRAVEL_TO_WORK_FROM_A_PLACE_OTHER_THAN_HOME: Activity.OTHER,
    diary.ACT1_001.TRAVEL_RELATED_TO_EDUCATION: Activity.OTHER,
    diary.ACT1_001.TRAVEL_ESCORTING_TO_FROM_EDUCATION: Activity.OTHER,
    diary.ACT1_001.TRAVEL_RELATED_TO_HOUSEHOLD_CARE: Activity.OTHER,
    diary.ACT1_001.TRAVEL_RELATED_TO_SHOPPING: Activity.OTHER,
    diary.ACT1_001.TRAVEL_RELATED_TO_SERVICES: Activity.OTHER,
    diary.ACT1_001.TRAVEL_ESCORTING_A_CHILD_OTHER_THAN_EDUCATION: Activity.OTHER,
    diary.ACT1_001.TRAVEL_ESCORTING_AN_ADULT_OTHER_THAN_EDUCATION: Activity.OTHER,
    diary.ACT1_001.TRAVEL_RELATED_TO_ORGANISATIONAL_WORK: Activity.OTHER,
    diary.ACT1_001.TRAVEL_RELATED_TO_INFORMAL_HELP_TO_OTHER_HOUSEHOLDS: Activity.OTHER,
    diary.ACT1_001.TRAVEL_RELATED_TO_RELIGIOUS_ACTIVITIES: Activity.OTHER,
    diary.ACT1_001.TRAVEL_RLT_TO_PARTICIPATORY_ACTV_EXCEPT_REL_ACTV: Activity.OTHER,
    diary.ACT1_001.TRAVEL_TO_VISIT_FRIENDS_RELATIVES_IN_THEIR_HOMES: Activity.OTHER,
    diary.ACT1_001.TRAVEL_RELATED_TO_OTHER_SOCIAL_ACTIVITIES: Activity.OTHER,
    diary.ACT1_001.TRAVEL_RELATED_TO_ENTERTAINMENT_AND_CULTURE: Activity.OTHER,
    diary.ACT1_001.TRAVEL_RELATED_TO_PHYSICAL_EXERCISE: Activity.OTHER,
    diary.ACT1_001.TRAVEL_RELATED_TO_HUNTING_AND_FISHING: Activity.OTHER,
    diary.ACT1_001.TRAVEL_RELATED_TO_PRODUCTIVE_EXCS_EXPT_HUNTING_AND_FISHING: Activity.OTHER,
    diary.ACT1_001.TRAVEL_RELATED_TO_GAMBLING: Activity.OTHER,
    diary.ACT1_001.TRAVEL_RELATED_TO_HOBBIES_OTHER_THAN_GAMBLING: Activity.OTHER,
    diary.ACT1_001.TRAVEL_TO_HOLIDAY_BASE: Activity.OTHER,
    diary.ACT1_001.TRAVEL_FOR_DAY_TRIP_JUST_WALK: Activity.OTHER,
    diary.ACT1_001.OTHER_SPECIFIED_TRAVEL: Activity.OTHER,
    diary.ACT1_001.PUNCTUATING_ACTIVITY: Activity.OTHER,
    diary.ACT1_001.FILLING_IN_THE_TIME_USE_DIARY: Activity.OTHER,
    diary.ACT1_001.NO_MAIN_ACTIVITY__NO_IDEA_WHAT_IT_MIGHT_BE: Activity.UNKNOWN,
    diary.ACT1_001.NO_MAIN_ACTIVITY__SOME_IDEA_WHAT_IT_MIGHT_BE: Activity.UNKNOWN,
    diary.ACT1_001.ILLEGIBLE_ACTIVITY: Activity.UNKNOWN,
    diary.ACT1_001.UNSPECIFIED_TIME_USE: Activity.UNKNOWN,
    diary.ACT1_001.MISSING1: Activity.UNKNOWN
}

In [None]:
# Translate both columns to their simplified categories. Each column is looked
# up in its own map instead of dispatching per cell on the value's type, and
# the categoricals are built with ``pd.Categorical(values, categories=...)``
# because the ``astype('category', categories=...)`` keyword was removed from
# pandas. A value missing from its map still raises ``KeyError``.
simple_ac_loc = pd.DataFrame(
    {
        'location': pd.Categorical(
            [location_map[loc] for loc in ac_loc['location']],
            categories=list(Location),
        ),
        'activity': pd.Categorical(
            [activity_map[ac] for ac in ac_loc['activity']],
            categories=list(Activity),
        ),
    },
    index=ac_loc.index,
    columns=['location', 'activity'],  # keep the original column order
)

In [None]:
simple_ac_loc.head()

## Define Markov States

In [None]:
class SimpleMarkov(Enum):
    """States of the simplified Markov chain.

    Each state is derived from the simplified location and activity of a
    diary record.
    """
    HOME = 1
    SLEEP_AT_HOME = 2
    OTHER_HOME = 3
    WORK_OR_SCHOOL = 4
    RESTO = 5
    SPORTS_FACILITY = 6
    ARTS_OR_CULTURAL_CENTRE = 7
    OUTSIDE = 8
    TRAVELLING = 9

In [None]:
# One (initially missing) state per record of ``ac_loc``. The categories are
# registered up front so that the states can be assigned by mask afterwards.
# ``add_categories`` returns a new Series; its ``inplace=True`` variant was
# removed in pandas 2.0, so the result is assigned back instead.
markov_states = pd.Series(index=ac_loc.index, dtype='category')
markov_states = markov_states.cat.add_categories(list(SimpleMarkov))

In [None]:
# Boolean masks over the records of ``simple_ac_loc``.
# Activity-based mask:
mask_sleep = simple_ac_loc['activity'] == Activity.SLEEP
# Location-based masks:
mask_home = simple_ac_loc['location'] == Location.HOME
mask_other_home = simple_ac_loc['location'] == Location.OTHER_HOME
mask_resto = simple_ac_loc['location'] == Location.RESTO
mask_sport = simple_ac_loc['location'] == Location.SPORTS_FACILITY
mask_arts = simple_ac_loc['location'] == Location.ARTS_OR_CULTURAL_CENTRE
mask_outside = simple_ac_loc['location'] == Location.OUTSIDE
mask_travelling = simple_ac_loc['location'] == Location.TRAVELLING
# A record counts as work if either its activity or its location says so.
mask_work = (
    (simple_ac_loc['activity'] == Activity.WORK_OR_STUDY)
    | (simple_ac_loc['location'] == Location.WORK_OR_SCHOOL)
)

In [None]:
# The assignments run top to bottom, so wherever two masks overlap the later
# assignment overwrites the earlier one: the order encodes precedence.
markov_states[mask_home] = SimpleMarkov.HOME
# Every sleep record becomes SLEEP_AT_HOME regardless of its location, unless
# one of the masks below also matches and overwrites it.
markov_states[mask_sleep] = SimpleMarkov.SLEEP_AT_HOME
markov_states[mask_other_home] = SimpleMarkov.OTHER_HOME
markov_states[mask_outside] = SimpleMarkov.OUTSIDE
markov_states[mask_resto] = SimpleMarkov.RESTO
markov_states[mask_sport] = SimpleMarkov.SPORTS_FACILITY
markov_states[mask_travelling] = SimpleMarkov.TRAVELLING
# mask_work also matches on the activity, so work/study overrides the location
# based states assigned above.
markov_states[mask_work] = SimpleMarkov.WORK_OR_SCHOOL
markov_states[mask_arts] = SimpleMarkov.ARTS_OR_CULTURAL_CENTRE
# Records matched by none of the masks are not assigned a state here.

In [None]:
markov_states.describe()

In [None]:
len(markov_states)

In [None]:
markov_states.groupby(by=markov_states).count().sort_values()

### Example Days

In [None]:
from itertools import groupby

def day_overview(time_series, day_index):
    """Print the episodes of one diary day.

    Consecutive time slots with the same value are merged into one episode.
    For each episode the value and the time of its first and last slot are
    printed as a pandas Series (value as index, time span as data).

    Parameters:
        time_series: Series whose index, once `day_index` is selected, consists
            of tuples starting with (hour, minute).
        day_index: key passed to `time_series.loc` to select the day.
    """
    day = time_series.loc[day_index]
    # Materialise each group right away: itertools.groupby invalidates a
    # group's iterator as soon as the next group is requested.
    episodes = [(value, list(slots))
                for value, slots in groupby(day.index, lambda slot: day.loc[slot])]
    labels = [value for value, _ in episodes]
    spans = [
        'From {:0>2}:{:0>2} till {:0>2}:{:0>2}'.format(
            slots[0][0], slots[0][1], slots[-1][0], slots[-1][1])
        for _, slots in episodes
    ]
    print(pd.Series(index=labels, data=spans))

#### (1, 227, 1, 2)

In [None]:
record = (1, 227, 1, 2)

In [None]:
day_overview(markov_states, record)

It's awkward that there seem to be two episodes in which the person travels from home to home. Let's look at the original data.

In [None]:
day_overview(ac_loc.activity, (1, 227, 1, 2))

In [None]:
day_overview(ac_loc.location, (1, 227, 1, 2))

In [None]:
day_overview(ac_loc.secondary_activity, (1, 227, 1, 2))

Looking at the original data, one can see that the mapping is correct. The person did not give destinations for the two shopping trips, but only stated that they were travelling during that time.

#### (1, 249, 3, 1)

In [None]:
record = (1, 249, 3, 1)

In [None]:
day_overview(markov_states, record)

This is a straightforward record.

#### (8, 75, 1, 2)

In [None]:
record = (8, 75, 1, 2)

In [None]:
day_overview(markov_states, record)

Two things are noteworthy here: 

* The repeating travels from home to home.
* The NaNs.

In [None]:
day_overview(ac_loc.location, record)

In [None]:
day_overview(ac_loc.activity, record)

In [None]:
day_overview(ac_loc.secondary_activity, record)

Some conclusions:

* shopping is not properly covered as there is no location for shopping
* walks and walking the dog aren't explicitly covered, but simply show up as a sequence of home/travel/home. Should be fine.
* NaNs should be filtered out, if not able to solve.

## Filter

In [None]:
filtered_participants = individual_data.copy()
len(filtered_participants)

### Filter Participants that exist in only one of the two data sets

In [None]:
# Participant key (SN1, SN2, SN3) of every diary row, one entry per row (not de-duplicated).
# Built from the level values instead of `pd.MultiIndex(labels=...)`: the `labels`
# keyword was renamed to `codes` in pandas 0.24 and later removed, whereas
# `from_arrays` / `get_level_values` work in old and new pandas versions alike.
diary_particpants_index = pd.MultiIndex.from_arrays(
    [diary_data.index.get_level_values(level) for level in range(3)],
    names=['SN1', 'SN2', 'SN3'])

In [None]:
participants_not_in_individual = diary_particpants_index.difference(individual_data.index)
len(participants_not_in_individual)

In [None]:
participants_not_in_diary = individual_data.index.difference(diary_particpants_index)
len(participants_not_in_diary)

In [None]:
# Keep only the individuals that also appear in the diary data.
filtered_participants = filtered_participants.reindex(filtered_participants.index.difference(participants_not_in_diary))
# Sanity check on the expected count — presumably 11664 individuals in total, of which
# 1523 have no diary; verify against the outputs of the cells above.
assert len(filtered_participants) == 11664 - 1523

### Filter Participants with only one entry

In [None]:
# True for participants (grouped by the first three index levels) that have exactly
# 24 * 6 * 2 rows in `ac_loc`, i.e. two full days of 10-minute time slots.
valid_mask = ac_loc.groupby([ac_loc.index.get_level_values(0), 
                             ac_loc.index.get_level_values(1), 
                             ac_loc.index.get_level_values(2)]).apply(lambda values: len(values) == 24 * 6 * 2)
# Align with the remaining participants; those without any row in `ac_loc` count as invalid.
valid_mask = valid_mask.reindex(filtered_participants.index, fill_value=False)
valid_mask.describe()

In [None]:
10141 - 9997

144 of the remaining participants have only one entry and are filtered out.

In [None]:
# Drop the participants that do not have two complete days of time slots.
filtered_participants = filtered_participants[valid_mask]
# Expected count: previous total minus the 144 participants dropped by this filter.
assert len(filtered_participants) == 11664 - 1523 - 144

### Filter NaNs

In [None]:
# True for participants (first three index levels) with at least one time slot
# that has no Markov state assigned.
nan_mask = markov_states.groupby(by=lambda index: (index[0], index[1], index[2])).apply(lambda values: values.isnull().any())
# Align with the remaining participants; those missing from `markov_states` are treated as having NaNs.
nan_mask = nan_mask.reindex(filtered_participants.index, fill_value=True)
nan_mask.describe()

In [None]:
9997 - 8036

8036 of the remaining participants have at least one time slot without a Markov state (NaN), i.e. they don't have two valid diaries, and are filtered out.

In [None]:
# Keep only the participants whose time slots all have a Markov state.
filtered_participants = filtered_participants[~nan_mask]
# Expected count: previous total minus the 8036 participants with NaNs.
assert len(filtered_participants) == 11664 - 1523 - 144 - 8036

### Filter Adults

In [None]:
# True for participants (first three index levels) whose first diary row is an adult diary.
# `.iloc[0]` makes the positional access explicit; plain `values[0]` on a
# non-integer index is an ambiguous label/position lookup in pandas.
participant_levels = [diary_data.index.get_level_values(level) for level in range(3)]
adult_mask = diary_data.groupby(participant_levels).DTYPE.apply(
    lambda values: values.iloc[0] == diary.DTYPE.ADULT_DIARY)
# Align with the remaining participants; those without a diary row count as non-adult.
adult_mask = adult_mask.reindex(filtered_participants.index, fill_value=False)
adult_mask.describe()

In [None]:
1961 - 1767

194 of the remaining participants are children and are filtered out.

In [None]:
# Keep only the participants with an adult diary.
filtered_participants = filtered_participants[adult_mask]
# Expected count: previous total minus the 194 participants without an adult diary.
assert len(filtered_participants) == 11664 - 1523 - 144 - 8036 - 194

### Filter Working Population

In [None]:
# True for participants whose ECONACT value is "economically active, in employment".
work_mask = filtered_participants.ECONACT == individual.ECONACT.ECON_ACTIVE___IN_EMPLOYMENT
work_mask.describe()

In [None]:
1767 - 928

839 of the remaining participants are not in employment and are filtered out.

In [None]:
# Keep only the participants that are in employment.
filtered_participants = filtered_participants[work_mask]
# Expected count: previous total minus the 839 participants not in employment.
assert len(filtered_participants) == 11664 - 1523 - 144 - 8036 - 194 - 839

### Filtered Markov States

In [None]:
# Unique participant keys (SN1, SN2, SN3) that occur in the diary data.
# Built from the level values instead of `pd.MultiIndex(labels=...)`: the `labels`
# keyword was renamed to `codes` in pandas 0.24 and later removed, whereas
# `from_arrays` / `get_level_values` work in old and new pandas versions alike.
diary_data_index = pd.MultiIndex.from_arrays(
    [diary_data.index.get_level_values(level) for level in range(3)],
    names=['SN1', 'SN2', 'SN3']).drop_duplicates()

In [None]:
# Per-participant flag over all diary participants: True for those that passed
# every filter above, False for everybody else.
markov_mask = pd.Series(True, index=filtered_participants.index).reindex(diary_data_index, fill_value=False)

In [None]:
assert len(markov_mask[markov_mask == True]) == 928

In [None]:
# Expand the per-participant flag to the index of `markov_states` by forward fill
# and keep only the flagged time slots.
# NOTE(review): this relies on `reindex(method='ffill')` between indexes of different
# depth — confirm it behaves as intended on the pandas version in use.
filtered_markov_states = markov_states[markov_mask.reindex(markov_states.index, method='ffill')]
# 928 participants x 2 diary days x (24 * 6) ten-minute slots per day.
assert len(filtered_markov_states) == 928 * 2 * 24 * 6

In [None]:
filtered_markov_states.head()

### Filter Weekends
Lastly, let's remove all weekend days to end up with only weekdays.

In [None]:
# True for diary days flagged as weekday (Monday to Friday) in the diary data ...
weekday_mask = diary_data.DDAYW2 == diary.DDAYW2.WEEKDAY_MON___FRI
# ... expanded to the remaining time slots by forward fill.
weekday_mask = weekday_mask.reindex(filtered_markov_states.index, method='ffill')
weekday_mask.describe()

In [None]:
# Keep only the time slots that belong to weekday diaries.
filtered_markov_states = filtered_markov_states[weekday_mask]
# 928 participants x (24 * 6) slots: the assert expects the remaining slots to
# amount to exactly one diary day per participant.
assert len(filtered_markov_states) == 928 * 24 * 6

## Create Markov Chain

In [None]:
import pykov
from collections import OrderedDict

In [None]:
# Move index levels 0-3 into the columns, so that each row holds the states of
# all remaining diary days for one time slot.
markov_table = filtered_markov_states.unstack(level=[0, 1, 2, 3])
markov_table.head()

In [None]:
def markov_chain(hour, minute, markov_table):
    """Build the pykov chain for the transition that starts at the given time slot.

    The chain holds one entry for every ordered pair of `SimpleMarkov` states,
    keyed by `(current_state, next_state)` and valued with the result of
    `probability` for that pair.

    Parameters:
        hour, minute: time slot at which the transition starts.
        markov_table: table of states, passed on to `probability`.

    Returns:
        A `pykov.Chain` built from the ordered transition mapping.
    """
    transitions = OrderedDict()
    for current_state in SimpleMarkov:
        for next_state in SimpleMarkov:
            transitions[(current_state, next_state)] = probability(
                current_state, next_state, hour, minute, markov_table)
    return pykov.Chain(transitions)
            
def probability(current_state, next_state, hour, minute, markov_table):
    """Estimate the probability of moving from one state to another between two slots.

    The estimate is the share of columns of `markov_table` that are in
    `current_state` at (hour, minute) and in `next_state` at the following
    time slot, relative to all columns in `current_state` at (hour, minute).

    Parameters:
        current_state: state at the time slot (hour, minute).
        next_state: state at the time slot returned by `next_time_stamp`.
        hour, minute: time slot at which the transition starts.
        markov_table: DataFrame whose rows are addressed by hour, then minute.

    Returns:
        The relative frequency as a float, or 0 if no column is in
        `current_state` at the given time slot.
    """
    # `.loc` replaces the `.ix` indexer, which was deprecated in pandas 0.20 and
    # removed in 1.0; both lookups here are label based.
    current_vector = markov_table.loc[hour].loc[minute]
    in_current_state = current_vector == current_state
    current_instances = int(in_current_state.sum())
    if current_instances == 0:
        # The state does not occur at this time slot: no transition can be estimated.
        return 0
    next_hour, next_minute = next_time_stamp(hour, minute)
    next_vector = markov_table.loc[next_hour].loc[next_minute]
    next_instances = int((in_current_state & (next_vector == next_state)).sum())
    return next_instances / current_instances
    
def next_time_stamp(hour, minute):
    """Return the (hour, minute) of the time slot 10 minutes after the given one.

    The hour wraps around midnight, e.g. (23, 50) -> (0, 0); without the wrap
    the result would be hour 24, which is not a valid hour of the day.
    """
    carry, next_minute = divmod(minute + 10, 60)
    return (hour + carry) % 24, next_minute

In [None]:
def chain_repr(self):
    """Render a chain as an HTML table with the columns From, To and Probability.

    `self` is iterated to obtain its keys; each key is expected to be a pair
    `(from_state, to_state)` and `self[key]` the corresponding probability.
    Every line of the returned markup ends with a newline.
    """
    cell = '        <td>{}</td>'
    html = [
        '<table>',
        '    <tr>',
        '        <th>From</th>',
        '        <th>To</th>',
        '        <th>Probability</th>',
        '    </tr>',
    ]
    for transition in self:
        origin, target = transition[0], transition[1]
        html.extend([
            '    <tr>',
            cell.format(origin),
            cell.format(target),
            cell.format(self[transition]),
            '    </tr>',
        ])
    html.append('</table>')
    return '\n'.join(html) + '\n'

In [None]:
pykov.Chain._repr_html_ = chain_repr

In [None]:
markov_chain(0, 0, markov_table)

In [None]:
markov_chain(14, 0, markov_table)