# American Time Use Survey

[Data and documentation](https://www.bls.gov/tus/data.htm)

[Data dictionary](https://www.bls.gov/tus/dictionaries/atusintcodebk0322.pdf) e.g. what does `TULINENO` mean? <br>
[Activity coding lexicons](https://www.bls.gov/tus/lexicons/lexiconnoex0322.pdf) e.g. `t010101` is sleeping.

In [1]:
import pandas as pd
import numpy as np
import requests
import io
import matplotlib.pyplot as plt
from dateutil.parser import parse
import datetime as dt
from zipfile import ZipFile
import os



In [4]:
# Which ATUS multi-year files to load; each becomes one entry in the ATUS dict.
files = ['sum', 'resp', 'who', 'act']  # ["resp", "rost", "sum", "act", "cps", "who"]

ATUS = dict()

# multiyear data
# Zip files have URLs like https://www.bls.gov/tus/datafiles/atusrost-0322.zip
# Above pattern can change year to year. Update accordingly.

base_url = 'https://www.bls.gov/tus/datafiles/atus'
# The two-digit end year is taken from the date 365 days ago (e.g. "-0322.zip" during 2023).
ending = f'-03{(dt.date.today() - dt.timedelta(days = 365)).strftime("%y")}.zip'

# Suffix of the data file stored inside each archive, e.g. "-0322.zip" -> "_0322.dat".
dat = ending.replace(".zip", '.dat').replace("-", '_')


def _read_atus_member(archive, member):
    """Read one member of an open ZipFile into a DataFrame without extracting it to disk."""
    with archive.open(member) as handle:
        return pd.read_csv(handle)


# Load every requested file, first straight from the web and otherwise from ~/Downloads.
for file in files:
    url = f'{base_url}{file}{ending}'
    member = f'atus{file}{dat}'

    # Try to get data directly from web
    try:
        # this doesn't work Aug '23 because BLS blocks bots
        r = requests.get(url, timeout=60)
        r.raise_for_status()
        with ZipFile(io.BytesIO(r.content)) as z:
            ATUS[file] = _read_atus_member(z, member)

    # Fall back to a copy the user downloads by hand in the browser.
    # `Exception` (not a bare except) so Ctrl-C still interrupts the cell.
    except Exception as err:
        print(f'Direct download failed: {err!r}')
        print(url, 'click to download')
        input('Confirm Download')

        # Read the archive where the browser saved it; nothing is copied or
        # extracted into the working directory, so there is nothing to delete.
        local_zip = os.path.join(os.path.expanduser('~'), 'Downloads', f'atus{file}{ending}')
        with ZipFile(local_zip, 'r') as z:
            ATUS[file] = _read_atus_member(z, member)

https://www.bls.gov/tus/datafiles/atussum-0322.zip click to download
Confirm Download1
https://www.bls.gov/tus/datafiles/atusresp-0322.zip click to download
Confirm Download1
https://www.bls.gov/tus/datafiles/atuswho-0322.zip click to download
Confirm Download1
https://www.bls.gov/tus/datafiles/atusact-0322.zip click to download
Confirm Download1


# Data prep

Make one respondent-level dataframe with info on:


* time spent sleeping
* time spent alone
* time spent with friends
* time spent bowling

In [49]:
# Columns taken from the respondent file.
# NOTE(review): TRTALONE / TRTFRIEND are presumably the time-alone / time-with-friends
# totals mentioned in the section intro — confirm against the data dictionary.
respondent_columns = ['TUCASEID', 'TRTALONE', 'TRTFRIEND']

# activity codes
# Summary-file columns whose name starts with one of these three-character prefixes.
prosocial_prefixes = ('t15', 't40', 't10', 't14')
prosocial_activities = [col for col in ATUS['sum'].columns if str(col).startswith(prosocial_prefixes)]
bowling = ['t130107']

# t010101 is sleeping (see the activity lexicon linked at the top); t130107 is the bowling code above.
summary_columns = ['t010101', 'TEAGE', 't130107']

clay = ATUS['resp'][respondent_columns]

# Collapse the who file to one row per (respondent, activity), holding the list of
# who-codes recorded for that activity.
who_cols = ['TUCASEID', 'TUACTIVITY_N', 'TUWHO_CODE']
who_helper = ATUS['who'][who_cols].sort_values(['TUCASEID', 'TUACTIVITY_N'])
# Group `who_helper` (defined just above) rather than a variable from a later cell,
# so this cell runs on a fresh kernel.
activity_who_info = (
    who_helper
    .groupby(['TUCASEID', 'TUACTIVITY_N'])
    .TUWHO_CODE.apply(list)
    .to_frame()
    .reset_index()
)

# validate='one_to_one' makes pandas raise if either side repeats a (TUCASEID, TUACTIVITY_N) key.
activity_and_who = ATUS['act'].merge(activity_who_info,
                                     on=['TUCASEID', 'TUACTIVITY_N'],
                                     validate='one_to_one')

In [41]:
# Who-file rows ordered by respondent and then by activity number.
x = ATUS['who'][who_cols].sort_values(by=['TUCASEID', 'TUACTIVITY_N'])

# To look at activities that have more than one who-row:
#   x[x.duplicated(['TUCASEID', 'TUACTIVITY_N'], keep=False)]

# One list of who-codes per (respondent, activity) pair.
who_groups = x.groupby(['TUCASEID', 'TUACTIVITY_N'])
activity_whos = who_groups['TUWHO_CODE'].apply(list)

In [48]:
# Flatten the (TUCASEID, TUACTIVITY_N) index back into ordinary columns for display.
activity_whos.reset_index()

Unnamed: 0,TUCASEID,TUACTIVITY_N,TUWHO_CODE
0,20030100013280,1,[18]
1,20030100013280,2,[-1]
2,20030100013280,3,[-1]
3,20030100013280,4,[18]
4,20030100013280,5,[20]
...,...,...,...
4587361,20221212222501,22,[19]
4587362,20221212222501,23,[19]
4587363,20221212222501,24,[19]
4587364,20221212222501,25,[19]


In [44]:
# Activities with more than one who-code recorded.
# (The discarded `activity_whos.unstack()` call was dropped: it was not the cell's
# last expression, so its result was never displayed or stored.)
helper = [codes for codes in activity_whos.values if len(codes) > 1]

# ...and, of those, the ones whose codes include 18.
# NOTE(review): 18 is presumably the "alone" who-code — confirm in the data dictionary.
helper2 = [codes for codes in helper if 18 in codes]


In [45]:
# Show the activities that have more than one who-code, one of which is 18.
helper2

[[18, 22],
 [18, 25],
 [18, 57, 58],
 [18, 22],
 [18, 27],
 [18, 22],
 [18, 22],
 [18, 22],
 [18, 22],
 [18, 22],
 [18, 22],
 [18, 22],
 [18, 22, 22],
 [18, 22],
 [18, 22]]

In [26]:
 # Display the activity file ordered by respondent and then by activity number.
 ATUS['act'].sort_values(['TUCASEID', 'TUACTIVITY_N'])

Unnamed: 0,TUCASEID,TUACTIVITY_N,TUACTDUR24,TUCC5,TUCC5B,TRTCCTOT_LN,TRTCC_LN,TRTCOC_LN,TUSTARTTIM,TUSTOPTIME,...,TRTONHH_LN,TRTOHH_LN,TRTHH_LN,TRTNOHH_LN,TEWHERE,TUCC7,TRWBELIG,TRTEC_LN,TUEC24,TUDURSTOP
0,20030100013280,1,60,-1,-1,-1,-1,-1,04:00:00,05:00:00,...,-1,-1,-1,-1,9,-1,-1,-1,-1,-1
1,20030100013280,2,30,-1,-1,-1,-1,-1,05:00:00,05:30:00,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
2,20030100013280,3,600,-1,-1,-1,-1,-1,05:30:00,15:30:00,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
3,20030100013280,4,150,-1,-1,-1,-1,-1,15:30:00,18:00:00,...,-1,-1,-1,-1,1,-1,-1,-1,-1,-1
4,20030100013280,5,5,-1,-1,-1,-1,-1,18:00:00,18:05:00,...,-1,-1,-1,-1,1,-1,-1,-1,-1,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4587361,20221212222501,22,15,0,0,0,-1,0,17:30:00,17:45:00,...,-1,-1,-1,-1,1,0,-1,-1,-1,1
4587362,20221212222501,23,15,0,0,0,-1,0,17:45:00,18:00:00,...,-1,-1,-1,-1,1,0,-1,-1,-1,1
4587363,20221212222501,24,240,0,0,0,-1,0,18:00:00,22:00:00,...,-1,-1,-1,-1,1,0,-1,-1,-1,2
4587364,20221212222501,25,30,0,0,0,-1,0,22:00:00,22:30:00,...,-1,-1,-1,-1,1,0,-1,-1,-1,2
