# American Time Use Survey

[Data and documentation](https://www.bls.gov/tus/data.htm)

## Bowling Alone

[Wikipedia](https://en.wikipedia.org/wiki/Bowling_Alone)

In [None]:
import pandas as pd
import numpy as np
import requests
import io
import matplotlib.pyplot as plt
import scipy.stats as stats
from dateutil.parser import parse
import datetime as dt
from zipfile import ZipFile
import os

In [None]:
# Load the ATUS multi-year "who" and "activity" files into the ATUS dict,
# keyed by the short file name ('who', 'act').
from zipfile import BadZipFile  # cell-local: the import cell only brings in ZipFile

files = ['who', 'act']

ATUS = dict()

# multiyear data
# Zip files have URLs like https://www.bls.gov/tus/datafiles/atusrost-0322.zip
# Above pattern can change year to year. Update accordingly.

base_url = 'https://www.bls.gov/tus/datafiles/atus'
# Suffix is "-03YY.zip", where YY is the two-digit year of the date 365 days ago.
ending = f'-03{(dt.date.today() - dt.timedelta(days = 365)).strftime("%y")}.zip'
# The data file inside each archive uses the same suffix with "_" and ".dat",
# e.g. archive atuswho-0322.zip -> member atuswho_0322.dat
dat = ending.replace(".zip", '.dat').replace("-", '_')

for file in files:
    url = f'{base_url}{file}{ending}'
    archive = f'atus{file}{ending}'
    member = f'atus{file}{dat}'

    # Try to get data directly from web
    try:
        # this doesn't work Aug '23 because BLS blocks bots
        r = requests.get(url, timeout=60)
        r.raise_for_status()
        # Read the member straight out of the in-memory archive so nothing is
        # written to (or has to be cleaned up from) the working directory.
        with ZipFile(io.BytesIO(r.content)) as z, z.open(member) as fh:
            ATUS[file] = pd.read_csv(fh)

    # Fall back to a manual browser download. Only download/archive problems
    # are caught: network or HTTP errors, a non-zip response body, or an
    # archive that lacks the expected member (KeyError from ZipFile.open).
    except (requests.RequestException, BadZipFile, KeyError):
        print(url, 'click to download')
        input('Confirm Download')

        # Read directly from the browser's download folder; no shell `cp`/`rm`
        # and no deletion of unrelated files in the working directory.
        local_zip = os.path.join(os.path.expanduser('~'), 'Downloads', archive)
        with ZipFile(local_zip, 'r') as z, z.open(member) as fh:
            ATUS[file] = pd.read_csv(fh)

In [None]:
# Build `data`: one row per (respondent, activity) for respondents who bowled,
# flagged with whether the activity was done alone and whether it is pro-social.
activity = ATUS['act'] # Dataframe for activities

# Get activity columns
bowling_code = 130107
# capture volunteering (15), helping nonhh members (04), govt civic
# obligations (10), religious activities (14).
# The codes are compared with the integer 130107 below, so they are presumably
# held as numbers and a leading zero is dropped (040101 -> 40101). Zero-pad back
# to six digits before reading the two-digit major category; matching on the
# raw prefix cannot tell 10xxxx from 01xxxx and misses 04 codes such as 0499xx.
prosocial_majors = {'15', '04', '10', '14'}
prosocial_cols = [x for x in activity.TRCODEP.unique()
                  if str(x).zfill(6)[0:2] in prosocial_majors]

is_bowling = activity.TRCODEP == bowling_code
bowlers = activity[is_bowling].TUCASEID.unique()

# Reduce to bowlers
activity = activity[activity.TUCASEID.isin(bowlers)]

# Find who was alone
helper = ATUS['who']
helper['is_alone'] = helper.TUWHO_CODE.isin([18,19])
# Sorting puts True after False within each activity, so keep='last' marks an
# activity as alone when any of its "who" rows carries an alone code.
helper.sort_values(['TUCASEID','TUACTIVITY_N','is_alone'],
                   inplace = True)
helper = helper[['TUCASEID','TUACTIVITY_N','is_alone']].\
                drop_duplicates(['TUCASEID','TUACTIVITY_N'],
                                keep = 'last')
# Inner merge keeps only bowlers' activities; validate guards against
# duplicate (TUCASEID, TUACTIVITY_N) keys on either side.
data = helper.merge(activity,
                    on = ['TUCASEID','TUACTIVITY_N'],
                    validate = 'one_to_one')

# mark pro-social
data['is_prosocial'] = data.TRCODEP.isin(prosocial_cols)
# Duration counts only when the activity is pro-social, otherwise 0.
data['pro_social_minutes'] = data.TUACTDUR24 * 1 * data.is_prosocial

In [None]:
# Partition bowlers by whether any of their bowling episodes was done alone.
# Row-level masks over the merged activity table:
is_bowling = data['TRCODEP'].eq(bowling_code)
alone = data['is_alone'].eq(True)

# Respondent IDs: everyone who bowled, those who bowled alone, and the rest.
all_bowlers = set(data.loc[is_bowling, 'TUCASEID'])
solo_bowlers = set(data.loc[is_bowling & alone, 'TUCASEID'])
social_bowlers = all_bowlers.difference(solo_bowlers)

In [None]:
# Tag every row with its respondent's bowling style, then count the distinct
# respondents in each group (shown as the cell output).
data['is_solo_bowler'] = data['TUCASEID'].isin(solo_bowlers)
data.groupby('is_solo_bowler')['TUCASEID'].nunique()

In [None]:
# Total pro-social minutes per respondent: x = solo bowlers, y = everyone else.
solo_rows = data.loc[data['is_solo_bowler'] == True]
other_rows = data.loc[data['is_solo_bowler'] == False]
x = solo_rows.groupby('TUCASEID')['pro_social_minutes'].sum()
y = other_rows.groupby('TUCASEID')['pro_social_minutes'].sum()

# Side-by-side histograms with independent y-axes.
fig, axs = plt.subplots(1, 2, sharey = False, figsize = (12,4))

# (series, panel title, extra bar styling); only the solo panel is filled gray.
panels = [
    (x, "Solo Bowlers", {'fc': 'gray'}),
    (y, "Social Bowlers", {}),
]
for ax, (minutes, title, bar_style) in zip(axs, panels):
    minutes.hist(bins = 10, ax = ax, ec = 'black', **bar_style)
    ax.set_title(title)
    ax.set_ylabel("Frequency")
    ax.set_xlabel("Pro-social Minutes")
    # Series.hist draws a grid by default; switch it off on both axes.
    ax.yaxis.grid(False)
    ax.xaxis.grid(False)

plt.tight_layout()
#plt.savefig("prosocialbowlerhistograms.pdf")

In [None]:
# Two-sample t-test on per-respondent pro-social minutes (solo vs. other
# bowlers) without assuming equal variances.
# think about skewed data
stats.ttest_ind(a = x, b = y, equal_var = False)

In [None]:
# what does the data look like
table = pd.DataFrame(index = ['Alice', 'Bob', 'Dale Jr.'])
table['Type of Bowler'] = ['Solo', 'Social', 'Social']
table['Pro-Social Activity'] = [0,10, 100]
print(table.style.to_latex(hrules = True))