# Create Tables that Track Variables Per-Visit
For each category/subcategory or source, count how many variables were collected during each visit.

In [None]:
import pickle
import numpy as np
import pandas as pd

In [None]:
# Constants
# Maps the three-character visit prefix found at the start of a variable name
# to the column label used for that visit in the generated tables.
# Insertion order matters: it fixes the left-to-right order of the table columns.
visits = {
    'P02': 'IEI',
    'P01': 'SV',
    'V00': 'EV',
    'V01': '12m',
    'V02': '18m',
    'V03': '24m',
    'V04': '30m',
    'V05': '36m',
    'V06': '48m',
    'V07': '60m',
    'V08': '72m',
    'V09': '84m',
    'V10': '96m',
    'V11': '108m',
    'V99': "Outcomes",
}

# Grouping variables by category/subcategory and visit

In [None]:
# Created in 'Parse the VG_Form.pdf for OAI variable categories and sources' notebook
# NOTE: unpickling can execute arbitrary code; only load pickle files you produced yourself.
# The context manager makes sure the file handle is closed once loading finishes.
with open('oai_vars_categories_subcategories.pkl', 'rb') as pkl_file:
    vars_cat_df = pickle.load(pkl_file)

In [None]:
# Count variables per category/subcategory and visit.
# The visit is taken from the first three characters of each variable name; one
# boolean flag column is added per known visit, and summing the flags within each
# Category/Subcategory group turns them into counts.
# This is just to get a sense of things.
visit_prefix = vars_cat_df.Variable.str[:3]
visit_flags = {
    visit: np.where(visit_prefix == visit, True, False)
    for visit in visits
}
tmp_df = (
    vars_cat_df
    .assign(Visit=visit_prefix, **visit_flags)
    .drop(columns=['Variable', 'Visit'])
    .groupby(['Category', 'Subcategory'], observed=True)
    .sum()
)

# Show every row of the summary rather than a truncated view
pd.set_option('display.max_rows', None)
display(tmp_df)

## Dump variable count per cat/subcat and visit into a wiki table

In [None]:
# Render the category/subcategory counts as a wiki table.
# One row per subcategory, one column per visit; the category cell is written only
# on the first row of each category and spans all of that category's subcategories.

table_parts = ['{| class="wikitable"\n ! Category !! Subcategory !! ' + ' !! '.join(visits.values())]
last_cat = ''
for group, sub in tmp_df.index:
    table_parts.append('\n|-\n')
    if group != last_cat:
        # New category: its cell spans as many rows as it has subcategories
        n_subcats = len(tmp_df.loc[group].index)
        table_parts.append('|rowspan=' + str(n_subcats) + ' | ' + group + '\n')
    table_parts.append('| ' + sub)
    counts_row = tmp_df.loc[group].loc[sub]
    for col in visits:
        table_parts.append(' || ' + str(counts_row[col]))
    last_cat = group
table_parts.append('\n|}')
table_str = ''.join(table_parts)

print(table_str)

# Grouping variables by variable source and visit

In [None]:
# Created in 'Parse the VG_Form.pdf for OAI variable categories and sources' notebook
# NOTE: unpickling can execute arbitrary code; only load pickle files you produced yourself.
# The context manager makes sure the file handle is closed once loading finishes.
with open('oai_vars_labels_sources.pkl', 'rb') as pkl_file:
    vars_df = pickle.load(pkl_file)

In [None]:
# Count variables per source and visit.
# The visit is taken from the first three characters of each variable name; one
# boolean flag column is added per known visit, and summing the flags within each
# Source group turns them into counts.
# NOTE(review): every column left after dropping Variable/Visit is summed as well,
# not just the visit flags -- confirm that is intended for vars_df.
visit_prefix = vars_df.Variable.str[:3]
visit_flags = {
    visit: np.where(visit_prefix == visit, True, False)
    for visit in visits
}
tmp_df = (
    vars_df
    .assign(Visit=visit_prefix, **visit_flags)
    .drop(columns=['Variable', 'Visit'])
    .groupby(['Source'], observed=True)
    .sum()
)

# Show every row of the summary rather than a truncated view
pd.set_option('display.max_rows', None)
display(tmp_df)

## Dump variable counts per source and visit into a wiki table

In [None]:
# Render the per-source counts as a wiki table.
# Table lists sources and, for each visit, the value tmp_df holds for that source.
# tmp_df is indexed by Source (one row per source after the groupby), so every row
# always starts with its source cell; no "previous source" tracking is needed.
table_parts = ['{| class="wikitable"\n! Source !! ' + ' !! '.join(visits.values())]
for src in tmp_df.index:
    # Look the row up once instead of once per visit column
    counts_row = tmp_df.loc[src]
    # str() keeps this working even if a source label is not a plain string
    table_parts.append('\n|-\n| ' + str(src))
    for col in visits:
        table_parts.append(' || ' + str(counts_row.loc[col]))
table_parts.append('\n|}')
table_str = ''.join(table_parts)

print(table_str)