## Patients 65+ Descriptive Analysis & Table1

In [None]:
#import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Install the tableone package, which provides the TableOne class imported in the next cell.
# NOTE(review): inside Jupyter, `%pip install tableone` is usually preferred over `!pip`
# because it targets the running kernel's environment — confirm for this setup.
!pip install tableone

In [None]:
#import Tbl1
from tableone import TableOne

In [None]:
# Load the TableOne65plus extract: columns 0-7 are read as strings, column 8 as float.
# NOTE(review): read_csv treats the first line as a header by default, and the
# column names are overwritten afterwards — confirm the file really has a header row.
elders_data = pd.read_csv(
    'TableOne65plus_v5.txt',
    dtype={**dict.fromkeys(range(8), str), 8: float},
)

In [None]:
# Replace the column labels with short analysis names (nine columns, in file order).
elders_data.columns = [
    'ID',
    'FPL',
    'Race',
    'Hispanic',
    'Sex',
    'Lang',
    'M_Status',
    'State',
    'Age',
]

In [None]:
# DataFrame.info() writes its report to stdout itself and returns None,
# so call it directly; wrapping it in print() only appends a stray "None".
elders_data.info()

In [None]:
# Show the (rows, columns) dimensions of the raw extract.
n_rows, n_cols = elders_data.shape
print((n_rows, n_cols))

In [None]:
# Preview the first five records of the raw extract.
raw_preview = elders_data.head()
print(raw_preview)

In [None]:
# Descriptive statistics of the raw extract, rounded to two decimals.
raw_summary = elders_data.describe()
print(raw_summary.round(2))

In [None]:
# Count how many distinct IDs appear in the ID column.
id_column = elders_data['ID']
num_distinct_ids = id_column.nunique()
print(num_distinct_ids)

In [None]:
# Widen the console output so each row prints on a single line.
pd.set_option('display.width', 1000)

# Order rows by ID, then by ascending Age within each ID.
elders_data_sorted = elders_data.sort_values(by=['ID', 'Age'])

# Visual check of the sorted frame.
sorted_preview = elders_data_sorted.head()
print(sorted_preview)

In [None]:
# Keep exactly one row per ID: the first row of the ID/Age-sorted frame,
# i.e. the record with the minimum age for that ID.
#
# GroupBy.first() is deliberately NOT used here: it returns the first
# non-null value of each column separately, so a missing value in the
# minimum-age row would be silently filled from a later row of the same ID,
# producing a record that never existed in the data.
# Rows with a missing ID are dropped, matching groupby's default handling of NA keys.
elders_data_min_age = (
    elders_data_sorted
    .dropna(subset=['ID'])
    .drop_duplicates(subset='ID', keep='first')
    .reset_index(drop=True)
)

In [None]:
# Visual check: first rows of the one-row-per-ID frame, printed without wrapping.
pd.set_option('display.width', 1000)
min_age_preview = elders_data_min_age.head()
print(min_age_preview)

In [None]:
# Visual check: dimensions of the one-row-per-ID frame.
min_age_rows, min_age_cols = elders_data_min_age.shape
print((min_age_rows, min_age_cols))

In [None]:
# Visual check: first 100 rows of the one-row-per-ID frame, printed without wrapping.
pd.set_option('display.width', 1000)
first_hundred = elders_data_min_age.head(n=100)
print(first_hundred)

In [None]:
# Widen the console output so wide summaries are not wrapped.
pd.set_option('display.width', 2000)

# Dimensions, schema and descriptive statistics of the one-row-per-ID frame.
# info() prints its own report and returns None, so it is called on its own;
# passing it to print() would emit a stray "None" and print out of order.
print(elders_data_min_age.shape)
elders_data_min_age.info()
print(elders_data_min_age.describe())

# Age count / min / max / mean within each Hispanic and Race group.
# Aggregations are named by string: passing the builtins min/max to agg()
# is deprecated in recent pandas and raises a FutureWarning.
for group_col in ('Hispanic', 'Race'):
    age_by_group = elders_data_min_age.groupby(group_col)['Age']
    print(age_by_group.agg(['count', 'min', 'max', 'mean']))

In [None]:
# Report the smallest and largest Age in the one-row-per-ID frame.
age_values = elders_data_min_age['Age']
youngest = age_values.min()
oldest = age_values.max()
print('Age min =', youngest, 'and Age max =', oldest)

In [None]:
# Write the one-row-per-ID frame to a comma-separated text file,
# including both the row index and the header line.
elders_data_min_age.to_csv(
    'elders_data_min_age.txt',
    header=True,
    index=True,
)

In [None]:
# Load the per-patient encounter counts: positions 0-3 as strings, position 4 as float.
# NOTE(review): dtypes are given for five positions, but only four column names
# are assigned afterwards — confirm which position actually holds the count.
num_encounters_all_patients = pd.read_csv(
    'num_encounters_all_patients.txt',
    dtype={**dict.fromkeys(range(4), str), 4: float},
)

In [None]:
# Replace the column labels with short analysis names (four columns, in file order).
num_encounters_all_patients.columns = [
    'ID',
    'F_treatYR',
    'L_treatYR',
    'N_Encounters',
]

In [None]:
# Preview the first five encounter-count records.
encounters_preview = num_encounters_all_patients.head()
print(encounters_preview)

In [None]:
# DataFrame.info() writes its report to stdout itself and returns None,
# so call it directly; wrapping it in print() only appends a stray "None".
num_encounters_all_patients.info()

In [None]:
# Make sure the encounter count is numeric (float) before it is summarised.
encounter_counts = num_encounters_all_patients['N_Encounters']
num_encounters_all_patients['N_Encounters'] = encounter_counts.astype(float)

In [None]:
# Attach encounter counts to each patient row by matching on ID.
# This is an inner join (the pandas default), so only IDs present in both frames are kept.
elders_data_min_age_encounters = elders_data_min_age.merge(
    num_encounters_all_patients,
    how='inner',
    on='ID',
)

In [None]:
# DataFrame.info() writes its report to stdout itself and returns None,
# so call it directly; wrapping it in print() only appends a stray "None".
elders_data_min_age_encounters.info()

In [None]:
# Preview the merged patient/encounter frame without line wrapping.
pd.set_option('display.width', 1000)
merged_preview = elders_data_min_age_encounters.head()
print(merged_preview)

In [None]:
# Build the Table 1 summary for the selected variables, stratified by Hispanic.
# (State was left out of the selection, but it can be included.)
categorical_columns = ['FPL', 'Hispanic', 'Sex', 'Lang', 'M_Status']
selected_columns = categorical_columns + ['Age', 'N_Encounters']

tbl2 = TableOne(
    data=elders_data_min_age_encounters,
    columns=selected_columns,
    categorical=categorical_columns,
    groupby='Hispanic',
    label_suffix=True,
    htest_name=True,
    pval=True,
)

pd.set_option('display.width', 100)
presto_table = tbl2.tabulate(tablefmt='presto')
print(presto_table)

In [None]:
# Export the Table 1 summary to HTML.
# The row index is kept on purpose: tableone stores the variable names and
# category levels in the table's row index, so index=False would write a
# table of numbers with no row labels.
tbl2.to_html('table1.html')

## Plots

In [None]:
# Age distribution for each Hispanic category.
ax = sns.boxplot(x='Hispanic', y='Age', data=elders_data_min_age)
ax.set_title('Hispanic (Y/N) by Age', y=1.0)
plt.show()

In [None]:
# Age distribution for each Sex category.
ax = sns.boxplot(x='Sex', y='Age', data=elders_data_min_age)
ax.set_title('Sex by Age', y=1.0)
plt.show()

In [None]:
# Horizontal boxplots of Age per marital status, split by Hispanic.
ax = sns.boxplot(
    x='Age',
    y='M_Status',
    hue='Hispanic',
    data=elders_data_min_age,
)
ax.set_title('Age by Marital Status and Hispanic', y=1.0)
plt.legend(loc='lower right')
plt.show()

In [None]:
# Compact figure: horizontal boxplots of Age per Hispanic category, split by Sex.
plt.figure(figsize=(5, 3))
ax = sns.boxplot(
    x='Age',
    y='Hispanic',
    hue='Sex',
    data=elders_data_min_age,
)
ax.set_title('Hispanic by Age and Sex', y=1.0)
plt.legend(loc='lower right')
plt.show()


In [None]:
# Horizontal boxplots of the number of encounters per Hispanic category.
plt.figure(figsize=(10, 6))
ax = sns.boxplot(data=elders_data_min_age_encounters, y='Hispanic', x='N_Encounters')
# The title names only the variables actually plotted; Age is not part of this figure.
ax.set_title('Number of Encounters by Hispanic', y=1.0)
# Limit the visible x-range to 0-1000; points beyond it are not shown.
plt.xlim(0, 1000)
plt.show()

In [None]:
# Descriptive statistics of N_Encounters per Hispanic group, rounded to two
# decimals and transposed so that each group becomes a column.
encounters_by_group = elders_data_min_age_encounters.groupby('Hispanic')['N_Encounters']
summary_stats = encounters_by_group.describe().round(2)
print(summary_stats.transpose())

In [None]:
# Write the merged patient/encounter frame to a comma-separated text file,
# including both the row index and the header line.
elders_data_min_age_encounters.to_csv(
    'elders_data_min_age_encounters.txt',
    header=True,
    index=True,
)