# Create Pivot Tables

Create a pivot table of patients and conditions. Then combine all individuals and their variants into a single table for analysis.

In [17]:
import sqlalchemy as sa
import pandas as pd

In [18]:
# Load the project settings object. Per the original note, the database
# connection details come from a .env file; `get_settings` is a project helper
# whose implementation is not shown here.
from helpers.settings import get_settings

# `settings.postgresql_dsn` is used below to build the SQLAlchemy engine.
settings = get_settings()

In [19]:
# Build the SQLAlchemy engine from the DSN held in the settings object.
# The DSN is converted to its string form before being handed to SQLAlchemy.
engine = sa.create_engine(str(settings.postgresql_dsn))

In [20]:
# Fetch individual_id, condition and has_condition by joining
# individual -> individual_condition -> condition (long format).
individual_conditions_sql = """
SELECT i.individual_id, c.condition, ic.has_condition
FROM individual i
JOIN individual_condition ic
ON i.individual_id = ic.individual_id
JOIN condition c
ON ic.condition_id = c.condition_id
"""
df = pd.read_sql(sql=individual_conditions_sql, con=engine)
df.head()

Unnamed: 0,individual_id,condition,has_condition
0,1,Catecholaminergic polymorphic ventricular tach...,True
1,1,Heart Structure Abnormality,False
2,1,Baseline/resting electrocardiogram abnormality,False
3,1,Polymorphic ventricular tachycardia,True
4,1,Exercise/stress induced polymorphic ventricula...,True


In [21]:
# Reshape long -> wide: one row per individual_id, one column per condition,
# with has_condition as the cell value. `pivot` returns a new frame, so `df`
# itself is left untouched; `reset_index` turns individual_id back into a column.
df2 = (
    df
    .pivot(index='individual_id', columns='condition', values='has_condition')
    .reset_index()
)

df2.head()

condition,individual_id,Adult-onset primary generalised epilepsy,Arrhythmia at rest,Arrhythmogenic right ventricular cardiomyopathy,Arteriovenous malformation,Ascending aortic aneurysm,Atrial fibrillation,Atrial flutter,Atrial standstill,Atrial tachycardia,...,Sudden cardiac death,Sudden infant death syndrome,Supraventricular tachycardia,Syncope,Syncope exercise/stress induced,Third-degree atrioventricular block,Unspecified premature ventricular contractions,Ventricular fibrillation,Ventricular tachycardia (unspecified),Weight loss
0,1,,,,,,,,,,...,,,,,,,,,,
1,2,,,,,,,,,,...,,,,False,,,False,,,
2,3,,,,,,False,,,False,...,,,,True,True,,False,,,
3,4,,False,,,,False,,,True,...,,,,False,,,False,,,
4,5,,,,,,,,,,...,,,,True,,,,,,


In [22]:
# Resolve the data directory as <current working directory>/../data and make
# sure it exists before anything is written to it.
import os
import IPython  # no longer needed by this cell; retained in case later cells reference it

# Ask the process for its working directory directly instead of shelling out
# to `pwd`: this works on platforms without a `pwd` command and also when the
# code runs outside an IPython kernel (where get_ipython() returns None).
notebook_path = os.getcwd()


data_dir = os.path.join(notebook_path, '..', 'data')

if not os.path.exists(data_dir):
    print(f'Creating directory: {data_dir}')
    # exist_ok=True avoids a FileExistsError if the directory appears between
    # the existence check above and this call.
    os.makedirs(data_dir, exist_ok=True)

In [23]:
# Export the wide individual/condition table to
# <data_dir>/individual_conditions.csv, omitting the DataFrame index.
df_file = os.path.join(data_dir, 'individual_conditions.csv')
df2.to_csv(path_or_buf=df_file, index=False)

# Combine with the rest of the data