# Example of merging dfs

In [1]:
import pandas as pd
import numpy as np
import sqlite3

In [2]:
# Sample df

patient_df = pd.DataFrame({
    'PatientID' : range(1, 51),
    'BloodPressure': np.random.randint(80, 180, 50),
    'Smoking': np.random.randint(0, 2, 50)
    # 'WhiteBloodCellCount': np.random.randint(300, 800, 50),
    # 'CholesterolLevels': np.random.randint(5, 30, 50)
})

BloodPressure_range_df = pd.DataFrame({
    'BloodPressureDiagnosisID': range(3),
    'Low': [0, 121, 141],
    'High': [120, 140, 5000]
})

BloodPressureDiagnosis_df = pd.DataFrame({
    'BloodPressureDiagnosisID': range(3),
    'BloodPressureDiagnosis': ['No treatment needed', 'Exercise Daily', 'You\'re screwed!']
})

SmokingDiagnosis_df = pd.DataFrame({
    'Smoking': [0, 1],
    'SmokingDiagnosis': ['Good job!', 'STOP IT NOW DUMMY!']
})


In [3]:
patient_df

Unnamed: 0,PatientID,BloodPressure,Smoking
0,1,168,1
1,2,91,0
2,3,125,1
3,4,142,1
4,5,165,1
5,6,133,1
6,7,92,0
7,8,145,1
8,9,105,1
9,10,112,0


In [4]:
BloodPressure_range_df

Unnamed: 0,BloodPressureDiagnosisID,Low,High
0,0,0,120
1,1,121,140
2,2,141,5000


In [5]:
BloodPressureDiagnosis_df

Unnamed: 0,BloodPressureDiagnosisID,BloodPressureDiagnosis
0,0,No treatment needed
1,1,Exercise Daily
2,2,You're screwed!


In [6]:
SmokingDiagnosis_df

Unnamed: 0,Smoking,SmokingDiagnosis
0,0,Good job!
1,1,STOP IT NOW DUMMY!


In [7]:
# There's no good way in pandas to join on a range of values, so easiest to use sql for any part involving a range. The rest can be done easily with merges.

conn = sqlite3.connect(':memory:')
patient_df.to_sql("patient_df", conn, index=False)
BloodPressure_range_df.to_sql("BloodPressure_range_df", conn, index=False)
query = "SELECT * FROM patient_df LEFT JOIN BloodPressure_range_df ON patient_df.BloodPressure BETWEEN BloodPressure_range_df.Low AND BloodPressure_range_df.High"
new_patient_df = pd.read_sql_query(query, conn)

new_patient_df

Unnamed: 0,PatientID,BloodPressure,Smoking,BloodPressureDiagnosisID,Low,High
0,1,168,1,2,141,5000
1,2,91,0,0,0,120
2,3,125,1,1,121,140
3,4,142,1,2,141,5000
4,5,165,1,2,141,5000
5,6,133,1,1,121,140
6,7,92,0,0,0,120
7,8,145,1,2,141,5000
8,9,105,1,0,0,120
9,10,112,0,0,0,120


In [8]:
new_patient_df.merge(BloodPressureDiagnosis_df).merge(SmokingDiagnosis_df).sort_values('PatientID')

Unnamed: 0,PatientID,BloodPressure,Smoking,BloodPressureDiagnosisID,Low,High,BloodPressureDiagnosis,SmokingDiagnosis
0,1,168,1,2,141,5000,You're screwed!,STOP IT NOW DUMMY!
36,2,91,0,0,0,120,No treatment needed,Good job!
21,3,125,1,1,121,140,Exercise Daily,STOP IT NOW DUMMY!
1,4,142,1,2,141,5000,You're screwed!,STOP IT NOW DUMMY!
2,5,165,1,2,141,5000,You're screwed!,STOP IT NOW DUMMY!
22,6,133,1,1,121,140,Exercise Daily,STOP IT NOW DUMMY!
37,7,92,0,0,0,120,No treatment needed,Good job!
3,8,145,1,2,141,5000,You're screwed!,STOP IT NOW DUMMY!
13,9,105,1,0,0,120,No treatment needed,STOP IT NOW DUMMY!
38,10,112,0,0,0,120,No treatment needed,Good job!
