In [1]:
# importing modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.stats.proportion import test_proportions_2indep
from scipy.stats import ttest_ind
from scipy.interpolate import UnivariateSpline
import json
from urllib.request import urlopen
import plotly.express as px

In [2]:
df = pd.read_excel("./data/CentracareSocialIsolationData.xlsx")

### Brief Documentation 
Each bullet point describes the data stored in each column of `df`
* **ID**
    * A unique identifier for each subject

* **FLO_MEAS_ID**
    * A unique number associated with each question, relevant to CentraCare

* **Last Date**
    * Likely related to the date on which patietn responded to questionairre

* **DISP_NAME**
    * The question asked to patient (text)

* **MEAS_VALUE**
    * Encoded patient response to question
    * `nan` if patient did not answer

* **MEAS_VALUE_EXTERNAL**
    * Decoded patient response to question (text)
    * `nan` if patient did not answer

* **Active Care Coordination**
    * `"1"`, if patient is in active care coordination
    * `"0"`, if patient is *not* in active care coordination

* **Age**
    * Patient's age at time of taking questionairre
    
* **Alcoholism**
    * `"Y"`, if patient suffers from alcholims
    * `nan`, if patient does *not* suffer from alcholism

* **CHF**
    * `"Y"`, if patient suffers from congestive heart failure
    * `nan`, if patient does *not* suffer from congestive heart failure

* **COPD**
    * `"Yes"`, if patient suffers from chronic obstructive pulmonary disease (COPD)
    * `nan`, if patient does *not* suffer from chronic obstructive pulmonary disease (COPD)

* **Depression**
    * `"Yes"`, if patient suffers from depression
    * `nan`, if patient does *not* suffer from depression

* **Diabetes**
    * `"Yes"`, if patient suffers from diabetes
    * `nan`, if patient does *not* suffer from diabetes
 
* **Ethnicity**
    * `"Non-Hispanic"`, if non-Hispanic
    * `"Hispanic"`, if Hispanic
    * `"Unknown"`, if unknown ethnicity
    * `"Choose Not to Disclose"`, if patient chose not to disclose
    * `nan`, if unkown

* **Gender**
    * `"M"`, if male
    * `"F"`, if female
    * `"X"`, if unspecific
    * `"U"`, if undisclosed
    
* **HTN**
    * `"Yes"`, if patient suffers from hypertension
    * `nan`, if patient does *not* suffer from hypertension

* **Language**
    * Patient's primary language
    * `"Unable to Obtain`, `"Unknown"`, and `"Unable to Read"` are included

* **LGB**
    * `"G"`, if gay
    * `"L"`, if lesbian
    * `nan`, if heterosexual *or* other

* **PCP Department**
    * Patient's primary care department *and/or* where the questionairre was taken
    * `nan`, if unable to obtain

* **Race**
    * Patient's race
    * `nan`, if unable to obtain
    
* **Tobacco Use**
    * `"Yes`, if patient regularly uses tobacco
    * `"No"`, if patient does *not* regularly use tobacco
    * `"Passive"`, if patient passively inhales tobacco (e.g., lives with someone who smokes)
    * `nan`, if unable to obtain

* **vape_user**
    * `"Y"`, if patient regularly uses a vape
    * `"N"`, if patient does *not* regularly use a vape
    
* **Zipcode**
    * Patient's home zipcode
    * `nan`, if unable to obtain

In [27]:
berkman_syme = df["DISP_NAME"].unique()
subjects = df["ID"].unique()
berkman_syme

array(['Do you belong to any clubs or organizations such as church groups, unions, fraternal or athletic groups, or school groups?',
       'Are you married, widowed, divorced, separated, never married, or living with a partner?',
       'How often do you attend church or religious services?',
       'How often do you get together with friends or relatives?',
       'In a typical week, how many times do you talk on the phone with family, friends, or neighbors?',
       'How often do you attend meetings of the clubs or organizations you belong to?'],
      dtype=object)

In [38]:
df["Groups_YN_Val"] = ''
df["Groups_YN_Ext"] = ''
df["Marriage_Val"] = ''
df["Marriage_Ext"] = ''
df['Religious_Val'] = ''
df['Religious_Ext'] = ''
df['GetTogether_Val'] = ''
df['GetTogether_Ext'] = ''
df['Phone_Val'] = ''
df['Phone_Ext'] = ''
df["NumGroupMeetings_Val"] = ''
df["NumGroupMeetings_Ext"] = ''

row_vals = ["Groups_YN_Val", "Marriage_Val", 'Religious_Val', 'GetTogether_Val', 'Phone_Val', "NumGroupMeetings_Val"]
row_exts = ["Groups_YN_Ext", "Marriage_Ext", 'Religious_Ext', 'GetTogether_Ext', 'Phone_Ext', "NumGroupMeetings_Ext"]

In [49]:
for subj in subjects:
    subj_rows = df.loc[(df['ID'] == subj)]
    
    for index, item in enumerate(berkman_syme):
        row = subj_rows.loc[(subj_rows['DISP_NAME'] == item)]
        value = row['MEAS_VALUE']
        ext_value = row['MEAS_VALUE_EXTERNAL']
        
        df.loc[(df['ID'] == subj), row_vals[index]] = value
        df.loc[(df['ID'] == subj), row_exts[index]] = ext_value 
    

KeyboardInterrupt: 

In [46]:
df

Unnamed: 0,ID,FLO_MEAS_ID,Last Date,DISP_NAME,MEAS_VALUE,MEAS_VALUE_EXTERNAL,Active Care Coordination,Age,Alcoholism,CHF,...,Marriage_Val,Marriage_Ext,Religious_Val,Religious_Ext,GetTogether_Val,GetTogether_Ext,Phone_Val,Phone_Ext,NumGroupMeetings_Val,NumGroupMeetings_Ext
0,CC000001,1572879828,2022-07-20,Do you belong to any clubs or organizations su...,2.0,NO,0,74,,,...,,,,,,,,,,
1,CC000001,1572879830,2022-07-20,"Are you married, widowed, divorced, separated,...",3.0,MARRIED,0,74,,,...,3.0,MARRIED,,,,,,,,
2,CC000001,1572879827,2022-07-20,How often do you attend church or religious se...,1.0,NEVER,0,74,,,...,,,1.0,NEVER,,,,,,
3,CC000001,1572879826,2022-07-20,How often do you get together with friends or ...,1.0,NEVER,0,74,,,...,,,,,1.0,NEVER,,,,
4,CC000001,1572879825,2022-07-20,"In a typical week, how many times do you talk ...",1.0,NEVER,0,74,,,...,,,,,,,1.0,NEVER,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
511684,CC083135,1572879827,2022-10-31,How often do you attend church or religious se...,98.0,PATIENT REFUSED,0,31,,,...,,,,,,,,,,
511685,CC083135,1572879825,2022-10-31,"In a typical week, how many times do you talk ...",5.0,MORE THAN THREE TIMES A WEEK,0,31,,,...,,,,,,,,,,
511686,CC083135,1572879826,2022-10-31,How often do you get together with friends or ...,3.0,TWICE A WEEK,0,31,,,...,,,,,,,,,,
511687,CC083135,1572879828,2022-10-31,Do you belong to any clubs or organizations su...,2.0,NO,0,31,,,...,,,,,,,,,,
