<a href="https://colab.research.google.com/github/anushirahatti/mit/blob/master/covid_inpatient_cases.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Ingest

In [0]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori, association_rules
import matplotlib.pyplot as plt

In [2]:
# Read the inpatient case records from the CSV hosted in the project's GitHub
# repository, then show the first five rows as a quick sanity check.
inpatient_cases_data = pd.read_csv(
    "https://raw.githubusercontent.com/anushirahatti/mit/master/inpatient_cases_data.csv"
)
inpatient_cases_data.head()

Unnamed: 0,Gender,Age,Clinical classification,Comorbidities,Cardiovascular and cerebrovascular disease,Endocrine system disease,Malignant tumor,Respiratory system disease,Digestive system disease,Renal disease,Liver disease,Fever,Cough,Chest tightness,Fatigue,Diarrhea,Other Signs and Symptoms,Clinical outcome,COVID-19 RNA clearance,Chest CT findings at discharge
0,Male,47,Heavy,N,N,N,N,N,N,N,N,Y,Y,Y,Y,Y,N,"Aggravated,turn to ICU",N,Advances
1,Female,53,Ordinary,N,N,N,N,N,N,N,N,Y,Y,Y,N,N,N,Improved and discharge,Y,Absorption
2,Female,57,Ordinary,N,N,N,N,N,N,N,N,Y,Y,N,N,N,N,Improved and discharge,Y,No change
3,Male,58,Heavy,Y,N,Y,N,N,N,N,N,N,N,N,N,N,N,"Aggravated,transfer to superior hospital",N,Advances
4,Male,54,Ordinary,N,N,N,N,N,N,N,N,Y,Y,Y,N,N,N,Improved and discharge,Y,Absorption


# Data Preprocessing

In [0]:
#Binning age

def age_group(x):
    """Map an age to a descriptive age-bracket label.

    The input is truncated with ``int()`` before it is compared against the
    bracket boundaries:

        0-17   -> "Children"
        18-25  -> "Young Adults"
        26-40  -> "Adults"
        41-60  -> "Middle Aged Adults"
        61+    -> "Old Adults"

    Parameters
    ----------
    x : anything accepted by ``int()``
        The age to classify.

    Returns
    -------
    The bracket label, or ``x`` itself (unchanged) when the truncated age is
    negative, because no bracket covers negative values.

    Raises
    ------
    ValueError / TypeError
        Propagated from ``int(x)`` when the value cannot be converted.
    """
    years = int(x)

    # No bracket matches a negative age; hand the input back untouched.
    if years < 0:
        return x

    # (exclusive upper bound, label) pairs in ascending order.
    brackets = (
        (18, "Children"),
        (26, 'Young Adults'),
        (41, 'Adults'),
        (61, 'Middle Aged Adults'),
    )
    for upper_bound, label in brackets:
        if years < upper_bound:
            return label

    # Everything from 61 upwards.
    return 'Old Adults'
# Label every patient with an age bracket derived from the numeric 'Age' column.
inpatient_cases_data['Age Group'] = inpatient_cases_data['Age'].apply(age_group)

In [4]:
# Preview the first rows to confirm the 'Age Group' column is present.
inpatient_cases_data.head()

Unnamed: 0,Gender,Age,Clinical classification,Comorbidities,Cardiovascular and cerebrovascular disease,Endocrine system disease,Malignant tumor,Respiratory system disease,Digestive system disease,Renal disease,Liver disease,Fever,Cough,Chest tightness,Fatigue,Diarrhea,Other Signs and Symptoms,Clinical outcome,COVID-19 RNA clearance,Chest CT findings at discharge,Age Group
0,Male,47,Heavy,N,N,N,N,N,N,N,N,Y,Y,Y,Y,Y,N,"Aggravated,turn to ICU",N,Advances,Middle Aged Adults
1,Female,53,Ordinary,N,N,N,N,N,N,N,N,Y,Y,Y,N,N,N,Improved and discharge,Y,Absorption,Middle Aged Adults
2,Female,57,Ordinary,N,N,N,N,N,N,N,N,Y,Y,N,N,N,N,Improved and discharge,Y,No change,Middle Aged Adults
3,Male,58,Heavy,Y,N,Y,N,N,N,N,N,N,N,N,N,N,N,"Aggravated,transfer to superior hospital",N,Advances,Middle Aged Adults
4,Male,54,Ordinary,N,N,N,N,N,N,N,N,Y,Y,Y,N,N,N,Improved and discharge,Y,Absorption,Middle Aged Adults


In [0]:
# The numeric age is now represented by 'Age Group', so remove the raw column.
inpatient_cases_data = inpatient_cases_data.drop(columns=['Age'])

In [6]:
# List the column labels; note that ' Endocrine system disease' carries a
# leading space in the raw header.
print(inpatient_cases_data.columns)

Index(['Gender', 'Clinical classification', 'Comorbidities',
       'Cardiovascular and cerebrovascular disease',
       ' Endocrine system disease', 'Malignant tumor',
       'Respiratory system disease', 'Digestive system disease',
       'Renal disease', 'Liver disease', 'Fever', 'Cough', 'Chest tightness',
       'Fatigue', 'Diarrhea', 'Other Signs and Symptoms', 'Clinical outcome',
       'COVID-19 RNA clearance', 'Chest CT findings at discharge',
       'Age Group'],
      dtype='object')


In [7]:
# Remove leading/trailing whitespace from every header
# (e.g. ' Endocrine system disease'), then list the cleaned labels.
inpatient_cases_data.columns = [
    label.strip() for label in inpatient_cases_data.columns
]
inpatient_cases_data.columns.tolist()

['Gender',
 'Clinical classification',
 'Comorbidities',
 'Cardiovascular and cerebrovascular disease',
 'Endocrine system disease',
 'Malignant tumor',
 'Respiratory system disease',
 'Digestive system disease',
 'Renal disease',
 'Liver disease',
 'Fever',
 'Cough',
 'Chest tightness',
 'Fatigue',
 'Diarrhea',
 'Other Signs and Symptoms',
 'Clinical outcome',
 'COVID-19 RNA clearance',
 'Chest CT findings at discharge',
 'Age Group']

In [8]:
# Categorical columns to expand; get_dummies replaces each of them with one
# '<column>_<value>' indicator column per distinct value.
col = [
    'Gender',
    'Clinical classification',
    'Clinical outcome',
    'Chest CT findings at discharge',
    'Age Group',
]
inpatient_cases_data = pd.get_dummies(data=inpatient_cases_data, columns=col)
inpatient_cases_data.head(5)

Unnamed: 0,Comorbidities,Cardiovascular and cerebrovascular disease,Endocrine system disease,Malignant tumor,Respiratory system disease,Digestive system disease,Renal disease,Liver disease,Fever,Cough,Chest tightness,Fatigue,Diarrhea,Other Signs and Symptoms,COVID-19 RNA clearance,Gender_Female,Gender_Male,Clinical classification_Heavy,Clinical classification_Ordinary,"Clinical outcome_Aggravated,transfer to superior hospital","Clinical outcome_Aggravated,turn to ICU",Clinical outcome_Died,Clinical outcome_Improved and discharge,Chest CT findings at discharge_Absorption,Chest CT findings at discharge_Advances,Chest CT findings at discharge_No change,Age Group_Adults,Age Group_Middle Aged Adults,Age Group_Old Adults,Age Group_Young Adults
0,N,N,N,N,N,N,N,N,Y,Y,Y,Y,Y,N,N,0,1,1,0,0,1,0,0,0,1,0,0,1,0,0
1,N,N,N,N,N,N,N,N,Y,Y,Y,N,N,N,Y,1,0,0,1,0,0,0,1,1,0,0,0,1,0,0
2,N,N,N,N,N,N,N,N,Y,Y,N,N,N,N,Y,1,0,0,1,0,0,0,1,0,0,1,0,1,0,0
3,Y,N,Y,N,N,N,N,N,N,N,N,N,N,N,N,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0
4,N,N,N,N,N,N,N,N,Y,Y,Y,N,N,N,Y,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0


In [0]:
# Turn every column into a genuine boolean: 'Y'/'N' strings and 1/0 dummy
# indicators all become True/False.
#
# The conversion is applied to the whole frame and assigned back instead of
# calling `inpatient_cases_data[column].replace(..., inplace=True)` per column.
# That chained in-place form mutates a temporary column object; it raises a
# FutureWarning on pandas 2.x and no longer updates the parent frame once
# Copy-on-Write is active.
boolean_flags = inpatient_cases_data.replace({'Y': True, 'N': False})

# Guard the cast below: astype(bool) would silently turn any leftover string
# or NaN into True, so refuse anything that is not already 0/1/True/False.
unexpected_columns = [
    column
    for column in boolean_flags.columns
    if not boolean_flags[column].isin([True, False]).all()
]
if unexpected_columns:
    raise ValueError(
        "Columns contain values other than Y/N/1/0: {}".format(unexpected_columns)
    )

# An explicit cast yields a bool dtype for every column regardless of how the
# installed pandas version infers dtypes after replace().
inpatient_cases_data = boolean_flags.astype(bool)

In [10]:
# Preview the first five rows to check that the values are now True/False.
inpatient_cases_data.head(5)

Unnamed: 0,Comorbidities,Cardiovascular and cerebrovascular disease,Endocrine system disease,Malignant tumor,Respiratory system disease,Digestive system disease,Renal disease,Liver disease,Fever,Cough,Chest tightness,Fatigue,Diarrhea,Other Signs and Symptoms,COVID-19 RNA clearance,Gender_Female,Gender_Male,Clinical classification_Heavy,Clinical classification_Ordinary,"Clinical outcome_Aggravated,transfer to superior hospital","Clinical outcome_Aggravated,turn to ICU",Clinical outcome_Died,Clinical outcome_Improved and discharge,Chest CT findings at discharge_Absorption,Chest CT findings at discharge_Advances,Chest CT findings at discharge_No change,Age Group_Adults,Age Group_Middle Aged Adults,Age Group_Old Adults,Age Group_Young Adults
0,False,False,False,False,False,False,False,False,True,True,True,True,True,False,False,False,True,True,False,False,True,False,False,False,True,False,False,True,False,False
1,False,False,False,False,False,False,False,False,True,True,True,False,False,False,True,True,False,False,True,False,False,False,True,True,False,False,False,True,False,False
2,False,False,False,False,False,False,False,False,True,True,False,False,False,False,True,True,False,False,True,False,False,False,True,False,False,True,False,True,False,False
3,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,False,True,False,False,False,False,True,False,False,True,False,False
4,False,False,False,False,False,False,False,False,True,True,True,False,False,False,True,False,True,False,True,False,False,False,True,True,False,False,False,True,False,False


In [11]:
# Summarise the frame: row count, column names, non-null counts and dtypes.
inpatient_cases_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 73 entries, 0 to 72
Data columns (total 30 columns):
 #   Column                                                     Non-Null Count  Dtype
---  ------                                                     --------------  -----
 0   Comorbidities                                              73 non-null     bool 
 1   Cardiovascular and cerebrovascular disease                 73 non-null     bool 
 2   Endocrine system disease                                   73 non-null     bool 
 3   Malignant tumor                                            73 non-null     bool 
 4   Respiratory system disease                                 73 non-null     bool 
 5   Digestive system disease                                   73 non-null     bool 
 6   Renal disease                                              73 non-null     bool 
 7   Liver disease                                              73 non-null     bool 
 8   Fever                           

# Association Rules Mining

In [12]:
# Mine frequent itemsets with Apriori, keeping only those whose support is at
# least 0.6. use_colnames=True reports itemsets by column label, and
# max_len=None places no cap on itemset size.
freq_items = apriori(
    inpatient_cases_data,
    min_support=0.6,
    use_colnames=True,
    max_len=None,
)
freq_items.head(10)

Unnamed: 0,support,itemsets
0,0.753425,(Fever)
1,0.753425,(Cough)
2,0.945205,(COVID-19 RNA clearance)
3,0.671233,(Clinical classification_Ordinary)
4,0.945205,(Clinical outcome_Improved and discharge)
5,0.876712,(Chest CT findings at discharge_Absorption)
6,0.712329,"(Fever, COVID-19 RNA clearance)"
7,0.712329,"(Fever, Clinical outcome_Improved and discharge)"
8,0.657534,"(Chest CT findings at discharge_Absorption, Fe..."
9,0.712329,"(COVID-19 RNA clearance, Cough)"


In [13]:
# Derive association rules from the frequent itemsets, keeping only rules with
# a confidence of at least 0.8, and show the first 20.
rules = association_rules(
    freq_items,
    metric="confidence",
    min_threshold=0.8,
)
rules.head(20)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Fever),(COVID-19 RNA clearance),0.753425,0.945205,0.712329,0.945455,1.000264,0.000188,1.004566
1,(Fever),(Clinical outcome_Improved and discharge),0.753425,0.945205,0.712329,0.945455,1.000264,0.000188,1.004566
2,(Fever),(Chest CT findings at discharge_Absorption),0.753425,0.876712,0.657534,0.872727,0.995455,-0.003002,0.968689
3,(Cough),(COVID-19 RNA clearance),0.753425,0.945205,0.712329,0.945455,1.000264,0.000188,1.004566
4,(Cough),(Clinical outcome_Improved and discharge),0.753425,0.945205,0.712329,0.945455,1.000264,0.000188,1.004566
5,(Cough),(Chest CT findings at discharge_Absorption),0.753425,0.876712,0.657534,0.872727,0.995455,-0.003002,0.968689
6,(Clinical classification_Ordinary),(COVID-19 RNA clearance),0.671233,0.945205,0.671233,1.0,1.057971,0.03678,inf
7,(COVID-19 RNA clearance),(Clinical outcome_Improved and discharge),0.945205,0.945205,0.945205,1.0,1.057971,0.051792,inf
8,(Clinical outcome_Improved and discharge),(COVID-19 RNA clearance),0.945205,0.945205,0.945205,1.0,1.057971,0.051792,inf
9,(Chest CT findings at discharge_Absorption),(COVID-19 RNA clearance),0.876712,0.945205,0.876712,1.0,1.057971,0.048039,inf


In [0]:
#converting dataframe to list of lists

#inpatient_cases_list = []
#for i in range(0, 73):
#    inpatient_cases_list.append([str(inpatient_cases_data.values[i,j]) for j in range(0, 20)])

In [0]:
#inpatient_cases_list

In [0]:
#applying association rules

#from apyori import apriori

#association_rules = apriori(inpatient_cases_list, min_support=0.05, min_confidence=0.80, min_lift=2, min_length=2)
#association_results = list(association_rules)

In [0]:
#print(len(association_results))

In [0]:
#print(association_results[0])