In [99]:
# Scientific paper
## Rule Extraction for Screening of COVID-19 Disease Using Granular Computing Approach

In [1]:
## Importing libraries
import pandas as pd
import numpy as np
from bigtree import Node, findall, find_names, find_paths, find_attrs

In [2]:
## Importing dataset
# Read the rule-extraction COVID-19 spreadsheet into a DataFrame.
# (The original line assigned to `df` twice: `df = df = ...`.)
df = pd.read_excel("../Datasets/rule_extraction_covid.xlsx")

In [3]:
# Display the loaded dataset
df

Unnamed: 0,Object,Fever,Headaches,Dry cough,Sore throat,Shortness of breath,Nausea and vomiting,Class
0,1,L,M,L,M,M,M,3
1,2,VH,L,H,H,L,L,4
2,3,VH,L,M,H,H,L,5
3,4,H,L,VH,H,L,L,4
4,5,M,VH,VH,H,L,L,2
5,6,VH,L,M,H,L,L,4
6,7,L,VH,L,H,M,L,3
7,8,L,VH,L,M,VH,M,4
8,9,VH,H,H,M,VH,M,4
9,10,M,H,H,M,L,L,2


In [4]:
# The information table is the dataset without the 'Object' identifier column
information_table = df.drop(columns=['Object'])

In [5]:
# Display the information table (dataset without the 'Object' column)
information_table

Unnamed: 0,Fever,Headaches,Dry cough,Sore throat,Shortness of breath,Nausea and vomiting,Class
0,L,M,L,M,M,M,3
1,VH,L,H,H,L,L,4
2,VH,L,M,H,H,L,5
3,H,L,VH,H,L,L,4
4,M,VH,VH,H,L,L,2
5,VH,L,M,H,L,L,4
6,L,VH,L,H,M,L,3
7,L,VH,L,M,VH,M,4
8,VH,H,H,M,VH,M,4
9,M,H,H,M,L,L,2


In [6]:
# We define a function to create formulas and granules

def getFormulasGranules(column_attr, info_table):
    """Build the table of basic granules for the given attributes.

    For every attribute in ``column_attr`` the rows of ``info_table`` are
    grouped by that attribute's value.  Each (attribute, value) pair produces
    one row holding the formula string ``"<attribute>=<value>"`` and the
    granule, i.e. the list of ``info_table`` index labels whose rows satisfy
    that formula.

    Parameters
    ----------
    column_attr : iterable of str
        Names of the ``info_table`` columns to granulate.
    info_table : pandas.DataFrame
        Information table; its index labels identify the objects.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Formula'`` and ``'Granule'`` with a 0..n-1 index.  Rows are
        ordered by attribute (in the order given) and, within an attribute,
        by the key order of ``groupby(...).groups``.
    """
    # Collect the rows first and build the frame once, instead of enlarging
    # the DataFrame cell by cell and assigning list objects through ``.loc``.
    records = []

    for attr in column_attr:
        groups = info_table.groupby(by=attr).groups
        for key, members in groups.items():
            records.append({
                # Formatting (rather than ``+``) also accepts non-string values.
                'Formula': f"{attr}={key}",
                'Granule': list(members),
            })

    # ``columns`` keeps both columns present even when there are no records.
    return pd.DataFrame(records, columns=['Formula', 'Granule'])

In [8]:
# We define a function to compute the generality measure
# G = number of objects in the granule / total number of objects

def getGenerality(basic_gr_table, info_table):
    """Add a ``'Generality'`` column to ``basic_gr_table`` (modified in place).

    For each row label 0..len-1 the value is the size of that row's
    ``'Granule'`` list divided by the number of rows of ``info_table``.
    Nothing is returned.
    """
    universe_size = len(info_table)

    for row in range(len(basic_gr_table)):
        granule_size = len(basic_gr_table.loc[row, 'Granule'])
        basic_gr_table.loc[row, 'Generality'] = granule_size / universe_size

In [9]:
# Helper for the confidence measure:
# counts, class by class, the objects contained in a granule

def countClasseGr(objects_Gr, info_table):
    """Count how many objects of a granule fall into each class 1..5.

    ``objects_Gr`` is an iterable of ``info_table`` index labels; the class of
    an object is read from the ``'Class'`` column.  Objects whose class is not
    one of 1, 2, 3, 4, 5 are ignored.

    Returns a 5-tuple ``(n_class_1, n_class_2, n_class_3, n_class_4, n_class_5)``.
    """
    tally = [0, 0, 0, 0, 0]

    for obj in objects_Gr:
        label = info_table.loc[obj, 'Class']
        # Same first-match semantics as an if/elif chain over classes 1..5.
        for slot in range(5):
            if label == slot + 1:
                tally[slot] += 1
                break

    return tuple(tally)

def getConfidence( basic_gr_table, info_table ):
    """Add ``confidence_1`` .. ``confidence_5`` to ``basic_gr_table`` in place.

    For each row label 0..len-1, ``confidence_k`` is the number of objects of
    the row's granule that belong to class ``k`` (as counted by
    ``countClasseGr``) divided by the granule size.  Nothing is returned.
    """
    for row in range(len(basic_gr_table)):
        members = basic_gr_table.loc[row, 'Granule']
        per_class = countClasseGr(members, info_table)
        granule_size = len(members)

        # Columns are written in class order 1..5.
        for class_id, hits in enumerate(per_class, start=1):
            basic_gr_table.loc[row, 'confidence_' + str(class_id)] = hits / granule_size

In [10]:
# We define a function to compute the coverage

def getCoverage(basic_gr_table, info_table):
    """Add ``coverage_1`` .. ``coverage_5`` to ``basic_gr_table`` in place.

    ``coverage_k`` of a granule is the number of its objects that belong to
    class ``k`` divided by the number of objects of class ``k`` in the whole
    ``info_table``; it is 0 when the table contains no object of class ``k``.

    Unlike the previous version, ``coverage_1`` is computed like the other
    classes instead of being hard-coded to 0.

    Parameters
    ----------
    basic_gr_table : pandas.DataFrame
        Granule table with a ``'Granule'`` column (lists of ``info_table``
        index labels) and row labels 0..len-1.
    info_table : pandas.DataFrame
        Information table with a ``'Class'`` column.

    Returns
    -------
    None
    """
    class_ids = range(1, 6)
    class_column = info_table['Class']

    # Number of objects of each class in the whole table (0 when absent),
    # computed once instead of one groupby per class.
    class_totals = {c: int((class_column == c).sum()) for c in class_ids}

    for i in range(len(basic_gr_table)):
        obj_Gr = basic_gr_table.loc[i, 'Granule']
        # Classes of the objects that make up this granule.
        granule_classes = class_column.loc[list(obj_Gr)]

        for c in class_ids:
            total = class_totals[c]
            hits = int((granule_classes == c).sum())
            basic_gr_table.loc[i, 'coverage_' + str(c)] = hits / total if total != 0 else 0.0

In [11]:
# We define a function to compute the entropy

def getEntropy(basic_gr_table, info_table):
    """Add an ``'entropy'`` column to ``basic_gr_table`` (modified in place).

    For each row label 0..len-1 the entropy is the sum over classes 1..5 of
    ``-(p * log10(p))`` where ``p`` is the row's ``confidence_<class>`` value;
    zero confidences contribute nothing.  ``info_table`` is not used.
    Nothing is returned.
    """
    confidence_cols = ['confidence_' + str(class_id) for class_id in range(1, 6)]

    for row in range(len(basic_gr_table)):
        total = 0
        for col in confidence_cols:
            prob = basic_gr_table.loc[row, col]
            # Skip p == 0: log10(0) is undefined and the term counts as 0.
            if prob != 0:
                total += -( prob * np.log10(prob) )

        basic_gr_table.loc[row, 'entropy'] = total


In [12]:
'''
    Goal: Trying to construct the Granular netwok
'''
# Covering solution: list meant to hold the covered objects (starts empty)
covering_solution = []
# Attributes: the feature columns used to build formulas
attributes = [
    'Fever',
    'Headaches',
    'Dry cough',
    'Sore throat',
    'Shortness of breath',
    'Nausea and vomiting',
]
# Universe-level information table (same object as information_table)
u_info_table = information_table
# Basic granules of the whole universe; their measures are added afterwards
u_B_Granules = getFormulasGranules( attributes, u_info_table )

In [13]:
# Display the basic granules (formula and member objects) of the universe
u_B_Granules

Unnamed: 0,Formula,Granule
0,Fever=H,"[3, 13]"
1,Fever=L,"[0, 6, 7, 10, 16]"
2,Fever=M,"[4, 9, 12, 14, 15, 17, 19]"
3,Fever=VH,"[1, 2, 5, 8, 11, 18]"
4,Headaches=H,"[8, 9, 14, 15, 19]"
5,Headaches=L,"[1, 2, 3, 5, 11, 12, 13]"
6,Headaches=M,"[0, 10, 16, 17, 18]"
7,Headaches=VH,"[4, 6, 7]"
8,Dry cough=H,"[1, 8, 9, 11, 13, 14, 15]"
9,Dry cough=L,"[0, 6, 7, 10, 16, 17, 19]"


In [14]:
# Add the measures to the universe granule table, in this order:
# generality, confidence, coverage, entropy.
# Entropy must come after confidence because it reads the confidence_* columns.
for compute_measure in (getGenerality, getConfidence, getCoverage, getEntropy):
    compute_measure(u_B_Granules, u_info_table)

In [15]:
# Rank granules: lowest entropy first, ties broken by highest generality,
# then renumber the rows 0..n-1
u_B_Granules = (
    u_B_Granules
    .sort_values(by=['entropy', 'Generality'], ascending=[True, False])
    .reset_index(drop=True)
)

In [16]:
# Display the granule table with its measures, sorted by entropy then generality
u_B_Granules

Unnamed: 0,Formula,Granule,Generality,confidence_1,confidence_2,confidence_3,confidence_4,confidence_5,coverage_1,coverage_2,coverage_3,coverage_4,coverage_5,entropy
0,Shortness of breath=M,"[0, 6, 10, 16]",0.2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,Fever=H,"[3, 13]",0.1,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.285714,0.0,0.0
2,Shortness of breath=VH,"[7, 8]",0.1,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.285714,0.0,0.0
3,Fever=L,"[0, 6, 7, 10, 16]",0.25,0.0,0.0,0.8,0.2,0.0,0.0,0.0,1.0,0.142857,0.0,0.217322
4,Headaches=H,"[8, 9, 14, 15, 19]",0.25,0.0,0.8,0.0,0.2,0.0,0.0,0.8,0.0,0.142857,0.0,0.217322
5,Headaches=L,"[1, 2, 3, 5, 11, 12, 13]",0.35,0.0,0.0,0.0,0.714286,0.285714,0.0,0.0,0.0,0.714286,0.666667,0.259825
6,Fever=VH,"[1, 2, 5, 8, 11, 18]",0.3,0.0,0.0,0.0,0.666667,0.333333,0.0,0.0,0.0,0.571429,0.666667,0.276435
7,Dry cough=M,"[2, 5, 18]",0.15,0.0,0.0,0.0,0.333333,0.666667,0.0,0.0,0.0,0.142857,0.666667,0.276435
8,Shortness of breath=H,"[2, 11, 12]",0.15,0.0,0.0,0.0,0.333333,0.666667,0.0,0.0,0.0,0.142857,0.666667,0.276435
9,Nausea and vomiting=M,"[0, 7, 8, 10, 16]",0.25,0.0,0.0,0.6,0.4,0.0,0.0,0.0,0.75,0.285714,0.0,0.292285


In [21]:
# Current depth in the granule network; GranuleNetBuilding raises it by one
# around each recursive call and uses it to indent its trace output
level = 1
# Objects of the universe that have already been assigned to a leaf
U_covering_solution = list()
# Objects of the universe still to be covered (not used by GranuleNetBuilding)
U_remaining_objs = list()
# Granules whose entropy is <= this threshold are processed first
min_entropy_threshold = 0
# Root of the decision tree: the whole universe U
root = Node("U")

# The function takes two parameters
def GranuleNetBuilding(inf_table, parent_node):
    """Recursively grow the granule decision tree below ``parent_node``.

    Procedure for one table:

    1. Build the basic granules of ``inf_table`` with their measures and sort
       them by ascending entropy, then descending generality.
    2. Handle every granule whose entropy is ``<= min_entropy_threshold``:
       a pure granule (entropy 0) becomes a leaf carrying its class in the
       ``class_`` attribute; an impure one is attached as an internal node and
       refined recursively.  Granules whose objects are all already in the
       global ``U_covering_solution`` only count as covered locally.
    3. While objects of ``inf_table`` remain uncovered, pick the granule with
       the largest Jaccard overlap with the remaining objects, attach it as an
       internal node and recurse on its sub-table.

    Granules that contain every object of ``inf_table`` are never used for
    recursion: they would produce an identical sub-table and recurse forever.
    If only such granules are left, the remaining objects cannot be told apart
    by ``attributes``; they are reported and left uncovered.

    Reads the module-level ``attributes`` and ``min_entropy_threshold``,
    updates the module-level ``level`` and ``U_covering_solution``, and prints
    a trace indented by ``level``.

    Parameters
    ----------
    inf_table : pandas.DataFrame
        Information table (attribute columns plus ``'Class'``) to split.
    parent_node : bigtree.Node
        Node under which the nodes created for this table are attached.

    Returns
    -------
    None
    """
    # Using the keyword "global" to refer to the global variable level
    global level
    # Number of objects in this table; a granule of that size cannot refine it
    table_size = len(inf_table)
    # Local covering solution
    covering_solution = []
    # Local remaining objects
    remaining_objs = set(inf_table.index)

    # Initializing the basic granules u_B_Granules
    u_B_Granules = getFormulasGranules( attributes, inf_table )
    # Nothing to split (empty table or no attributes): the measure columns
    # would not exist and the sort below would fail.
    if u_B_Granules.empty:
        return
    #Generality
    getGenerality(u_B_Granules, inf_table)
    #Confidence
    getConfidence(u_B_Granules, inf_table)
    #Coverage
    getCoverage(u_B_Granules, inf_table)
    #Entropy
    getEntropy(u_B_Granules, inf_table)

    # Sorting the values by min entropy and max generality
    u_B_Granules.sort_values(by = ['entropy','Generality'], ascending = [True, False], inplace = True)
    # Resetting the index of the sorted dataframe
    u_B_Granules.reset_index(drop=True, inplace=True)
    # Getting the indices of formulas whose entropy is within the threshold
    min_entropy_indx = u_B_Granules.loc[(u_B_Granules['entropy'] <= min_entropy_threshold)].index

    # Step 2: granules selected by the entropy threshold
    for i in min_entropy_indx:
        granules = u_B_Granules.loc[i,'Granule']
        entropy_val = u_B_Granules.loc[i,'entropy']
        formula = u_B_Granules.loc[i,'Formula']

        # Objects of this granule not yet assigned to any leaf of the universe
        not_dup = list(set(granules) - set(U_covering_solution))

        if not_dup:

            if entropy_val != 0:
                # Impure granule accepted by a non-zero threshold.
                if len(granules) >= table_size:
                    # Same objects as the current table: recursing would loop.
                    continue
                # Attach a real node (the previous code passed the formula
                # string as parent) and refine the granule's own sub-table.
                sub_node = Node(formula, parent = parent_node)
                covering_solution.extend(list(granules))
                remaining_objs = set(inf_table.index) - set(covering_solution)
                level += 1
                GranuleNetBuilding(inf_table.loc[granules, :], sub_node)
                level -= 1
            else:
                # Pure granule: all its objects share the class of the first one
                val_out = inf_table.loc[granules[0], "Class"]
                Node(formula,class_ = val_out, parent = parent_node)
                print("\t"*level,"LEVEL =====> ", level)
                print("\t"*level,"Formula: ", formula)
                print("\t"*level,"Granules: ", granules)
                covering_solution.extend(list(granules))
                U_covering_solution.extend(list(granules))
                remaining_objs = set(inf_table.index) - set(covering_solution)
        else:
            covering_solution.extend(list(granules))
            remaining_objs = set(inf_table.index) - set(covering_solution)
        u_B_Granules.drop(i, inplace = True)

    # Step 3: cover what is left with the best-overlapping granules
    while len(remaining_objs) > 0:
        # Only granules strictly smaller than the table can make progress
        candidates = [idx for idx in u_B_Granules.index
                      if len(u_B_Granules.loc[idx, 'Granule']) < table_size]
        if not candidates:
            print("\t"*level,"Unresolved objects: ", sorted(remaining_objs))
            break

        # Highest Jaccard index wins; on ties the first granule in sort order
        best_index = None
        best_coverage = -1.0
        for idx in candidates:
            members = set(u_B_Granules.loc[idx, 'Granule'])
            coverage_inter = len(members & remaining_objs)
            coverage_union = len(members | remaining_objs)
            cov_temp = coverage_inter / coverage_union

            if cov_temp > best_coverage:
                best_index = idx
                best_coverage = cov_temp

        granules = u_B_Granules.loc[best_index,'Granule']
        formula = u_B_Granules.loc[best_index,'Formula']

        print("\t"*level,"LEVEL =====> ", level)
        print("\t"*level,"Formula: ", formula)
        print("\t"*level,"Granules: ", granules)

        covering_solution.extend(list(granules))
        remaining_objs = set(inf_table.index) - set(covering_solution)
        u_B_Granules.drop(best_index, inplace = True)

        # Sub-table of the chosen granule, taken from the table being split
        # (not from the global information_table)
        infTable = inf_table.loc[granules, :]
        level += 1
        rt = Node(formula, parent = parent_node)
        GranuleNetBuilding(infTable, rt)
        level -= 1

In [22]:
# Build the granule network for the whole information table under `root`.
# The call prints its trace and mutates the globals `root` and
# `U_covering_solution`; re-run the initialisation cell first when re-running
# this one, otherwise the previous state is reused.
GranuleNetBuilding(information_table, root)

	 LEVEL =====>  1
	 Formula:  Shortness of breath=M
	 Granules:  [0, 6, 10, 16]
	 LEVEL =====>  1
	 Formula:  Fever=H
	 Granules:  [3, 13]
	 LEVEL =====>  1
	 Formula:  Shortness of breath=VH
	 Granules:  [7, 8]
	 LEVEL =====>  1
	 Formula:  Nausea and vomiting=L
	 Granules:  [1, 2, 3, 4, 5, 6, 9, 11, 12, 13, 14, 15, 17, 18, 19]
		 LEVEL =====>  2
		 Formula:  Headaches=H
		 Granules:  [9, 14, 15, 19]
		 LEVEL =====>  2
		 Formula:  Fever=VH
		 Granules:  [1, 2, 5, 11, 18]
			 LEVEL =====>  3
			 Formula:  Dry cough=H
			 Granules:  [1, 11]
			 LEVEL =====>  3
			 Formula:  Headaches=M
			 Granules:  [18]
			 LEVEL =====>  3
			 Formula:  Dry cough=M
			 Granules:  [2, 5, 18]
				 LEVEL =====>  4
				 Formula:  Shortness of breath=H
				 Granules:  [2]
				 LEVEL =====>  4
				 Formula:  Headaches=L
				 Granules:  [2, 5]
					 LEVEL =====>  5
					 Formula:  Shortness of breath=L
					 Granules:  [5]
		 LEVEL =====>  2
		 Formula:  Dry cough=VH
		 Granules:  [3, 4, 12]
			 LEVEL =====

In [23]:
# Print the resulting tree; leaves show the class stored in their `class_` attribute
root.show(attr_list = ["class_"])

U
├── Shortness of breath=M [class_=3]
├── Fever=H [class_=4]
├── Shortness of breath=VH [class_=4]
└── Nausea and vomiting=L
    ├── Headaches=H [class_=2]
    ├── Fever=VH
    │   ├── Dry cough=H [class_=4]
    │   ├── Headaches=M [class_=5]
    │   └── Dry cough=M
    │       ├── Shortness of breath=H [class_=5]
    │       └── Headaches=L
    │           └── Shortness of breath=L [class_=4]
    ├── Dry cough=VH
    │   ├── Headaches=VH [class_=2]
    │   └── Shortness of breath=H [class_=5]
    └── Headaches=M
        └── Fever=M [class_=1]


In [121]:
# Number of objects recorded in the universe covering solution
len(U_covering_solution)

20

In [122]:
# Same check as the previous cell (number of objects in the covering solution)
len(U_covering_solution)

20