# LIVER SERIES - Part 2
Statistical inference in all categorical variables.

In [1]:
# Import the necessary libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import chi2_contingency
from scipy.stats import chi2

In [2]:
# We need this additional code to be able to display tables/dataframes in one output.

from IPython.display import display, HTML

CSS = """
.output {
    flex-direction: row;
}
"""

HTML('<style>{}</style>'.format(CSS))

# Import the dataset

In [3]:
pbc = pd.read_csv('primary biliary cirrhosis RCT mod1.csv')
pbc.head()

Unnamed: 0,PatientID,DaySurvival,Status,Treatment,AgeYrs,Sex,Ascites,Hepatomegaly,SpiderAngiomata,Edema,Bilirubin,Cholesterol,Albumin,Copper,ALP,AST,Triglyceride,Platelet,Protime,HistologyStage
0,1,400,censored,penicillamine,58.77,F,yes,yes,yes,present,14.5,261.0,2.6,156.0,1718.0,137.95,172.0,190.0,12.2,4.0
1,2,4500,dead,penicillamine,56.45,F,no,yes,yes,absent,1.1,302.0,4.14,54.0,7394.8,113.52,88.0,221.0,10.6,3.0
2,3,1012,censored,penicillamine,70.07,M,no,no,no,diurese,1.4,176.0,3.48,210.0,516.0,96.1,55.0,151.0,12.0,4.0
3,4,1925,censored,penicillamine,54.74,F,no,yes,yes,diurese,1.8,244.0,2.54,64.0,6121.8,60.63,92.0,183.0,10.3,4.0
4,5,1504,transplant,placebo,38.11,F,no,yes,yes,absent,3.4,279.0,3.53,143.0,671.0,113.15,72.0,136.0,10.9,3.0


In [4]:
pbc.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 418 entries, 0 to 417
Data columns (total 20 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   PatientID        418 non-null    int64  
 1   DaySurvival      418 non-null    int64  
 2   Status           418 non-null    object 
 3   Treatment        312 non-null    object 
 4   AgeYrs           418 non-null    float64
 5   Sex              418 non-null    object 
 6   Ascites          312 non-null    object 
 7   Hepatomegaly     312 non-null    object 
 8   SpiderAngiomata  312 non-null    object 
 9   Edema            418 non-null    object 
 10  Bilirubin        418 non-null    float64
 11  Cholesterol      284 non-null    float64
 12  Albumin          418 non-null    float64
 13  Copper           310 non-null    float64
 14  ALP              312 non-null    float64
 15  AST              312 non-null    float64
 16  Triglyceride     282 non-null    float64
 17  Platelet        

# Handling the null values in our dataset

How many null values does our dataset have?

In [5]:
pbc.isna().sum()

PatientID            0
DaySurvival          0
Status               0
Treatment          106
AgeYrs               0
Sex                  0
Ascites            106
Hepatomegaly       106
SpiderAngiomata    106
Edema                0
Bilirubin            0
Cholesterol        134
Albumin              0
Copper             108
ALP                106
AST                106
Triglyceride       136
Platelet            11
Protime              2
HistologyStage       6
dtype: int64

We will drop the all null values that coincide with the variable **Treatment**.

In [6]:
pbc = pbc.dropna(subset=['Treatment'])
pbc.tail()

Unnamed: 0,PatientID,DaySurvival,Status,Treatment,AgeYrs,Sex,Ascites,Hepatomegaly,SpiderAngiomata,Edema,Bilirubin,Cholesterol,Albumin,Copper,ALP,AST,Triglyceride,Platelet,Protime,HistologyStage
307,308,1153,dead,penicillamine,61.18,F,no,yes,no,absent,0.4,246.0,3.58,24.0,797.0,91.0,113.0,288.0,10.4,2.0
308,309,994,dead,placebo,58.3,F,no,no,no,absent,0.4,260.0,2.75,41.0,1166.0,70.0,82.0,231.0,10.8,2.0
309,310,939,dead,penicillamine,62.33,F,no,no,no,absent,1.7,434.0,3.35,39.0,1713.0,171.0,100.0,234.0,10.2,2.0
310,311,839,dead,penicillamine,38.0,F,no,no,no,absent,2.0,247.0,3.16,69.0,1050.0,117.0,88.0,335.0,10.5,2.0
311,312,788,dead,placebo,33.15,F,no,no,yes,absent,6.4,576.0,3.79,186.0,2115.0,136.0,149.0,200.0,10.8,2.0


Do we still have null values?

In [7]:
pbc.isna().sum()

PatientID           0
DaySurvival         0
Status              0
Treatment           0
AgeYrs              0
Sex                 0
Ascites             0
Hepatomegaly        0
SpiderAngiomata     0
Edema               0
Bilirubin           0
Cholesterol        28
Albumin             0
Copper              2
ALP                 0
AST                 0
Triglyceride       30
Platelet            4
Protime             0
HistologyStage      0
dtype: int64

For now we will not handle the continuous variables with null values because we will be focusing on categorical variables in this notebook.

# Create subset/variables

## Histology stage
Even though this variable is the last column of the main dataset, we need to first create it's subset so that it's changes will be applied to succeeding subsets/variables.
<br>
Here, we convert the integers into string so that it will be easier to categorize them in performing statistical inference.

In [8]:
pbc['HistologyStage'] = pbc['HistologyStage'].apply(str)
pbc['HistologyStage'].replace('1.0', 'Stage1', inplace=True)
pbc['HistologyStage'].replace('2.0', 'Stage2', inplace=True)
pbc['HistologyStage'].replace('3.0', 'Stage3', inplace=True)
pbc['HistologyStage'].replace('4.0', 'Stage4', inplace=True)

Stage1 = pbc[pbc['HistologyStage'] == 'Stage1']
Stage2 = pbc[pbc['HistologyStage'] == 'Stage2']
Stage3 = pbc[pbc['HistologyStage'] == 'Stage3']
Stage4 = pbc[pbc['HistologyStage'] == 'Stage4']

histo = pd.DataFrame(pbc['HistologyStage'].value_counts())
histo

Unnamed: 0,HistologyStage
Stage3,120
Stage4,109
Stage2,67
Stage1,16


## Censoring status

In [9]:
dead = pbc[pbc['Status'] == 'dead']
censored = pbc[pbc['Status'] == 'censored']
transplant = pbc[pbc['Status'] == 'transplant']

status = pd.DataFrame(pbc['Status'].value_counts())
status

Unnamed: 0,Status
dead,168
censored,125
transplant,19


## Treatment group

In [10]:
penicillamine = pbc[pbc['Treatment'] == 'penicillamine']
placebo = pbc[pbc['Treatment'] == 'placebo']

treat = pd.DataFrame(pbc['Treatment'].value_counts())
treat

Unnamed: 0,Treatment
penicillamine,158
placebo,154


## Sex

In [11]:
female = pbc[pbc['Sex'] == 'F']
male = pbc[pbc['Sex'] == 'M']

sex = pd.DataFrame(pbc['Sex'].value_counts())
sex

Unnamed: 0,Sex
F,276
M,36


## Ascites

In [12]:
noascites = pbc[pbc['Ascites'] == 'no']
yesascites = pbc[pbc['Ascites'] == 'yes']

ascites = pd.DataFrame(pbc['Ascites'].value_counts())
ascites

Unnamed: 0,Ascites
no,288
yes,24


## Hepatomegaly

In [13]:
nohepato = pbc[pbc['Hepatomegaly'] == 'no']
yeshepato = pbc[pbc['Hepatomegaly'] == 'yes']

hepato = pd.DataFrame(pbc['Hepatomegaly'].value_counts())
hepato

Unnamed: 0,Hepatomegaly
yes,160
no,152


## Spider angiomata

In [14]:
nospider = pbc[pbc['SpiderAngiomata'] == 'no']
yesspider = pbc[pbc['SpiderAngiomata'] == 'yes']

spider = pd.DataFrame(pbc['SpiderAngiomata'].value_counts())
spider

Unnamed: 0,SpiderAngiomata
no,222
yes,90


## Edema

In [15]:
present = pbc[pbc['Edema'] == 'present']
diurese = pbc[pbc['Edema'] == 'diurese']
absent = pbc[pbc['Edema'] == 'absent']

edema = pd.DataFrame(pbc['Edema'].value_counts())
edema

Unnamed: 0,Edema
absent,263
diurese,29
present,20


# Performing the $\chi^2$ test

## A. Censoring status

###(1) Is there a relationship between censoring status and treatment assignment?
$H_0$: There is no relation between censoring status and treatment assignment.
<br>
$H_A$: There is relation between censoring status and treatment assignment.
<br>
Significance level: $\alpha = 0.05$


In [72]:
# Create the contingency table - censoring status versus treatment assignment

deadpenicillamine = dead[dead['Treatment'] == 'penicillamine']
deadpenicillamine = deadpenicillamine['Treatment'].count()
deadplacebo = dead[dead['Treatment'] == 'placebo']
deadplacebo = deadplacebo['Treatment'].count()

censoredpenicillamine = censored[censored['Treatment'] == 'penicillamine']
censoredpenicillamine = censoredpenicillamine['Treatment'].count()
censoredplacebo = censored[censored['Treatment'] == 'placebo']
censoredplacebo = censoredplacebo['Treatment'].count()

transplantpenicillamine = transplant[transplant['Treatment'] == 'penicillamine']
transplantpenicillamine = transplantpenicillamine['Treatment'].count()
transplantplacebo = transplant[transplant['Treatment'] == 'placebo']
transplantplacebo = transplantplacebo['Treatment'].count()

CensorVsTreatment = pd.DataFrame({'Dead': [deadpenicillamine, deadplacebo], 
                             'Censored': [censoredpenicillamine, censoredplacebo],
                             'Transplant': [transplantpenicillamine, transplantplacebo]}, 
                            index=['Penicillamine', 'Placebo'])
CensorVsTreatment

Unnamed: 0,Dead,Censored,Transplant
Penicillamine,83,65,10
Placebo,85,60,9


In [73]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(CensorVsTreatment)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['Dead', 'Censored', 'Transplant'], 
                           index=['Penicillamine', 'Placebo'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,0.225196
Critical value,5.991465
p-value,0.89351
degrees of freedom,2.0


Unnamed: 0,Dead,Censored,Transplant
Penicillamine,85.076923,63.301282,9.621795
Placebo,82.923077,61.698718,9.378205


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 0.89 $>$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 0.225 $<$ Critical Value = 5.99
<br><br>
We will **not reject** our null hypothesis. Based on the data that we have, and the test that we did, we can say that there is no relationship between censoring status and treatment assignment.

###(2) Is there a relationship between censoring status and sex?
$H_0$: There is no relationship between censoring status and sex.
<br>
$H_A$: There is a significant relationship between censoring status and sex.
<br>
Significance level: $\alpha = 0.05$


In [74]:
# Create the contingency table - censoring status versus sex

deadfemale = dead[dead['Sex'] == 'F']
deadfemale = deadfemale['Sex'].count()
deadmale = dead[dead['Sex'] == 'M']
deadmale = deadmale['Sex'].count()

censoredfemale = censored[censored['Sex'] == 'F']
censoredfemale = censoredfemale['Sex'].count()
censoredmale = censored[censored['Sex'] == 'M']
censoredmale = censoredmale['Sex'].count()

transplantfemale = transplant[transplant['Sex'] == 'F']
transplantfemale = transplant['Sex'].count()
transplantmale = transplant[transplant['Sex'] == 'M']
transplantmale = transplantmale['Sex'].count()

CensorVsSex = pd.DataFrame({'Dead': [deadfemale, deadmale], 
                            'Censored': [censoredfemale, censoredmale],
                            'Transplant': [transplantfemale, transplantmale]}, 
                           index=['Female', 'Male'])
CensorVsSex

Unnamed: 0,Dead,Censored,Transplant
Female,157,103,19
Male,11,22,3


In [75]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(CensorVsSex)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['Dead', 'Censored', 'Transplant'], 
                           index=['Female', 'Male'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,8.763129
Critical value,5.991465
p-value,0.012506
degrees of freedom,2.0


Unnamed: 0,Dead,Censored,Transplant
Female,148.8,110.714286,19.485714
Male,19.2,14.285714,2.514286


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 0.0125 $<$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 8.76 $>$ Critical Value = 5.99
<br><br>
We will **reject** our null hypothesis. There is a relationship between censoring status and sex.

###(3) Is there a relationship between censoring status and presence of ascites?
$H_0$: There is no relationship between censoring status and presence of ascites.
<br>
$H_A$: There is a significant relationship between censoring status and presence of ascites.
<br>
Significance level: $\alpha = 0.05$


In [76]:
# Create the contingency table - censoring status versus ascites

deadascites = dead[dead['Ascites'] == 'yes']
deadascites = deadascites['Ascites'].count()
deadnoascites = dead[dead['Ascites'] == 'no']
deadnoascites = deadnoascites['Ascites'].count()

censoredascites = censored[censored['Ascites'] == 'yes']
censoredascites = censoredascites['Ascites'].count()
censorednoascites = censored[censored['Ascites'] == 'no']
censorednoascites = censorednoascites['Ascites'].count()

transplantascites = transplant[transplant['Ascites'] == 'yes']
transplantascites = transplantascites['Ascites'].count()
transplantnoascites = transplant[transplant['Ascites'] == 'no']
transplantnoascites = transplantnoascites['Ascites'].count()

CensorVsAscites = pd.DataFrame({'Dead': [deadascites, deadnoascites], 
                            'Censored': [censoredascites, censorednoascites],
                            'Transplant': [transplantascites, transplantnoascites]}, 
                           index=['With ascites', 'Without ascites'])
CensorVsAscites

Unnamed: 0,Dead,Censored,Transplant
With ascites,1,23,0
Without ascites,167,102,19


In [77]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(CensorVsAscites)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['Dead', 'Censored', 'Transplant'], 
                           index=['With ascites', 'Without ascites'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,33.6845
Critical value,5.991465
p-value,4.847353e-08
degrees of freedom,2.0


Unnamed: 0,Dead,Censored,Transplant
With ascites,12.923077,9.615385,1.461538
Without ascites,155.076923,115.384615,17.538462


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 4.84e-08 $<$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 3.37e+1 $>$ Critical Value = 5.99
<br><br>
We will **reject** our null hypothesis. There is a relationship between censoring status and presence of ascites.

###(4) Is there a relationship between censoring status and presence of hepatomegaly?
$H_0$: There is no relationship between censoring status and presence of hepatomegaly.
<br>
$H_A$: There is a significant relationship between censoring status and presence of hepatomegaly.
<br>
Significance level: $\alpha = 0.05$


In [79]:
# Create the contingency table - censoring status versus hepatomegaly

deadhepato = dead[dead['Hepatomegaly'] == 'yes']
deadhepato = deadhepato['Hepatomegaly'].count()
deadnohepato = dead[dead['Hepatomegaly'] == 'no']
deadnohepato = deadnohepato['Hepatomegaly'].count()

censoredhepato = censored[censored['Hepatomegaly'] == 'yes']
censoredhepato = censoredhepato['Hepatomegaly'].count()
censorednohepato = censored[censored['Hepatomegaly'] == 'no']
censorednohepato = censorednohepato['Hepatomegaly'].count()

transplanthepato = transplant[transplant['Hepatomegaly'] == 'yes']
transplanthepato = transplanthepato['Hepatomegaly'].count()
transplantnohepato = transplant[transplant['Hepatomegaly'] == 'no']
transplantnohepato = transplantnohepato['Hepatomegaly'].count()

CensorVsHepatomegaly = pd.DataFrame({'Dead': [deadhepato, deadnohepato], 
                            'Censored': [censoredhepato, censorednohepato],
                            'Transplant': [transplanthepato, transplantnohepato]}, 
                           index=['With hepatomegaly', 'Without hepatomegaly'])
CensorVsHepatomegaly

Unnamed: 0,Dead,Censored,Transplant
With hepatomegaly,60,88,12
Without hepatomegaly,108,37,7


In [80]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(CensorVsHepatomegaly)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['Dead', 'Censored', 'Transplant'], 
                           index=['With hepatomegaly', 'Without hepatomegaly'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,35.65639
Critical value,5.991465
p-value,1.808479e-08
degrees of freedom,2.0


Unnamed: 0,Dead,Censored,Transplant
With hepatomegaly,86.153846,64.102564,9.74359
Without hepatomegaly,81.846154,60.897436,9.25641


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 1.8e-08 $<$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 3.6e+1 $>$ Critical Value = 5.99
<br><br>
We will **reject** our null hypothesis. There is a relationship between censoring status and presence of hepatomegaly.

###(5) Is there a relationship between censoring status and presence of spider angiomata?
$H_0$: There is no relationship between censoring status and presence of spider angiomata.
<br>
$H_A$: There is a significant relationship between censoring status and presence of spider angiomata.
<br>
Significance level: $\alpha = 0.05$


In [81]:
# Create the contingency table - censoring status versus spider angiomata

deadspider = dead[dead['SpiderAngiomata'] == 'yes']
deadspider = deadspider['SpiderAngiomata'].count()
deadnospider = dead[dead['SpiderAngiomata'] == 'no']
deadnospider = deadnospider['SpiderAngiomata'].count()

censoredspider = censored[censored['SpiderAngiomata'] == 'yes']
censoredspider = censoredspider['SpiderAngiomata'].count()
censorednospider = censored[censored['SpiderAngiomata'] == 'no']
censorednospider = censorednospider['SpiderAngiomata'].count()

transplantspider = transplant[transplant['SpiderAngiomata'] == 'yes']
transplantspider = transplantspider['SpiderAngiomata'].count()
transplantnospider = transplant[transplant['SpiderAngiomata'] == 'no']
transplantnospider = transplantnospider['SpiderAngiomata'].count()

CensorVsSpiderAngiomata = pd.DataFrame({'Dead': [deadspider, deadnospider], 
                            'Censored': [censoredspider, censorednospider],
                            'Transplant': [transplantspider, transplantnospider]}, 
                           index=['With spider angiomata', 'Without spider angiomata'])
CensorVsSpiderAngiomata

Unnamed: 0,Dead,Censored,Transplant
With spider angiomata,33,52,5
Without spider angiomata,135,73,14


In [82]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(CensorVsSpiderAngiomata)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['Dead', 'Censored', 'Transplant'], 
                           index=['With spider angiomata', 'Without spider angiomata'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,16.898265
Critical value,5.991465
p-value,0.000214
degrees of freedom,2.0


Unnamed: 0,Dead,Censored,Transplant
With spider angiomata,48.461538,36.057692,5.480769
Without spider angiomata,119.538462,88.942308,13.519231


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 0.0002 $<$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 16.898 $>$ Critical Value = 5.99
<br><br>
We will **reject** our null hypothesis. There is a relationship between censoring status and presence of spider angiomata.

###(6) Is there a relationship between censoring status and presence of edema?
$H_0$: There is no relationship between censoring status and presence of edema.
<br>
$H_A$: There is a significant relationship between censoring status and presence of edema.
<br>
Significance level: $\alpha = 0.05$


In [83]:
# Create the contingency table - censoring status versus edema

deadedema = dead[dead['Edema'] == 'present']
deadedema = deadedema['Edema'].count()
deaddiurese = dead[dead['Edema'] == 'diurese']
deaddiurese = deaddiurese['Edema'].count()
deadabsent = dead[dead['Edema'] == 'absent']
deadabsent = deadabsent['Edema'].count()

censorededema = censored[censored['Edema'] == 'present']
censorededema = censorededema['Edema'].count()
censoreddiurese = censored[censored['Edema'] == 'diurese']
censoreddiurese = censoreddiurese['Edema'].count()
censoredabsent = censored[censored['Edema'] == 'absent']
censoredabsent = censoredabsent['Edema'].count()

transplantededema = transplant[transplant['Edema'] == 'present']
transplantededema = transplantededema['Edema'].count()
transplantdiurese = transplant[transplant['Edema'] == 'diurese']
transplantdiurese = transplantdiurese['Edema'].count()
transplantabsent = transplant[transplant['Edema'] == 'absent']
transplantabsent = transplantabsent['Edema'].count()

CensorVsEdema = pd.DataFrame({'Dead': [deadedema, deaddiurese, deadabsent], 
                            'Censored': [censorededema, censoreddiurese, censoredabsent],
                            'Transplant': [transplantededema, transplantdiurese, transplantabsent]}, 
                           index=['With edema', 'Diuresed edema', 'Absent edema'])
CensorVsEdema

Unnamed: 0,Dead,Censored,Transplant
With edema,1,19,0
Diuresed edema,10,17,2
Absent edema,157,89,17


In [84]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(CensorVsEdema)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['Dead', 'Censored', 'Transplant'], 
                           index=['With edema', 'Diuresed edema', 'Absent edema'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,33.96308
Critical value,9.487729
p-value,7.582937e-07
degrees of freedom,4.0


Unnamed: 0,Dead,Censored,Transplant
With edema,10.769231,8.012821,1.217949
Diuresed edema,15.615385,11.61859,1.766026
Absent edema,141.615385,105.36859,16.016026


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 7.58e-7 $<$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 3.396e+1 $>$ Critical Value = 9.488
<br><br>
We will **reject** our null hypothesis. There is a relationship between censoring status and edema.

###(7) Is there a relationship between censoring status and histological stage of the disease?
$H_0$: There is no relationship between censoring status and histological stage.
<br>
$H_A$: There is a significant relationship between censoring status and histological stage.
<br>
Significance level: $\alpha = 0.05$


In [130]:
# Create the contingency table - censoring status versus histological stage

deadStage1 = dead[dead['HistologyStage'] == 'Stage1']
deadStage1 = deadStage1['HistologyStage'].count()
deadStage2 = dead[dead['HistologyStage'] == 'Stage2']
deadStage2 = deadStage2['HistologyStage'].count()
deadStage3 = dead[dead['HistologyStage'] == 'Stage3']
deadStage3 = deadStage3['HistologyStage'].count()
deadStage4 = dead[dead['HistologyStage'] == 'Stage4']
deadStage4 = deadStage4['HistologyStage'].count()
censoredStage1 = censored[censored['HistologyStage'] == 'Stage1']
censoredStage1 = censoredStage1['HistologyStage'].count()
censoredStage2 = censored[censored['HistologyStage'] == 'Stage2']
censoredStage2 = censoredStage2['HistologyStage'].count()
censoredStage3 = censored[censored['HistologyStage'] == 'Stage3']
censoredStage3 = censoredStage3['HistologyStage'].count()
censoredStage4 = censored[censored['HistologyStage'] == 'Stage4']
censoredStage4 = censoredStage4['HistologyStage'].count()
transplantStage1 = transplant[transplant['HistologyStage'] == 'Stage1']
transplantStage1 = transplantStage1['HistologyStage'].count()
transplantStage2 = transplant[transplant['HistologyStage'] == 'Stage2']
transplantStage2 = transplantStage2['HistologyStage'].count()
transplantStage3 = transplant[transplant['HistologyStage'] == 'Stage3']
transplantStage3 = transplantStage3['HistologyStage'].count()
transplantStage4 = transplant[transplant['HistologyStage'] == 'Stage4']
transplantStage4 = transplantStage4['HistologyStage'].count()
CensorVsHisto = pd.DataFrame({'Dead': [deadStage1, deadStage2, deadStage3, deadStage4], 'Censored': [censoredStage1, censoredStage2, censoredStage3, censoredStage4], 'Transplant': [transplantStage1, transplantStage2, transplantStage3, transplantStage4]}, index=['Stage 1', 'Stage 2', 'Stage 3', 'Stage 4'])
CensorVsHisto

Unnamed: 0,Dead,Censored,Transplant
Stage 1,15,1,0
Stage 2,48,16,3
Stage 3,69,43,8
Stage 4,36,65,8


In [86]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(CensorVsHisto)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['Dead', 'Censored', 'Transplant'], 
                           index=['Stage 1', 'Stage 2', 'Stage 3', 'Stage 4'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,39.24922
Critical value,12.59159
p-value,6.395903e-07
degrees of freedom,6.0


Unnamed: 0,Dead,Censored,Transplant
Stage 1,8.615385,6.410256,0.974359
Stage 2,36.076923,26.842949,4.080128
Stage 3,64.615385,48.076923,7.307692
Stage 4,58.692308,43.669872,6.637821


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 6.396e-7 $<$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 3.92e+1 $>$ Critical Value = 1.259e+1
<br><br>
We will **reject** our null hypothesis. There is a relationship between censoring status and histological stage.

## B. Treatment group

###(1) Is there a relationship between treatment group assignment and sex?
$H_0$: There is no relationship between treatment group assignment and sex.
<br>
$H_A$: There is a significant relationship between treatment group assignment and sex.
<br>
Significance level: $\alpha = 0.05$


In [87]:
# Create the contingency table - treatment group and sex

penicillamineF = penicillamine[penicillamine['Sex'] == 'F']
penicillamineF = penicillamineF['Sex'].count()
penicillamineM = penicillamine[penicillamine['Sex'] == 'M']
penicillamineM = penicillamineM['Sex'].count()

placeboF = placebo[placebo['Sex'] == 'F']
placeboF = placeboF['Sex'].count()
placeboF = placebo[placebo['Sex'] == 'F']
placeboF = placeboF['Sex'].count()

TreatGrpVsSex = pd.DataFrame({'Penicillamine': [penicillamineF, placeboF],
                              'Placebo': [penicillamineM, placeboF]}, 
                             index=['Female', 'Male'])
TreatGrpVsSex

Unnamed: 0,Penicillamine,Placebo
Female,137,21
Male,139,139


In [88]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(TreatGrpVsSex)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['Penicillamine', 'Placebo'], 
                           index=['Female', 'Male'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,56.86925
Critical value,3.841459
p-value,4.657723e-14
degrees of freedom,1.0


Unnamed: 0,Penicillamine,Placebo
Female,100.018349,57.981651
Male,175.981651,102.018349


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 4.658e-14 $<$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 5.69e+1 $>$ Critical Value = 3.84
<br><br>
We will **reject** our null hypothesis. There is a relationship between treatment group and sex.

###(2) Is there a relationship between treatment group assignment and presence of ascites?
$H_0$: There is no relationship between treatment group assignment and presence of ascites.
<br>
$H_A$: There is a significant relationship between treatment group assignment and presence of ascites.
<br>
Significance level: $\alpha = 0.05$


In [89]:
# Create the contingency table - treatment group and presence of ascites

penicillamineAscites = penicillamine[penicillamine['Ascites'] == 'yes']
penicillamineAscites = penicillamineAscites['Ascites'].count()
penicillamineNoAscites = penicillamine[penicillamine['Ascites'] == 'no']
penicillamineNoAscites = penicillamineNoAscites['Ascites'].count()

placeboAscites = placebo[placebo['Ascites'] == 'yes']
placeboAscites = placeboAscites['Ascites'].count()
placeboNoAscites = placebo[placebo['Ascites'] == 'no']
placeboNoAscites = placeboNoAscites['Ascites'].count()

TreatGrpVsAscites = pd.DataFrame({'Penicillamine': [penicillamineAscites, penicillamineNoAscites],
                              'Placebo': [placeboAscites, placeboNoAscites]}, 
                             index=['With ascites', 'Without ascites'])
TreatGrpVsAscites

Unnamed: 0,Penicillamine,Placebo
With ascites,14,10
Without ascites,144,144


In [90]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(TreatGrpVsAscites)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['Penicillamine', 'Placebo'], 
                           index=['With ascites', 'Without ascites'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,0.327244
Critical value,3.841459
p-value,0.567286
degrees of freedom,1.0


Unnamed: 0,Penicillamine,Placebo
With ascites,12.153846,11.846154
Without ascites,145.846154,142.153846


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 0.567 $>$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 0.327 $<$ Critical Value = 3.84
<br><br>
We will **not reject** our null hypothesis. There is **no** relationship between treatment group and presence of ascites.

###(3) Is there a relationship between treatment group assignment and presence of hepatomegaly?
$H_0$: There is no relationship between treatment group assignment and presence of hepatomegaly.
<br>
$H_A$: There is a significant relationship between treatment group assignment and presence of hepatomegaly.
<br>
Significance level: $\alpha = 0.05$


In [91]:
# Create the contingency table - treatment group and presence of hepatomegaly

penicillamineHepato = penicillamine[penicillamine['Hepatomegaly'] == 'yes']
penicillamineHepato = penicillamineHepato['Hepatomegaly'].count()
penicillamineNoHepato = penicillamine[penicillamine['Hepatomegaly'] == 'no']
penicillamineNoHepato = penicillamineNoHepato['Hepatomegaly'].count()

placeboHepato = placebo[placebo['Hepatomegaly'] == 'yes']
placeboHepato = placeboHepato['Hepatomegaly'].count()
placeboNoHepato = placebo[placebo['Hepatomegaly'] == 'no']
placeboNoHepato = placeboNoHepato['Hepatomegaly'].count()

TreatGrpVsHepato = pd.DataFrame({'Penicillamine': [penicillamineHepato, penicillamineNoHepato],
                              'Placebo': [placeboHepato, placeboNoHepato]}, 
                             index=['With hepatomegaly', 'Without hepatomegaly'])
TreatGrpVsHepato

Unnamed: 0,Penicillamine,Placebo
With hepatomegaly,73,87
Without hepatomegaly,85,67


In [92]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(TreatGrpVsHepato)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['Penicillamine', 'Placebo'], 
                           index=['With hepatomegaly', 'Without hepatomegaly'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,2.906762
Critical value,3.841459
p-value,0.088209
degrees of freedom,1.0


Unnamed: 0,Penicillamine,Placebo
With hepatomegaly,81.025641,78.974359
Without hepatomegaly,76.974359,75.025641


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 0.088 $>$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 2.9 $<$ Critical Value = 3.84
<br><br>
We will **not reject** our null hypothesis. There is **no** relationship between treatment group and presence of hepatomegaly.

###(4) Is there a relationship between treatment group assignment and presence of spider angiomata?
$H_0$: There is no relationship between treatment group assignment and presence of spider angiomata.
<br>
$H_A$: There is a significant relationship between treatment group assignment and presence of spider angiomata.
<br>
Significance level: $\alpha = 0.05$


In [93]:
# Create the contingency table - treatment group and presence of spider angiomata

penicillamineSpider = penicillamine[penicillamine['SpiderAngiomata'] == 'yes']
penicillamineSpider = penicillamineSpider['SpiderAngiomata'].count()
penicillamineNoSpider = penicillamine[penicillamine['SpiderAngiomata'] == 'no']
penicillamineNoSpider = penicillamineNoSpider['SpiderAngiomata'].count()

placeboSpider = placebo[placebo['SpiderAngiomata'] == 'yes']
placeboSpider = placeboSpider['SpiderAngiomata'].count()
placeboNoSpider = placebo[placebo['SpiderAngiomata'] == 'no']
placeboNoSpider = placeboNoSpider['SpiderAngiomata'].count()

TreatGrpVsSpider = pd.DataFrame({'Penicillamine': [penicillamineSpider, penicillamineNoSpider],
                              'Placebo': [placeboSpider, placeboNoSpider]}, 
                             index=['With spider angiomata', 'Without spider angiomata'])
TreatGrpVsSpider

Unnamed: 0,Penicillamine,Placebo
With spider angiomata,45,45
Without spider angiomata,113,109


In [94]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(TreatGrpVsSpider)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['Penicillamine', 'Placebo'], 
                           index=['With spider angiomata', 'Without spider angiomata'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,0.00037
Critical value,3.841459
p-value,0.98466
degrees of freedom,1.0


Unnamed: 0,Penicillamine,Placebo
With spider angiomata,45.576923,44.423077
Without spider angiomata,112.423077,109.576923


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 0.98 $>$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 0.00037 $<$ Critical Value = 3.84
<br><br>
We will **not reject** our null hypothesis. There is **no** relationship between treatment group and presence of spider angiomata.

###(5) Is there a relationship between treatment group assignment and presence of edema?
$H_0$: There is no relationship between treatment group assignment and presence of edema.
<br>
$H_A$: There is a significant relationship between treatment group assignment and presence of edema.
<br>
Significance level: $\alpha = 0.05$


In [95]:
# Create the contingency table - treatment group and presence of edema

penicillamineEdema = penicillamine[penicillamine['Edema'] == 'present']
penicillamineEdema = penicillamineEdema['Edema'].count()
penicillamineDiurese = penicillamine[penicillamine['Edema'] == 'diurese']
penicillamineDiurese = penicillamineDiurese['Edema'].count()
penicillamineAbsent = penicillamine[penicillamine['Edema'] == 'absent']
penicillamineAbsent = penicillamineAbsent['Edema'].count()

placeboEdema = placebo[placebo['Edema'] == 'present']
placeboEdema = placeboEdema['Edema'].count()
placeboDiurese = placebo[placebo['Edema'] == 'diurese']
placeboDiurese = placeboDiurese['Edema'].count()
placeboAbsent = placebo[placebo['Edema'] == 'absent']
placeboAbsent = placeboAbsent['Edema'].count()

TreatGrpVsEdema = pd.DataFrame({'Penicillamine': [penicillamineEdema, penicillamineDiurese, penicillamineAbsent],
                              'Placebo': [placeboEdema, placeboDiurese, placeboAbsent]}, 
                             index=['With edema', 'Diuresed edema', 'Absent edema'])
TreatGrpVsEdema

Unnamed: 0,Penicillamine,Placebo
With edema,10,10
Diuresed edema,16,13
Absent edema,132,131


In [96]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(TreatGrpVsEdema)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['Penicillamine', 'Placebo'], 
                           index=['With edema', 'Diuresed edema', 'Absent edema'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,0.262908
Critical value,5.991465
p-value,0.876819
degrees of freedom,2.0


Unnamed: 0,Penicillamine,Placebo
With edema,10.128205,9.871795
Diuresed edema,14.685897,14.314103
Absent edema,133.185897,129.814103


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 0.877 $>$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 0.263 $<$ Critical Value = 5.99
<br><br>
We will **not reject** our null hypothesis. There is **no** relationship between treatment group and presence of edema.

###(6) Is there a relationship between treatment group assignment and histological staging?
$H_0$: There is no relationship between treatment group assignment and histological staging.
<br>
$H_A$: There is a significant relationship between treatment group assignment and histological staging.
<br>
Significance level: $\alpha = 0.05$


In [97]:
# Create the contingency table - treatment group and histological staging

penicillamineStage1 = penicillamine[penicillamine['HistologyStage'] == 'Stage1']
penicillamineStage1 = penicillamineStage1['HistologyStage'].count()
penicillamineStage2 = penicillamine[penicillamine['HistologyStage'] == 'Stage2']
penicillamineStage2 = penicillamineStage2['HistologyStage'].count()
penicillamineStage3 = penicillamine[penicillamine['HistologyStage'] == 'Stage3']
penicillamineStage3 = penicillamineStage3['HistologyStage'].count()
penicillamineStage4 = penicillamine[penicillamine['HistologyStage'] == 'Stage4']
penicillamineStage4 = penicillamineStage4['HistologyStage'].count()

placeboStage1 = placebo[placebo['HistologyStage'] == 'Stage1']
placeboStage1 = placeboStage1['HistologyStage'].count()
placeboStage2 = placebo[placebo['HistologyStage'] == 'Stage2']
placeboStage2 = placeboStage2['HistologyStage'].count()
placeboStage3 = placebo[placebo['HistologyStage'] == 'Stage3']
placeboStage3 = placeboStage3['HistologyStage'].count()
placeboStage4 = placebo[placebo['HistologyStage'] == 'Stage4']
placeboStage4 = placeboStage4['HistologyStage'].count()

TreatGrpVsHisto = pd.DataFrame({'Penicillamine': [penicillamineStage1, penicillamineStage2, penicillamineStage3, penicillamineStage4],
                              'Placebo': [placeboStage1, placeboStage2, placeboStage3, placeboStage4]}, 
                             index=['Stage 1', 'Stage 2', 'Stage 3', 'Stage 4'])
TreatGrpVsHisto

Unnamed: 0,Penicillamine,Placebo
Stage 1,12,4
Stage 2,35,32
Stage 3,56,64
Stage 4,55,54


In [98]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(TreatGrpVsHisto)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['Penicillamine', 'Placebo'], 
                           index=['Stage 1', 'Stage 2', 'Stage 3', 'Stage 4'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,4.626314
Critical value,7.814728
p-value,0.201296
degrees of freedom,3.0


Unnamed: 0,Penicillamine,Placebo
Stage 1,8.102564,7.897436
Stage 2,33.929487,33.070513
Stage 3,60.769231,59.230769
Stage 4,55.198718,53.801282


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 0.2 $>$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 4.626 $<$ Critical Value = 7.8
<br><br>
We will **not reject** our null hypothesis. There is **no** relationship between treatment group and histological staging.

##C. Sex category

###(1) Is there a relationship between sex and presence of ascites?
$H_0$: There is no relationship between treatment group assignment and presence of ascites.
<br>
$H_A$: There is a significant relationship between treatment group assignment and presence of ascites.
<br>
Significance level: $\alpha = 0.05$


In [99]:
# Create the contingency table - sex and presence of ascites

ascitesF = female[female['Ascites'] == 'yes']
ascitesF = ascitesF['Ascites'].count()
noascitesF = female[female['Ascites'] == 'no']
noascitesF = noascitesF['Ascites'].count()

ascitesM = male[male['Ascites'] == 'yes']
ascitesM = ascitesM['Ascites'].count()
noascitesM = male[male['Ascites'] == 'no']
noascitesM = noascitesM['Ascites'].count()

SexVsAscites = pd.DataFrame({'Female': [ascitesF, noascitesF],
                             'Male': [ascitesM, noascitesM]}, 
                            index=['With ascites', 'Without ascites'])
SexVsAscites

Unnamed: 0,Female,Male
With ascites,21,3
Without ascites,255,33


In [100]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(SexVsAscites)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['Female', 'Male'], 
                           index=['With ascites', 'Without ascites'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,0.032055
Critical value,3.841459
p-value,0.857907
degrees of freedom,1.0


Unnamed: 0,Female,Male
With ascites,21.230769,2.769231
Without ascites,254.769231,33.230769


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 0.858 $>$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 0.032 $<$ Critical Value = 3.84
<br><br>
We will **not reject** our null hypothesis. There is **no** relationship between sex and presence of ascites.

###(2) Is there a relationship between sex and presence of hepatomegaly?
$H_0$: There is no relationship between treatment group assignment and presence of hepatomegaly.
<br>
$H_A$: There is a significant relationship between treatment group assignment and presence of hepatomegaly.
<br>
Significance level: $\alpha = 0.05$


In [101]:
# Create the contingency table - sex and presence of hepatomegaly

hepatoF = female[female['Hepatomegaly'] == 'yes']
hepatoF = hepatoF['Hepatomegaly'].count()
nohepatoF = female[female['Hepatomegaly'] == 'no']
nohepatoF = nohepatoF['Hepatomegaly'].count()

hepatoM = male[male['Hepatomegaly'] == 'yes']
hepatoM = hepatoM['Hepatomegaly'].count()
nohepatoM = male[male['Hepatomegaly'] == 'no']
nohepatoM = nohepatoM['Hepatomegaly'].count()

SexVsHepato = pd.DataFrame({'Female': [hepatoF, nohepatoF],
                             'Male': [hepatoM, nohepatoM]}, 
                            index=['With hepatomegaly', 'Without hepatomegaly'])
SexVsHepato

Unnamed: 0,Female,Male
With hepatomegaly,139,21
Without hepatomegaly,137,15


In [102]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(SexVsHepato)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['Female', 'Male'], 
                           index=['With hepatomegaly', 'Without hepatomegaly'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,0.522268
Critical value,3.841459
p-value,0.469876
degrees of freedom,1.0


Unnamed: 0,Female,Male
With hepatomegaly,141.538462,18.461538
Without hepatomegaly,134.461538,17.538462


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 0.47 $>$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 0.522 $<$ Critical Value = 3.84
<br><br>
We will **not reject** our null hypothesis. There is **no** relationship between sex and presence of hepatomegaly.

###(3) Is there a relationship between sex and presence of spider angiomata?
$H_0$: There is no relationship between treatment group assignment and presence of spider angiomata.
<br>
$H_A$: There is a significant relationship between treatment group assignment and presence of spider angiomata.
<br>
Significance level: $\alpha = 0.05$


In [103]:
# Create the contingency table - sex and presence of spider angiomata

spiderF = female[female['SpiderAngiomata'] == 'yes']
spiderF = spiderF['SpiderAngiomata'].count()
nospiderF = female[female['SpiderAngiomata'] == 'no']
nospiderF = nospiderF['SpiderAngiomata'].count()

spiderM = male[male['SpiderAngiomata'] == 'yes']
spiderM = spiderM['SpiderAngiomata'].count()
nospiderM = male[male['SpiderAngiomata'] == 'no']
nospiderM = nospiderM['SpiderAngiomata'].count()

SexVsSpider = pd.DataFrame({'Female': [spiderF, nospiderF],
                             'Male': [spiderM, nospiderM]}, 
                            index=['With spider angiomata', 'Without spider angiomata'])
SexVsSpider

Unnamed: 0,Female,Male
With spider angiomata,86,4
Without spider angiomata,190,32


In [104]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(SexVsSpider)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['Female', 'Male'], 
                           index=['With spider angiomata', 'Without spider angiomata'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,5.297767
Critical value,3.841459
p-value,0.021353
degrees of freedom,1.0


Unnamed: 0,Female,Male
With spider angiomata,79.615385,10.384615
Without spider angiomata,196.384615,25.615385


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 0.02 $<$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 5.298 $>$ Critical Value = 3.84
<br><br>
We will **reject** our null hypothesis. There is a significant relationship between sex and presence of spider angiomata.

###(4) Is there a relationship between sex and presence of edema?
$H_0$: There is no relationship between treatment group assignment and presence of edema.
<br>
$H_A$: There is a significant relationship between treatment group assignment and presence of edema.
<br>
Significance level: $\alpha = 0.05$


In [105]:
# Create the contingency table - sex and presence of edema

edemaF = female[female['Edema'] == 'present']
edemaF = edemaF['Edema'].count()
diureseF = female[female['Edema'] == 'diurese']
diureseF = diureseF['Edema'].count()
absentF = female[female['Edema'] == 'absent']
absentF = absentF['Edema'].count()

edemaM = male[male['Edema'] == 'present']
edemaM = edemaM['Edema'].count()
diureseM = male[male['Edema'] == 'diurese']
diureseM = diureseM['Edema'].count()
absentM = male[male['Edema'] == 'absent']
absentM = absentM['Edema'].count()

SexVsEdema = pd.DataFrame({'Female': [edemaF, diureseF, absentF],
                             'Male': [edemaM, diureseM, absentM]}, 
                            index=['With edema', 'Diuresed edema', 'Absent edema'])
SexVsEdema

Unnamed: 0,Female,Male
With edema,17,3
Diuresed edema,25,4
Absent edema,234,29


In [106]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(SexVsEdema)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['Female', 'Male'], 
                           index=['With edema', 'Diuresed edema', 'Absent edema'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,0.446715
Critical value,5.991465
p-value,0.799829
degrees of freedom,2.0


Unnamed: 0,Female,Male
With edema,17.692308,2.307692
Diuresed edema,25.653846,3.346154
Absent edema,232.653846,30.346154


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 0.7998 $>$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 0.4467 $<$ Critical Value = 5.99
<br><br>
We will **not reject** our null hypothesis. There is **no** relationship between sex and presence of spider angiomata.

###(5) Is there a relationship between sex and histological stage?
$H_0$: There is no relationship between treatment group assignment and histological stage.
<br>
$H_A$: There is a significant relationship between treatment group assignment and histological stage.
<br>
Significance level: $\alpha = 0.05$


In [107]:
# Create the contingency table - sex and histological stage

Stage1F = female[female['HistologyStage'] == 'Stage1']
Stage1F = Stage1F['HistologyStage'].count()
Stage2F = female[female['HistologyStage'] == 'Stage2']
Stage2F = Stage2F['HistologyStage'].count()
Stage3F = female[female['HistologyStage'] == 'Stage3']
Stage3F = Stage3F['HistologyStage'].count()
Stage4F = female[female['HistologyStage'] == 'Stage4']
Stage4F = Stage4F['HistologyStage'].count()

Stage1M = male[male['HistologyStage'] == 'Stage1']
Stage1M = Stage1M['HistologyStage'].count()
Stage2M = male[male['HistologyStage'] == 'Stage2']
Stage2M = Stage2M['HistologyStage'].count()
Stage3M = male[male['HistologyStage'] == 'Stage3']
Stage3M = Stage3M['HistologyStage'].count()
Stage4M = male[male['HistologyStage'] == 'Stage14']
Stage4M = Stage4M['HistologyStage'].count()

SexVsHisto = pd.DataFrame({'Female': [Stage1F, Stage2F, Stage3F, Stage4F],
                             'Male': [Stage1M, Stage2M, Stage3M, Stage4M]}, 
                            index=['Stage 1', 'Stage 2', 'Stage 3', 'Stage 4'])
SexVsHisto

Unnamed: 0,Female,Male
Stage 1,13,3
Stage 2,61,6
Stage 3,108,12
Stage 4,94,0


In [108]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(SexVsHisto)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['Female', 'Male'], 
                           index=['Stage 1', 'Stage 2', 'Stage 3', 'Stage 4'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,12.402906
Critical value,7.814728
p-value,0.006123
degrees of freedom,3.0


Unnamed: 0,Female,Male
Stage 1,14.868687,1.131313
Stage 2,62.262626,4.737374
Stage 3,111.515152,8.484848
Stage 4,87.353535,6.646465


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 0.006 $<$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 12.4 $>$ Critical Value = 7.8
<br><br>
We will **reject** our null hypothesis. There is a significant relationship between sex and histological stage of the disease.

##D. Ascites

###(1) Is there a relationship between presence of ascites and presence of hepatomegaly?
$H_0$: There is no relationship between presence of ascites and presence of hepatomegaly.
<br>
$H_A$: There is a significant relationship between presence of ascites and presence of hepatomegaly.
<br>
Significance level: $\alpha = 0.05$


In [109]:
# Create the contingency table - presence of ascites and presence of hepatomegaly

asciteshepato = yesascites[yesascites['Hepatomegaly'] == 'yes']
asciteshepato = asciteshepato['Hepatomegaly'].count()
ascitesnohepato = yesascites[yesascites['Hepatomegaly'] == 'no']
ascitesnohepato = ascitesnohepato['Hepatomegaly'].count()

noasciteshepato = noascites[noascites['Hepatomegaly'] == 'yes']
noasciteshepato = noasciteshepato['Hepatomegaly'].count()
noascitesnohepato = noascites[noascites['Hepatomegaly'] == 'no']
noascitesnohepato = noascitesnohepato['Hepatomegaly'].count()

AscitesVsHepato = pd.DataFrame({'With ascites': [asciteshepato, ascitesnohepato],
                             'Without ascites': [noasciteshepato, noascitesnohepato]}, 
                            index=['With hepatomegaly', 'Without hepatomegaly'])
AscitesVsHepato

Unnamed: 0,With ascites,Without ascites
With hepatomegaly,19,141
Without hepatomegaly,5,147


In [110]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(AscitesVsHepato)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['With ascites', 'Without ascites'], 
                           index=['With hepatomegaly', 'Without hepatomegaly'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,6.927899
Critical value,3.841459
p-value,0.008486
degrees of freedom,1.0


Unnamed: 0,With ascites,Without ascites
With hepatomegaly,12.307692,147.692308
Without hepatomegaly,11.692308,140.307692


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 0.008 $<$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 6.93 $>$ Critical Value = 3.84
<br><br>
We will **reject** our null hypothesis. There is a significant relationship between ascites and hepatomegaly.

###(2) Is there a relationship between presence of ascites and presence of spider angiomata?
$H_0$: There is no relationship between presence of ascites and presence of spider angiomata.
<br>
$H_A$: There is a significant relationship between presence of ascites and presence of spider angiomata.
<br>
Significance level: $\alpha = 0.05$


In [111]:
# Create the contingency table - presence of ascites and presence of spider angiomata

ascitesspider = yesascites[yesascites['SpiderAngiomata'] == 'yes']
ascitesspider = ascitesspider['SpiderAngiomata'].count()
ascitesnospider = yesascites[yesascites['SpiderAngiomata'] == 'no']
ascitesnospider = ascitesnospider['SpiderAngiomata'].count()

noascitesspider = noascites[noascites['SpiderAngiomata'] == 'yes']
noascitesspider = noascitesspider['SpiderAngiomata'].count()
noascitesnospider = noascites[noascites['SpiderAngiomata'] == 'no']
noascitesnospider = noascitesnospider['SpiderAngiomata'].count()

AscitesVsSpider = pd.DataFrame({'With ascites': [ascitesspider, ascitesnospider],
                             'Without ascites': [noascitesspider, noascitesnospider]}, 
                            index=['With spider angiomata', 'Without spider angiomata'])
AscitesVsSpider

Unnamed: 0,With ascites,Without ascites
With spider angiomata,13,77
Without spider angiomata,11,211


In [112]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(AscitesVsSpider)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['With ascites', 'Without ascites'], 
                           index=['With spider angiomata', 'Without spider angiomata'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,6.839965
Critical value,3.841459
p-value,0.008914
degrees of freedom,1.0


Unnamed: 0,With ascites,Without ascites
With spider angiomata,6.923077,83.076923
Without spider angiomata,17.076923,204.923077


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 0.0089 $<$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 6.84 $>$ Critical Value = 3.84
<br><br>
We will **reject** our null hypothesis. There is a significant relationship between ascites and spider angiomata.

###(3) Is there a relationship between presence of ascites and presence of edema?
$H_0$: There is no relationship between presence of ascites and presence of edema.
<br>
$H_A$: There is a significant relationship between presence of ascites and presence of edema.
<br>
Significance level: $\alpha = 0.05$


In [113]:
# Create the contingency table - presence of ascites and presence of edema

ascitesedema = yesascites[yesascites['Edema'] == 'present']
ascitesedema = ascitesedema['Edema'].count()
ascitesdiurese = yesascites[yesascites['Edema'] == 'diurese']
ascitesdiurese = ascitesdiurese['Edema'].count()
ascitesnoedema = yesascites[yesascites['Edema'] == 'absent']
ascitesnoedema = ascitesnoedema['Edema'].count()

noascitesedema = noascites[noascites['Edema'] == 'preset']
noascitesedema = noascitesedema['Edema'].count()
noascitesdiurese = noascites[noascites['Edema'] == 'diurese']
noascitesdiurese = noascitesdiurese['Edema'].count()
noascitesnoedema = noascites[noascites['Edema'] == 'absent']
noascitesnoedema = noascitesnoedema['Edema'].count()

AscitesVsEdema = pd.DataFrame({'With ascites': [ascitesedema, ascitesdiurese, ascitesnoedema],
                             'Without ascites': [noascitesedema, noascitesdiurese, noascitesnoedema]}, 
                            index=['With edema', 'Diuresed edema', 'Absent edema'])
AscitesVsEdema

Unnamed: 0,With ascites,Without ascites
With edema,14,0
Diuresed edema,4,25
Absent edema,6,257


In [114]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(AscitesVsEdema)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['With ascites', 'Without ascites'], 
                           index=['With edema', 'Diuresed edema', 'Absent edema'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,177.1759
Critical value,5.991465
p-value,3.3631570000000005e-39
degrees of freedom,2.0


Unnamed: 0,With ascites,Without ascites
With edema,1.098039,12.901961
Diuresed edema,2.27451,26.72549
Absent edema,20.627451,242.372549


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 3.36e-39 $<$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 1.77e+2 $>$ Critical Value = 5.99
<br><br>
We will **reject** our null hypothesis. There is a significant relationship between ascites and edema.

###(4) Is there a relationship between presence of ascites and histological stage?
$H_0$: There is no relationship between presence of ascites and histological stage.
<br>
$H_A$: There is a significant relationship between presence of ascites and histological stage.
<br>
Significance level: $\alpha = 0.05$


In [115]:
# Create the contingency table - presence of ascites and histological stage

ascitesStage1 = yesascites[yesascites['HistologyStage'] == 'Stage1']
ascitesStage1 = ascitesStage1['HistologyStage'].count()
ascitesStage2 = yesascites[yesascites['HistologyStage'] == 'Stage2']
ascitesStage2 = ascitesStage2['HistologyStage'].count()
ascitesStage3 = yesascites[yesascites['HistologyStage'] == 'Stage3']
ascitesStage3 = ascitesStage3['HistologyStage'].count()
ascitesStage4 = yesascites[yesascites['HistologyStage'] == 'Stage4']
ascitesStage4 = ascitesStage4['HistologyStage'].count()

noascitesStage1 = noascites[noascites['HistologyStage'] == 'Stage1']
noascitesStage1 = noascitesStage1['HistologyStage'].count()
noascitesStage2 = noascites[noascites['HistologyStage'] == 'Stage2']
noascitesStage2 = noascitesStage2['HistologyStage'].count()
noascitesStage3 = noascites[noascites['HistologyStage'] == 'Stage3']
noascitesStage3 = noascitesStage3['HistologyStage'].count()
noascitesStage4 = noascites[noascites['HistologyStage'] == 'Stage4']
noascitesStage4 = noascitesStage4['HistologyStage'].count()

AscitesVsHisto = pd.DataFrame({'With ascites': [ascitesStage1, ascitesStage2, ascitesStage3, ascitesStage4],
                             'Without ascites': [noascitesStage1, noascitesStage2, noascitesStage3, noascitesStage4]}, 
                            index=['Stage 1', 'Stage 2', 'Stage 3', 'Stage 4'])
AscitesVsHisto

Unnamed: 0,With ascites,Without ascites
Stage 1,0,16
Stage 2,2,65
Stage 3,1,119
Stage 4,21,88


In [116]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(AscitesVsHisto)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['With ascites', 'Without ascites'], 
                           index=['Stage 1', 'Stage 2', 'Stage 3', 'Stage 4'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,31.93751
Critical value,7.814728
p-value,5.394582e-07
degrees of freedom,3.0


Unnamed: 0,With ascites,Without ascites
Stage 1,1.230769,14.769231
Stage 2,5.153846,61.846154
Stage 3,9.230769,110.769231
Stage 4,8.384615,100.615385


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 5.39e-7 $<$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 3.19e+1 $>$ Critical Value = 7.81
<br><br>
We will **reject** our null hypothesis. There is a significant relationship between ascites and histological stage.

## E. Hepatomegaly

###(1) Is there a relationship between presence of hepatomegaly and presence of spider angiomata?
$H_0$: There is no relationship between presence of hepatomegaly and presence of spider angiomata.
<br>
$H_A$: There is a significant relationship between presence of hepatomegaly and presence of spider angiomata.
<br>
Significance level: $\alpha = 0.05$


In [117]:
# Create the contingency table - presence of hepatomegaly and presence of spider angiomata

hepatospider = yeshepato[yeshepato['SpiderAngiomata'] == 'yes']
hepatospider = hepatospider['SpiderAngiomata'].count()
hepatonospider = yeshepato[yeshepato['SpiderAngiomata'] == 'no']
hepatonospider = hepatonospider['SpiderAngiomata'].count()

nohepatospider = nohepato[nohepato['SpiderAngiomata'] == 'yes']
nohepatospider = nohepatospider['SpiderAngiomata'].count()
nohepatonospider = nohepato[nohepato['SpiderAngiomata'] == 'no']
nohepatonospider = nohepatonospider['SpiderAngiomata'].count()

HepatoVsSpider = pd.DataFrame({'With hepatomegaly': [hepatospider, hepatonospider],
                             'Without hepatomegaly': [nohepatospider, nohepatonospider]}, 
                            index=['With spider angiomata', 'Without spider angiomata'])
HepatoVsSpider

Unnamed: 0,With hepatomegaly,Without hepatomegaly
With spider angiomata,67,23
Without spider angiomata,93,129


In [118]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(HepatoVsSpider)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['With hepatomegaly', 'Without hepatomegaly'], 
                           index=['With spider angiomata', 'Without spider angiomata'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,25.87435
Critical value,3.841459
p-value,3.643798e-07
degrees of freedom,1.0


Unnamed: 0,With hepatomegaly,Without hepatomegaly
With spider angiomata,46.153846,43.846154
Without spider angiomata,113.846154,108.153846


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 3.64e-7 $<$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 2.587e+1 $>$ Critical Value = 3.84
<br><br>
We will **reject** our null hypothesis. There is a significant relationship between hepatomegaly and spider angiomata.

###(2) Is there a relationship between presence of hepatomegaly and presence of edema?
$H_0$: There is no relationship between presence of hepatomegaly and presence of edema.
<br>
$H_A$: There is a significant relationship between presence of hepatomegaly and presence of edema.
<br>
Significance level: $\alpha = 0.05$


In [119]:
# Create the contingency table - presence of hepatomegaly and presence of edema

hepatoedema = yeshepato[yeshepato['Edema'] == 'present']
hepatoedema = hepatoedema['Edema'].count()
hepatodiurese = yeshepato[yeshepato['Edema'] == 'diurese']
hepatodiurese = hepatodiurese['Edema'].count()
hepatoabsent = yeshepato[yeshepato['Edema'] == 'absent']
hepatoabsent = hepatoabsent['Edema'].count()

nohepatoedema = nohepato[nohepato['Edema'] == 'present']
nohepatoedema = nohepatoedema['Edema'].count()
nohepatodiurese = nohepato[nohepato['Edema'] == 'diurese']
nohepatodiurese = nohepatodiurese['Edema'].count()
nohepatoabsent = nohepato[nohepato['Edema'] == 'absent']
nohepatoabsent = nohepatoabsent['Edema'].count()

HepatoVsEdema = pd.DataFrame({'With hepatomegaly': [hepatoedema, hepatodiurese, hepatoabsent],
                             'Without hepatomegaly': [nohepatoedema, nohepatodiurese, nohepatoabsent]}, 
                            index=['With edema', 'Diuresed edema', 'Absent edema'])
HepatoVsEdema

Unnamed: 0,With hepatomegaly,Without hepatomegaly
With edema,15,5
Diuresed edema,20,9
Absent edema,125,138


In [120]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(HepatoVsEdema)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['With hepatomegaly', 'Without hepatomegaly'], 
                           index=['With edema', 'Diuresed edema', 'Absent edema'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,9.616193
Critical value,5.991465
p-value,0.008163
degrees of freedom,2.0


Unnamed: 0,With hepatomegaly,Without hepatomegaly
With edema,10.25641,9.74359
Diuresed edema,14.871795,14.128205
Absent edema,134.871795,128.128205


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 0.008 $<$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 9.62 $>$ Critical Value = 5.99
<br><br>
We will **reject** our null hypothesis. There is a significant relationship between hepatomegaly and edema.

###(3) Is there a relationship between presence of hepatomegaly and histological stage?
$H_0$: There is no relationship between presence of hepatomegaly and histological stage.
<br>
$H_A$: There is a significant relationship between presence of hepatomegaly and histological stage.
<br>
Significance level: $\alpha = 0.05$


In [121]:
# Create the contingency table - presence of hepatomegaly and histological stage

hepatoStage1 = yeshepato[yeshepato['HistologyStage'] == 'Stage1']
hepatoStage1 = hepatoStage1['HistologyStage'].count()
hepatoStage2 = yeshepato[yeshepato['HistologyStage'] == 'Stage2']
hepatoStage2 = hepatoStage2['HistologyStage'].count()
hepatoStage3 = yeshepato[yeshepato['HistologyStage'] == 'Stage3']
hepatoStage3 = hepatoStage3['HistologyStage'].count()
hepatoStage4 = yeshepato[yeshepato['HistologyStage'] == 'Stage4']
hepatoStage4 = hepatoStage4['HistologyStage'].count()

nohepatoStage1 = nohepato[nohepato['HistologyStage'] == 'Stage1']
nohepatoStage1 = nohepatoStage1['HistologyStage'].count()
nohepatoStage2 = nohepato[nohepato['HistologyStage'] == 'Stage2']
nohepatoStage2 = nohepatoStage2['HistologyStage'].count()
nohepatoStage3 = nohepato[nohepato['HistologyStage'] == 'Stage3']
nohepatoStage3 = nohepatoStage3['HistologyStage'].count()
nohepatoStage4 = nohepato[nohepato['HistologyStage'] == 'Stage4']
nohepatoStage4 = nohepatoStage4['HistologyStage'].count()

HepatoVsHisto = pd.DataFrame({'With hepatomegaly': [hepatoStage1, hepatoStage2, hepatoStage3, hepatoStage4],
                            'Without hepatomegaly': [nohepatoStage1, nohepatoStage2, nohepatoStage3, nohepatoStage4]}, 
                            index=['Stage 1', 'Stage 2', 'Stage 3', 'Stage 4'])
HepatoVsHisto

Unnamed: 0,With hepatomegaly,Without hepatomegaly
Stage 1,0,16
Stage 2,19,48
Stage 3,53,67
Stage 4,88,21


In [122]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(HepatoVsHisto)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['With hepatomegaly', 'Without hepatomegaly'], 
                           index=['Stage 1', 'Stage 2', 'Stage 3', 'Stage 4'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,71.21075
Critical value,7.814728
p-value,2.349473e-15
degrees of freedom,3.0


Unnamed: 0,With hepatomegaly,Without hepatomegaly
Stage 1,8.205128,7.794872
Stage 2,34.358974,32.641026
Stage 3,61.538462,58.461538
Stage 4,55.897436,53.102564


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 2.349e-15 $<$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 7.12e+1 $>$ Critical Value = 7.8
<br><br>
We will **reject** our null hypothesis. There is a significant relationship between hepatomegaly and histological stage.

## F. Spider angiomata

###(1) Is there a relationship between presence of spider angiomata and presence of edema?
$H_0$: There is no relationship between presence of spider angiomata and presence of edema.
<br>
$H_A$: There is a significant relationship between presence of spider angiomata and presence of edema.
<br>
Significance level: $\alpha = 0.05$


In [123]:
# Create the contingency table - presence of spider angiomata and edema

spiderEdema = yesspider[yesspider['Edema'] == 'present']
spiderEdema = spiderEdema['Edema'].count()
spiderDiurese = yesspider[yesspider['Edema'] == 'diurese']
spiderDiurese = spiderDiurese['Edema'].count()
spiderAbsent = yesspider[yesspider['Edema'] == 'absent']
spiderAbsent = spiderAbsent['Edema'].count()

nospiderEdema = nospider[nospider['Edema'] == 'present']
nospiderEdema = nospiderEdema['Edema'].count()
nospiderDiurese = nospider[nospider['Edema'] == 'diurese']
nospiderDiurese = nospiderDiurese['Edema'].count()
nospiderAbsent = nospider[nospider['Edema'] == 'absent']
nospiderAbsent = nospiderAbsent['Edema'].count()

SpiderVsEdema = pd.DataFrame({'With spider angiomata': [spiderEdema, spiderDiurese, spiderAbsent],
                             'Without spider angiomata': [nospiderEdema, nospiderDiurese, nospiderAbsent]}, 
                            index=['With edema', 'Diuresed edema', 'Absent edema'])
SpiderVsEdema

Unnamed: 0,With spider angiomata,Without spider angiomata
With edema,14,6
Diuresed edema,13,16
Absent edema,63,200


In [124]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(SpiderVsEdema)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['With hepatomegaly', 'Without hepatomegaly'], 
                           index=['With edema', 'Diuresed edema', 'Absent edema'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,23.177921
Critical value,5.991465
p-value,9e-06
degrees of freedom,2.0


Unnamed: 0,With hepatomegaly,Without hepatomegaly
With edema,5.769231,14.230769
Diuresed edema,8.365385,20.634615
Absent edema,75.865385,187.134615


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 0.000009 $<$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 23.178 $>$ Critical Value = 5.99
<br><br>
We will **reject** our null hypothesis. There is a significant relationship between hepatomegaly and edema.

###(2) Is there a relationship between presence of spider angiomata and histological stage?
$H_0$: There is no relationship between presence of spider angiomata and histological stage.
<br>
$H_A$: There is a significant relationship between presence of spider angiomata and histological stage.
<br>
Significance level: $\alpha = 0.05$


In [125]:
# Create the contingency table - presence of spider angiomata and histological stage

spiderStage1 = yesspider[yesspider['HistologyStage'] == 'Stage1']
spiderStage1 = spiderStage1['HistologyStage'].count()
spiderStage2 = yesspider[yesspider['HistologyStage'] == 'Stage2']
spiderStage2 = spiderStage2['HistologyStage'].count()
spiderStage3 = yesspider[yesspider['HistologyStage'] == 'Stage3']
spiderStage3 = spiderStage3['HistologyStage'].count()
spiderStage4 = yesspider[yesspider['HistologyStage'] == 'Stage4']
spiderStage4 = spiderStage4['HistologyStage'].count()

nospiderStage1 = nospider[nospider['HistologyStage'] == 'Stage1']
nospiderStage1 = nospiderStage1['HistologyStage'].count()
nospiderStage2 = nospider[nospider['HistologyStage'] == 'Stage2']
nospiderStage2 = nospiderStage2['HistologyStage'].count()
nospiderStage3 = nospider[nospider['HistologyStage'] == 'Stage3']
nospiderStage3 = nospiderStage3['HistologyStage'].count()
nospiderStage4 = nospider[nospider['HistologyStage'] == 'Stage4']
nospiderStage4 = nospiderStage4['HistologyStage'].count()

SpiderVsHisto = pd.DataFrame({'With spider angiomata': [spiderStage1, spiderStage2, spiderStage3, spiderStage4],
                             'Without spider angiomata': [nospiderStage1, nospiderStage2, nospiderStage3, nospiderStage4]}, 
                            index=['Stage 1', 'Stage 2', 'Stage 3', 'Stage 4'])
SpiderVsHisto

Unnamed: 0,With spider angiomata,Without spider angiomata
Stage 1,1,15
Stage 2,9,58
Stage 3,30,90
Stage 4,50,59


In [126]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(SpiderVsHisto)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['With hepatomegaly', 'Without hepatomegaly'], 
                           index=['Stage 1', 'Stage 2', 'Stage 3', 'Stage 4'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,27.993448
Critical value,7.814728
p-value,4e-06
degrees of freedom,3.0


Unnamed: 0,With hepatomegaly,Without hepatomegaly
Stage 1,4.615385,11.384615
Stage 2,19.326923,47.673077
Stage 3,34.615385,85.384615
Stage 4,31.442308,77.557692


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 0.000004 $<$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 27.993 $>$ Critical Value = 7.8
<br><br>
We will **reject** our null hypothesis. There is a significant relationship between hepatomegaly and histological stage.

## G. Edema

###(1) Is there a relationship between presence of edema and histological stage?
$H_0$: There is no relationship between presence of spider angiomata and histological stage.
<br>
$H_A$: There is a significant relationship between presence of spider angiomata and histological stage.
<br>
Significance level: $\alpha = 0.05$


In [127]:
# Create the contingency table - presence of edema and histological stage
presentStage1 = present[present['HistologyStage'] == 'Stage1']
presentStage1 = presentStage1['HistologyStage'].count()
presentStage2 = present[present['HistologyStage'] == 'Stage2']
presentStage2 = presentStage2['HistologyStage'].count()
presentStage3 = present[present['HistologyStage'] == 'Stage3']
presentStage3 = presentStage3['HistologyStage'].count()
presentStage4 = present[present['HistologyStage'] == 'Stage4']
presentStage4 = presentStage4['HistologyStage'].count()
diureseStage1 = diurese[diurese['HistologyStage'] == 'Stage1']
diureseStage1 = diureseStage1['HistologyStage'].count()
diureseStage2 = diurese[diurese['HistologyStage'] == 'Stage2']
diureseStage2 = diureseStage2['HistologyStage'].count()
diureseStage3 = diurese[diurese['HistologyStage'] == 'Stage3']
diureseStage3 = diureseStage3['HistologyStage'].count()
diureseStage4 = diurese[diurese['HistologyStage'] == 'Stage4']
diureseStage4 = diureseStage4['HistologyStage'].count()
absentStage1 = absent[absent['HistologyStage'] == 'Stage1']
absentStage1 = absentStage1['HistologyStage'].count()
absentStage2 = absent[absent['HistologyStage'] == 'Stage2']
absentStage2 = absentStage2['HistologyStage'].count()
absentStage3 = absent[absent['HistologyStage'] == 'Stage3']
absentStage3 = absentStage3['HistologyStage'].count()
absentStage4 = absent[absent['HistologyStage'] == 'Stage4']
absentStage4 = absentStage4['HistologyStage'].count()
EdemaVsHisto = pd.DataFrame({'With edema': [presentStage1, presentStage2, presentStage3, presentStage4], 'Diuresed edema': [diureseStage1, diureseStage2, diureseStage3, diureseStage4], 'Absent edema': [absentStage1, absentStage2, absentStage3, absentStage4]}, index=['Stage 1', 'Stage 2', 'Stage 3', 'Stage 4'])
EdemaVsHisto

Unnamed: 0,With edema,Diuresed edema,Absent edema
Stage 1,0,0,16
Stage 2,1,4,62
Stage 3,3,11,106
Stage 4,16,14,79


In [128]:
# Perform the chi square test and determine the critical value (using 95% probability)

chi2stat, pval, dof, expected = chi2_contingency(EdemaVsHisto)
prob = 0.95
critval = chi2.ppf(prob, dof)
result = pd.DataFrame([chi2stat, critval, pval, dof], 
                      index=['Chi2 statistics', 'Critical value', 'p-value', 'degrees of freedom'])
expectedval = pd.DataFrame(expected, columns=['With edema', 'Diuresed edema', 'Absent edema'], 
                           index=['Stage 1', 'Stage 2', 'Stage 3', 'Stage 4'])
display(result)
display(expectedval)

Unnamed: 0,0
Chi2 statistics,24.850897
Critical value,12.591587
p-value,0.000364
degrees of freedom,6.0


Unnamed: 0,With edema,Diuresed edema,Absent edema
Stage 1,1.025641,1.487179,13.487179
Stage 2,4.294872,6.227564,56.477564
Stage 3,7.692308,11.153846,101.153846
Stage 4,6.987179,10.13141,91.88141


### Result and Interpretation
Our result revealed that:
<br><br>
p-value = 0.000364 $<$ $\alpha = 0.05$
<br>
$\chi^2$ stat = 24.85 $>$ Critical Value = 12.59
<br><br>
We will **reject** our null hypothesis. There is a significant relationship between edema and histological stage.