In [31]:
# import packages
import numpy as np 
import pandas as pd 
from scipy.stats import chi2_contingency
from scipy.stats import ttest_ind
import matplotlib.pyplot as plt
%matplotlib inline
import plotnine as p9
import seaborn as sns

# import data
internal = pd.read_csv('data/medical_clean.csv')
external = pd.read_csv('data/External_Hospital_Data.csv')

# Define boolean masks
internal_readmitted_mask = internal['ReAdmis'] == 'Yes'
external_readmitted_mask = external['Readmission'] == 'Yes'

# Confidence level
alpha = .05

## Initial Days

### T-test: Internal Intial Days and Readmission

In [50]:
# Perform the t-test:
t_stat, p_value = ttest_ind(internal[internal_readmitted_mask].Initial_days
                            ,internal[np.logical_not(internal_readmitted_mask)].Initial_days)

# Interpret the results:
if p_value < alpha:
    print("Reject the null hypothesis [p_value = {:.3f}]; \n there is a significant difference between the initial days of patients readmitted an patients not readmitted.".format(p_value))
else:
    print("Fail to reject the null hypothesis [p_value = {:.3f}]; \n there is no significant difference between the initial days of patients readmitted an patients not readmitted.".format(p_value))
    

Reject the null hypothesis [p_value = 0.000]; 
 there is a significant difference between the initial days of patients readmitted an patients not readmitted.


### T-test: External Intial Days and Readmission

In [49]:
# Perform the t-test:
t_stat, p_value = ttest_ind(external[external_readmitted_mask].Length_of_Stay
                            ,external[np.logical_not(external_readmitted_mask)].Length_of_Stay)

# Interpret the results:
if p_value < alpha:
    print("Reject the null hypothesis [p_value = {:.3f}]; \n there is a significant difference between the initial days of patients readmitted and patients not readmitted.".format(p_value))
else:
    print("Fail to reject the null hypothesis [p_value = {:.3f}]; \n there is no significant difference between the initial days of patients readmitted and patients not readmitted.".format(p_value))
    

Fail to reject the null hypothesis [p_value = 0.143]; 
 there is no significant difference between the initial days of patients readmitted and patients not readmitted.


### T-test: Internal vs. External Intial Days

In [38]:
# Perform the t-test:
t_stat, p_value = ttest_ind(external.Length_of_Stay
                            , internal.Initial_days)

# Interpret the results:
if p_value < alpha:
    print("Reject the null hypothesis [p_value = {:.3f}]; \n there is a significant difference between the initial days of all patients from the internal and external data.".format(p_value))
else:
    print("Fail to reject the null hypothesis [p_value = {:.3f%}]; \n there is no significant difference between the initial days of all patients from the internal and external data.")
    

Reject the null hypothesis [p_value = 0.000]; 
 there is a significant difference between the initial days of all patients from the internal and external data.


## Age

### T-test: Age internal vs. external 

In [48]:
# Perform the t-test:
t_stat, p_value = ttest_ind(external.Age
                            ,internal.Age)

# Interpret the results:
if p_value < alpha:
    print("Reject the null hypothesis [p_value = {:.3f}]; \n there is a significant difference between the initial days of patients from the internal and external data.".format(p_value))
else:
    print("Fail to reject the null hypothesis [p_value = {:.3f}]; \n there is no significant difference between the patients age from the internal and external data.".format(p_value))
    

Fail to reject the null hypothesis [p_value = 0.719]; 
 there is no significant difference between the initial days of patients from the internal and external data.


### T-test: Age of Internal Readmitted and Not Readmitted

In [53]:
# Perform the t-test:
t_stat, p_value = ttest_ind(internal[internal_readmitted_mask].Age
                            ,internal[np.logical_not(internal_readmitted_mask)].Age)

# Interpret the results:
if p_value < alpha:
    print("Reject the null hypothesis [p_value = {:.3f}]; \n there is a significant difference between the internal datas Age of patients readmitted and patients not readmitted.".format(p_value))
else:
    print("Fail to reject the null hypothesis [p_value = {:.3f}]; \n there is no significant difference between the internal datas Age of patients readmitted and patients not readmitted.".format(p_value))
    

Fail to reject the null hypothesis [p_value = 0.114]; 
 there is no significant difference between the internal datas Age of patients readmitted and patients not readmitted.


### T-test: External intial days and Readmission

In [55]:
# Perform the t-test:
t_stat, p_value = ttest_ind(external[external_readmitted_mask].Age
                            ,external[np.logical_not(external_readmitted_mask)].Age)

# Interpret the results:
if p_value < alpha:
    print("Reject the null hypothesis [p_value = {:.3f}]; \n there is a significant difference between the external datas Age of patients readmitted and patients not readmitted.".format(p_value))
else:
    print("Fail to reject the null hypothesis [p_value = {:.3f}]; \n there is no significant difference between the external datas Age of patients readmitted and patients not readmitted.".format(p_value))
    

Reject the null hypothesis [p_value = 0.000]; 
 there is a significant difference between the external datas Age of patients readmitted and patients not readmitted.


### T-test: Internal vs External Age of Readmission

In [57]:
# Perform the t-test:
t_stat, p_value = ttest_ind(internal[internal_readmitted_mask].Age
                            ,external[external_readmitted_mask].Age)

# Interpret the results:
if p_value < alpha:
    print("Reject the null hypothesis [p_value = {:.3f}]; \n there is a significant difference between the internal data and external data Age of patients who were readmitted.".format(p_value))
else:
    print("Fail to reject the null hypothesis [p_value = {:.3f}]; \n there is no significant difference between the internal data and external data Age of patients who were readmitted".format(p_value))
    

Reject the null hypothesis [p_value = 0.000]; 
 there is a significant difference between the internal data and external data Age of patients who were readmitted.


## Gender

### Chi2: Readmission vs Gender Internal

In [69]:
# Create a contingency table
contingency_table = pd.crosstab(index=internal['Gender'], columns=internal['ReAdmis'], margins=True)

# Run Chi-Square Test
chi2, p_value, dof, expected = chi2_contingency(contingency_table)

# Interpret the results:
if p_value < alpha:
    print("Reject the null hypothesis [p_value = {:.3f}]; \n there is a significant difference for the internal data between gender and readmission.".format(p_value))
else:
    print("Fail to reject the null hypothesis [p_value = {:.3f}]; \n there is no significant difference for the internal data between gender and readmission".format(p_value))
    

Fail to reject the null hypothesis [p_value = 0.954]; 
 there is no significant difference for the internal data between gender and readmission


### Chi2: Readmission vs Gender External

In [71]:
# Create a contingency table
contingency_table = pd.crosstab(index=external['Gender'], columns=external['Readmission'], margins=True)

# Run Chi-Square Test
chi2, p_value, dof, expected = chi2_contingency(contingency_table)

# Interpret the results:
if p_value < alpha:
    print("Reject the null hypothesis [p_value = {:.3f}]; \n there is a significant difference for the external data between gender and readmission.".format(p_value))
else:
    print("Fail to reject the null hypothesis [p_value = {:.3f}]; \n there is no significant difference for the external data between gender and readmission".format(p_value))
    

Reject the null hypothesis [p_value = 0.000]; 
 there is a significant difference for the external data between gender and readmission.
