In [49]:
import pandas as pd
from pathlib import Path
import scipy.stats as stats
import numpy as np

In [50]:
def get_data():
    data = pd.read_csv("POSE.csv", index_col=0, parse_dates=["DATE.INDUC", "DATE.DEATH", "DATE.DEATH.BIS"])
    data = data.loc[~data["HEMOGLOBINE_PREOP"].isna().values]
    print(f"There are {len(data)} patients with given Hb")
    return data

In [51]:
def set_alive(data, death_column="DATE.DEATH"):
    data["ALIVE"] = pd.Series(np.nan, index=data.index)
    alive_yes = data.loc[(data["ALIVE.J30"] == "OUI").values].index
    alive_no = data.loc[(data["ALIVE.J30"] == "NON").values].index
    print(f"There are {len(alive_yes)} patients that are still alive")
    print(f"There are {len(alive_no)} patients that are dead")
    data.loc[alive_yes, "ALIVE"] = 1
    data.loc[alive_no, "ALIVE"] = 0
    dead_date = [x.days <= 30 for x in (data["DATE.DEATH"] - data["DATE.INDUC"])]
    print(f"We found {dead_date.count(True)} dead patients from their date information")
    data.loc[dead_date, "ALIVE"] = 0
    alive_date = [x.days > 30 for x in (data[death_column] - data["DATE.INDUC"])]
    print(f"We found {alive_date.count(True)} alive patients from their date information")
    data.loc[alive_date, "ALIVE"] = 1
    nalive = len(data[data["ALIVE"] == 1])
    ndead = len(data[data["ALIVE"] == 0])
    print(f"Total : {nalive} alive and {ndead} dead")
    return data

In [58]:
def set_anemia(data):
    data["ANEMIE"] = pd.Series(0, index=data.index)
    idx_female = data[data["SEXE"] == "Female"].index
    idx_male = data[data["SEXE"] == "Male"].index
    anemique_female_index = data.loc[idx_female].loc[(data.loc[idx_female, "HEMOGLOBINE_PREOP"] < 12.0).values].index
    anemique_male_index = data.loc[idx_male].loc[(data.loc[idx_male, "HEMOGLOBINE_PREOP"] < 13.0).values].index
    print(f"There are {len(anemique_female_index) + len(anemique_male_index)} anaemic patients")
    data.loc[anemique_female_index, "ANEMIE"] = 1
    data.loc[anemique_male_index, "ANEMIE"] = 1
    return data

In [62]:
def get_fisher_df(data):
    df = pd.DataFrame(columns=["Alive", "Dead"], index=["Anaemic", "Not Anaemic"])
    df.loc["Not Anaemic", "Dead"] = len(data.loc[((data["ALIVE"] == 0) & (data["ANEMIE"] == 0)).values])
    df.loc["Anaemic", "Dead"] = len(data.loc[((data["ALIVE"] == 0) & (data["ANEMIE"] == 1)).values])
    df.loc["Anaemic", "Alive"] = len(data.loc[((data["ALIVE"] == 1) & (data["ANEMIE"] == 1)).values])
    df.loc["Not Anaemic", "Alive"] = len(data.loc[((data["ALIVE"] == 1) & (data["ANEMIE"] == 0)).values])
    return df

In [63]:
df = get_data()

There are 1445 patients with given Hb


In [64]:
df = set_alive(df)

There are 1239 patients that are still alive
There are 7 patients that are dead
We found 38 dead patients from their date information
We found 0 alive patients from their date information
Total : 1239 alive and 39 dead


In [65]:
df = set_anemia(df)

There are 638 anaemic patients


In [66]:
df_for_fisher = get_fisher_df(data)

In [67]:
df_for_fisher.values

array([[517, 6],
       [722, 1]], dtype=object)

In [69]:
onetaile_p, twtail_p = stats.fisher_exact(df_for_fisher)