In [None]:
import pandas as pd

from exploration.data_correlations import plot_correlations
from exploration.data_statistics import plot_patientwise_errors, plot_labels_pie_chart, plot_blood_draw_statistics
from exploration.midterm_exploration import plot_feature_against_diagnosis
from modeling.mrmr import perform_mrmr
from preprocessing.data_cleaning import get_cleaned_data, split_csv, get_features_label
from preprocessing.encoding import encode_data
from utils.utils import log_features

In [None]:
# Build the working frame: cleaned data passed through encode_data.
# The unfiltered result keeps its own name so the filter below reads from a
# stable input rather than rebinding `df` twice.
encoded_df = encode_data(get_cleaned_data())

# Drop every row whose P1_PT_TYPE equals 3.
# NOTE(review): the meaning of code 3 is not visible here — confirm against
# the encoding module before relying on it.
keep_rows = encoded_df["P1_PT_TYPE"] != 3
df = encoded_df[keep_rows]

log_features(df)

# split_csv returns two objects, bound here as the `blood` and `clinical` subsets.
blood, clinical = split_csv(df)

In [None]:
# Columns of `df` handed to plot_patientwise_errors, and the display labels
# passed alongside them.
# NOTE(review): three column names are passed but four labels ("Race" has no
# matching entry in the column list) — confirm how plot_patientwise_errors
# pairs labels with columns.
error_columns = ["C1_WAIS3_DIGTOT", "B1_BMI", "RBM_Insulin"]
error_labels = ["Race", "WAIS3 Digits Score", "BMI", "Insulin"]

plot_patientwise_errors(df, error_columns, error_labels, png="Patientwise Errors")

In [None]:
# Draw the labels pie chart for the filtered frame `df`.
# `png` is presumably the name under which the figure is saved — confirm in
# exploration.data_statistics.
plot_labels_pie_chart(df, png="Pie Chart of Diagnoses")

In [None]:
# Draw the blood-draw statistics figure for the filtered frame `df`.
# `png` is presumably the name under which the figure is saved — confirm in
# exploration.data_statistics.
plot_blood_draw_statistics(df, png="Blood Draw Proportions")

In [None]:
# Run the project's correlation plot on the filtered frame `df`
# (no output name is passed, unlike the other plotting cells).
plot_correlations(df)

In [None]:
# Feature selection with perform_mrmr.
# pandas (`pd`) comes from the notebook's top import cell rather than being
# re-imported mid-notebook.

# NOTE(review): the helper is named get_features_label, which suggests a
# (features, label) return order, yet the result is unpacked as (y, X) —
# confirm the helper's actual return order.
y, X = get_features_label(df)

# Wrap the label in a pandas Series before passing it to perform_mrmr.
y = pd.Series(y)

# The third argument (10) is presumably the number of features to select —
# TODO confirm against modeling.mrmr.
features = perform_mrmr(X, y, 10)

In [None]:
# Plot the "Q1_Total_tau" column of the `blood` subset via
# plot_feature_against_diagnosis. The same text serves as the figure title and
# as the `png` argument (presumably the saved image's name — confirm).
tau_caption = "Total Tau Concentration by Diagnosis"
plot_feature_against_diagnosis(
    blood,
    "Q1_Total_tau",
    title=tau_caption,
    ylabel="Total Tau Concentration (pg/mL)",
    png=tau_caption,
)

In [None]:
# Plot the "C1_MMSE" column of the `clinical` subset via
# plot_feature_against_diagnosis. The same text serves as the figure title and
# as the `png` argument (presumably the saved image's name — confirm).
mmse_caption = "MMSE Score by Diagnosis"
plot_feature_against_diagnosis(
    clinical,
    "C1_MMSE",
    title=mmse_caption,
    ylabel="MMSE Score",
    png=mmse_caption,
)

In [None]:
# Plot the "C1_WMSR_DIGTOT" column of the `clinical` subset via
# plot_feature_against_diagnosis. The same text serves as the figure title and
# as the `png` argument (presumably the saved image's name — confirm).
wmsr_caption = "WMSR Digit Span Score by Diagnosis"
plot_feature_against_diagnosis(
    clinical,
    "C1_WMSR_DIGTOT",
    title=wmsr_caption,
    ylabel="WMSR Digit Span Score",
    png=wmsr_caption,
)

In [None]:
# Plot the "C1_CDRSUM" column of the `clinical` subset via
# plot_feature_against_diagnosis. The same text serves as the figure title and
# as the `png` argument (presumably the saved image's name — confirm).
cdr_caption = "CDR Score by Diagnosis"
plot_feature_against_diagnosis(
    clinical,
    "C1_CDRSUM",
    title=cdr_caption,
    ylabel="CDR Score",
    png=cdr_caption,
)

# Restrict to rows with P1_PT_TYPE == 2 and count how often each C1_CDRSUM
# value occurs among them. As the cell's last expression, the counts are what
# the cell displays.
# NOTE(review): the meaning of patient-type code 2 is not visible here.
is_type_two = clinical["P1_PT_TYPE"] == 2
temp = clinical[is_type_two]
temp["C1_CDRSUM"].value_counts()

In [None]:
# Absolute correlation of every `blood` column with P1_PT_TYPE.
# pandas (`pd`) comes from the notebook's top import cell rather than being
# re-imported here.

# One-hot encode P1_PT_TYPE.
# NOTE(review): label_encoded_df is not read by any later line in this cell —
# the correlations below are computed from `blood` itself. Confirm whether the
# one-hot frame was meant to feed the correlation instead.
label_encoded_df = pd.get_dummies(blood, columns=["P1_PT_TYPE"])

# Pairwise correlation matrix, made absolute, flattened to a Series keyed by
# a pair of column names.
correlations = blood.corr().abs().unstack()

# Select the entries paired with P1_PT_TYPE. sort_values() orders ascending,
# so the first 100 index labels printed below are the columns with the
# SMALLEST absolute correlation.
# NOTE(review): confirm the weakest (rather than strongest) correlations are
# the ones intended here.
ad_correlations = correlations["P1_PT_TYPE"].sort_values()
print(ad_correlations.index[:100])