# SNP Colon Cancer Dataset: Applying Different Approaches with K-NN-Based Imputation

In this IPython notebook, we applied different approaches to determine which one is more suitable for our analysis.

In [1]:
import sys
# Relative path that every data location in this notebook is built from.
# It is also put on sys.path, presumably so that the project-local `src`
# package imported below can be resolved -- confirm against the repo layout.
main_path = "../.."
# Guarded append keeps the cell idempotent: re-running it on a live kernel
# does not stack duplicate entries on sys.path.
if main_path not in sys.path:
    sys.path.append(main_path)

# importing modules
import os
import yaml
import pickle
import numpy as np
import pandas as pd
from src.scripts.model_and_evaluation import ModelAndEvaluation
import warnings

warnings.simplefilter('ignore')

In [2]:
# Create the project-local ModelAndEvaluation helper with its default
# constructor arguments; the cells below call its return_metric_results
# method once per feature set.
# NOTE(review): what the constructor configures is not visible from this
# notebook -- confirm in src/scripts/model_and_evaluation.py.
model_and_evaluation = ModelAndEvaluation()

In [3]:
# Folder from which every CSV in this notebook is read.
df_path = f"{main_path}/data/processed/dataframes"

# Read the targets file and flatten its values into a 1-D array.
targets_frame = pd.read_csv(f"{df_path}/targets.csv")
targets = targets_frame.values.ravel()

### Baseline

In [4]:
# Read the baseline-approach table, then hand its raw values together with the
# targets to return_metric_results and keep whatever it returns.
baseline_csv = f"{df_path}/baseline_approach.csv"
baseline_df = pd.read_csv(baseline_csv)
metrics_score_baseline = model_and_evaluation.return_metric_results(
    baseline_df.values,
    targets,
)

### OHE -> DI

In [5]:
# Read the OHE -> DI table, then hand its raw values together with the targets
# to return_metric_results and keep whatever it returns.
ohe_di_csv = f"{df_path}/ohe_di.csv"
ohe_di = pd.read_csv(ohe_di_csv)
metrics_score_ohe_di = model_and_evaluation.return_metric_results(
    ohe_di.values,
    targets,
)

### OHE -> DI -> FI

In [6]:
# Read the OHE -> DI -> FI table, then hand its raw values together with the
# targets to return_metric_results and keep whatever it returns.
ohe_di_fi_csv = f"{df_path}/ohe_di_fi.csv"
ohe_di_fi = pd.read_csv(ohe_di_fi_csv)
metrics_score_ohe_di_fi = model_and_evaluation.return_metric_results(
    ohe_di_fi.values,
    targets,
)

### CDT -> DI

In [7]:
# Read the table stored as cdt_di.csv, then hand its raw values together with
# the targets to return_metric_results and keep whatever it returns.
cdt_di_csv = f"{df_path}/cdt_di.csv"
cdt_di = pd.read_csv(cdt_di_csv)
metrics_score_cdt_di = model_and_evaluation.return_metric_results(
    cdt_di.values,
    targets,
)

### CDT -> DI -> FI

In [8]:
# Read the CDT -> DI -> FI table, then hand its raw values together with the
# targets to return_metric_results and keep whatever it returns.
cdt_di_fi_csv = f"{df_path}/cdt_di_fi.csv"
cdt_di_fi = pd.read_csv(cdt_di_fi_csv)
metrics_score_cdt_di_fi = model_and_evaluation.return_metric_results(
    cdt_di_fi.values,
    targets,
)

### CDT -> DI -> OHE

In [9]:
# Read the CDT -> DI -> OHE table, then hand its raw values together with the
# targets to return_metric_results and keep whatever it returns.
cdt_di_ohe_csv = f"{df_path}/cdt_di_ohe.csv"
cdt_di_ohe = pd.read_csv(cdt_di_ohe_csv)
metrics_score_cdt_di_ohe = model_and_evaluation.return_metric_results(
    cdt_di_ohe.values,
    targets,
)

### CDT -> DI -> FI -> OHE

In [10]:
# Read the CDT -> DI -> FI -> OHE table, then hand its raw values together with
# the targets to return_metric_results and keep whatever it returns.
cdt_di_fi_ohe_csv = f"{df_path}/cdt_di_fi_ohe.csv"
cdt_di_fi_ohe = pd.read_csv(cdt_di_fi_ohe_csv)
metrics_score_cdt_di_fi_ohe = model_and_evaluation.return_metric_results(
    cdt_di_fi_ohe.values,
    targets,
)

### Saving Results

In [11]:
# Gather the result of every approach under a short key. Insertion order
# matches the order in which the approaches were run above.
scores = dict(
    baseline=metrics_score_baseline,
    ohe_di=metrics_score_ohe_di,
    ohe_di_fi=metrics_score_ohe_di_fi,
    cdt_di=metrics_score_cdt_di,
    cdt_di_fi=metrics_score_cdt_di_fi,
    cdt_di_ohe=metrics_score_cdt_di_ohe,
    cdt_di_fi_ohe=metrics_score_cdt_di_fi_ohe,
)

# Pickle the collected results, creating the output folder first if it does
# not exist yet. An existing best_scores.pkl is overwritten ('wb' mode).
scores_dir = f"{main_path}/data/processed/model_scores"
scores_file = f"{main_path}/data/processed/model_scores/best_scores.pkl"
os.makedirs(scores_dir, exist_ok=True)
with open(scores_file, 'wb') as handle:
    pickle.dump(scores, handle)