In [1]:
# Tensorflow / Keras
from tensorflow import keras # for building Neural Networks
from keras.models import Sequential # for creating a linear stack of layers for our Neural Network
from keras import Input # for instantiating a keras tensor
from keras.layers import Dense # for creating regular densely-connected NN layers.
import tensorflow as tf
from keras.models import load_model

# Data manipulation
import pandas as pd # for data manipulation
import numpy as np # for data manipulation

# Sklearn
import sklearn # for model evaluation
from sklearn.model_selection import train_test_split # for splitting data into train and test samples
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Erstellung eigener Aktivierungsfunktion
from keras import backend as K
from sklearn.ensemble import RandomForestRegressor

import os
import pandas as pd
import numpy as np

from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

from sklearn.impute import KNNImputer
#from missingpy import MissForest

In [14]:
pip install scikit-learn==1.1.1

Note: you may need to restart the kernel to use updated packages.


In [4]:
from missingpy import MissForest

In [3]:
import sys
sys.modules['sklearn.neighbors.base'] = sklearn.neighbors._base

In [5]:
class DataProcessor:
    def __init__(self):
        self.raw_data = None
        self.X = None
        self.Y = None
        self.new_df = None
        self.data_inpute = None
        self.result = None

    def import_data(self, sample_size = 4200):
        csv_files = []
        for filename in os.listdir():
            if filename == "Datasets":
                for csv_file in os.listdir(filename):
                    if csv_file.endswith('.csv'):
                        csv_files.append(csv_file)

        self.raw_data = pd.read_csv(os.path.join("Datasets", csv_files[2]))
        
        # Auswahl von 4000 zufälligen Zeilen
        self.raw_data = self.raw_data.sample(n=sample_size, random_state=42) 
        
        self.Y = self.raw_data["label"]
        self.X = self.raw_data.drop(["label"], axis=1)
        return self.raw_data, self.Y, self.X

    def gen_miss_values(self, p):
        
        shape = self.X.shape   
        self.new_df = self.X.copy().astype(np.float64)
        missing = np.random.binomial(1, p, shape)
        self.new_df[missing.astype('bool')] = np.nan
        return self.new_df

    def inpute_data(self, model):
        if model == "mean":
            self.data_inpute = self.new_df.fillna(self.new_df.mean())
            self.data_inpute = pd.concat([self.data_inpute, self.Y], axis=1, sort=False)
            columns = self.data_inpute.columns.tolist()

            for i in range(len(columns) - 1):
                columns[i] = "col_" + str(i + 1)
            self.data_inpute.columns = columns
            self.data_inpute.columns = [*self.data_inpute.columns[:-1], 'Y']

            self.Y = self.data_inpute["Y"]
            self.X = self.data_inpute.drop(["Y"], axis=1)
            return self.data_inpute

        elif model == "MICE":
            imputer = IterativeImputer() #max_iter=10, n_nearest_features = None
            self.data_inpute = pd.DataFrame(imputer.fit_transform(self.new_df), columns=self.new_df.columns)
            self.Y.index = self.data_inpute.index
            self.data_inpute = pd.concat([self.data_inpute, self.Y], axis=1, sort=False)
            print(self.data_inpute)
            columns = self.data_inpute.columns.tolist()

            for i in range(len(columns) - 1):
                columns[i] = "col_" + str(i + 1)
            self.data_inpute.columns = columns
            self.data_inpute.columns = [*self.data_inpute.columns[:-1], 'Y']

            self.Y = self.data_inpute["Y"]
            self.X = self.data_inpute.drop(["Y"], axis=1)

            return self.data_inpute

        elif model == "kNN":
            imputer = KNNImputer()
            self.data_inpute = pd.DataFrame(imputer.fit_transform(self.new_df), columns=self.new_df.columns)
            self.Y.index = self.data_inpute.index
            self.data_inpute = pd.concat([self.data_inpute, self.Y], axis=1, sort=False)

            columns = self.data_inpute.columns.tolist()

            for i in range(len(columns) - 1):
                columns[i] = "col_" + str(i + 1)
            self.data_inpute.columns = columns
            self.data_inpute.columns = [*self.data_inpute.columns[:-1], 'Y']

            self.Y = self.data_inpute["Y"]
            self.X = self.data_inpute.drop(["Y"], axis=1)

            return self.data_inpute
        
        elif model == "RF":
            imputer = MissForest(max_iter=5, random_state=42, n_jobs=-1, criterion='squared_error')
            self.data_inpute = pd.DataFrame(imputer.fit_transform(self.new_df), columns=self.new_df.columns)
            self.Y.index = self.data_inpute.index
            self.data_inpute = pd.concat([self.data_inpute, self.Y], axis=1, sort=False)

            columns = self.data_inpute.columns.tolist()

            for i in range(len(columns) - 1):
                columns[i] = "col_" + str(i + 1)
            self.data_inpute.columns = columns
            self.data_inpute.columns = [*self.data_inpute.columns[:-1], 'Y']

            self.Y = self.data_inpute["Y"]
            self.X = self.data_inpute.drop(["Y"], axis=1)

            return self.data_inpute
    
    def model(self, model):
        if model == 0:
            return load_model("model_D2_01.h5")
        elif model == 1:
            return load_model("model_D2_02.h5")
        elif model == 2:
            return load_model("model_D2_03.h5")

    def evaluate(self, y_test, y_nan):
        y_true = y_test
        y_pred = y_nan
        conf_matrix = confusion_matrix(y_true, y_pred)

        # Compute the accuracy
        accuracy = accuracy_score(y_true, y_pred)

        # Compute the precision
        precision_scores = precision_score(y_true, y_pred, labels=range(len(conf_matrix)), average=None)

        # Compute the recall
        recall = recall_score(y_true, y_pred, labels=range(len(conf_matrix)), average=None)

        # Compute the F1-score
        f1 = f1_score(y_true, y_pred, labels=range(len(conf_matrix)), average=None)

        # Compute the average accuracy
        avg_accuracy = sum(precision_scores) / len(conf_matrix)

        # Compute the predicted probabilities
        y_scores = np.random.rand(len(y_true), len(np.unique(y_true)))

        ## Compute the AUC for each class
        n_classes = conf_matrix.shape[0]
        auc_list = []
        for i in range(n_classes):
            auc_list.append(roc_auc_score(y_true == i, y_scores[:, i]))

        # Define the result dictionary
        self.result = {
            "confusion_matrix": conf_matrix,
            "accuracy": accuracy,
            "precision": precision_scores,
            "recall": recall,
            "f1_score": f1,
            "avg_accuracy": avg_accuracy,
            "auc": auc_list
        }
        return self.result
    def save_txt(self, filename, evaluate):
        with open(filename, 'w') as f:
            f.write(str(evaluate))

In [10]:
data_processor = DataProcessor()
load_data = data_processor.import_data()
test_size = 0.95  # Konstante Testgröße

for model_number in range(0, 3):  # Modelle 1-3 durchlaufen
    model_name = f"Model_{model_number + 1}"
    output_prefix = f"prediction_ref_mean_D2_{model_name}_"
    model = data_processor.model(model_number)
    
    for missing_rate in [0.3, 0.6, 0.9]:
        miss_data = data_processor.gen_miss_values(missing_rate)
        inpute_values = data_processor.inpute_data("mean")
        
        Y = inpute_values["Y"]
        X = inpute_values.drop(["Y"], axis=1)
        X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=42)

        y_pred = model.predict(X_test)
        y_pred_class = np.argmax(y_pred, axis=1)
        y_test = y_test[X_test.index].values
        evaluate = data_processor.evaluate(y_test, y_pred_class)
        print(evaluate)
        filename = output_prefix + str(missing_rate).replace(".", "_")
        data_processor.save_txt(filename, evaluate)

{'confusion_matrix': array([[192,   0,   4,  23,   7,   0, 153,   0,   4,   0],
       [  0, 351,   0,  39,   7,   0,   4,   0,   0,   0],
       [  1,   0, 206,   1, 177,   0,  18,   0,   5,   0],
       [  1,   1,   0, 338,  35,   0,  27,   0,   1,   0],
       [  0,   0,   9,  19, 372,   0,   5,   0,   2,   0],
       [  2,   0,   4,   0,  10, 196,  62,  29,  99,  12],
       [ 24,   0,  19,  23, 198,   0, 128,   0,   6,   0],
       [  0,   0,   0,   0,   0,  71,   1, 215,  79,  29],
       [  0,   0,   2,   5,   7,   0,   2,   0, 384,   0],
       [  0,   0,   0,   0,   0,   9,   0,  66,   1, 305]], dtype=int64), 'accuracy': 0.6734335839598997, 'precision': array([0.87272727, 0.99715909, 0.8442623 , 0.75446429, 0.45756458,
       0.71014493, 0.32      , 0.69354839, 0.66092943, 0.88150289]), 'recall': array([0.50130548, 0.87531172, 0.50490196, 0.83870968, 0.91400491,
       0.47342995, 0.32160804, 0.5443038 , 0.96      , 0.80052493]), 'f1_score': array([0.63681592, 0.93227092, 0.63

  _warn_prf(average, modifier, msg_start, len(result))


{'confusion_matrix': array([[  1,   0,   1,   0,  65,   0, 310,   0,   6,   0],
       [  0,  26,   0,  12, 353,   0,  10,   0,   0,   0],
       [  0,   0,  32,   0, 343,   0,  29,   0,   4,   0],
       [  0,   0,   0,  38, 285,   0,  77,   0,   3,   0],
       [  0,   0,   0,   0, 387,   0,  20,   0,   0,   0],
       [  0,   0,   0,   0,  56,   2, 214,   0, 142,   0],
       [  0,   0,   0,   0, 265,   0, 128,   0,   5,   0],
       [  0,   0,   0,   0,  15,   6, 197,   0, 177,   0],
       [  0,   0,   2,   0,  15,   0,  58,   0, 325,   0],
       [  0,   0,   0,   0,  16,  75,  97,   8, 185,   0]], dtype=int64), 'accuracy': 0.23533834586466165, 'precision': array([1.        , 1.        , 0.91428571, 0.76      , 0.215     ,
       0.02409639, 0.1122807 , 0.        , 0.3837072 , 0.        ]), 'recall': array([0.00261097, 0.06483791, 0.07843137, 0.0942928 , 0.95085995,
       0.00483092, 0.32160804, 0.        , 0.8125    , 0.        ]), 'f1_score': array([0.00520833, 0.12177986, 0.1

  _warn_prf(average, modifier, msg_start, len(result))


{'confusion_matrix': array([[  0,   0,   0,   0, 351,   0,  32,   0,   0,   0],
       [  0,   0,   0,   0, 366,   0,  35,   0,   0,   0],
       [  0,   0,   0,   0, 372,   0,  36,   0,   0,   0],
       [  0,   0,   0,   0, 368,   0,  35,   0,   0,   0],
       [  0,   0,   0,   0, 352,   0,  55,   0,   0,   0],
       [  0,   0,   0,   0, 247,   0, 167,   0,   0,   0],
       [  0,   0,   0,   0, 345,   0,  53,   0,   0,   0],
       [  0,   0,   0,   0, 232,   0, 163,   0,   0,   0],
       [  0,   0,   0,   0, 178,   0, 221,   0,   1,   0],
       [  0,   0,   0,   0, 189,   0, 191,   0,   1,   0]], dtype=int64), 'accuracy': 0.10175438596491228, 'precision': array([0.        , 0.        , 0.        , 0.        , 0.11733333,
       0.        , 0.05364372, 0.        , 0.5       , 0.        ]), 'recall': array([0.        , 0.        , 0.        , 0.        , 0.86486486,
       0.        , 0.13316583, 0.        , 0.0025    , 0.        ]), 'f1_score': array([0.        , 0.        , 0. 

  _warn_prf(average, modifier, msg_start, len(result))


{'confusion_matrix': array([[  0,   0, 383,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0, 401,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0, 408,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0, 403,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0, 407,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0, 412,   0,   0,   1,   0,   0,   1,   0],
       [  0,   0, 398,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0, 395,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0, 396,   0,   0,   2,   1,   0,   1,   0],
       [  0,   0, 380,   0,   0,   0,   0,   0,   1,   0]], dtype=int64), 'accuracy': 0.10275689223057644, 'precision': array([0.        , 0.        , 0.10243535, 0.        , 0.        ,
       0.33333333, 0.        , 0.        , 0.33333333, 0.        ]), 'recall': array([0.        , 0.        , 1.        , 0.        , 0.        ,
       0.00241546, 0.        , 0.        , 0.0025    , 0.        ]), 'f1_score': array([0.        , 0.        , 0.1

  _warn_prf(average, modifier, msg_start, len(result))


{'confusion_matrix': array([[  0,   0, 383,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0, 401,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0, 408,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0, 403,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0, 407,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0, 414,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0, 398,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0, 395,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0, 400,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0, 381,   0,   0,   0,   0,   0,   0,   0]], dtype=int64), 'accuracy': 0.10225563909774436, 'precision': array([0.        , 0.        , 0.10225564, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ]), 'recall': array([0., 0., 1., 0., 0., 0., 0., 0., 0., 0.]), 'f1_score': array([0.        , 0.        , 0.18553888, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        ,

  _warn_prf(average, modifier, msg_start, len(result))


{'confusion_matrix': array([[  0,   0,   0,   0,   0,   0, 383,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 401,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 408,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 403,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 407,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 414,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 398,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 395,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 400,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 381,   0,   0,   0]], dtype=int64), 'accuracy': 0.09974937343358396, 'precision': array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.09974937, 0.        , 0.        , 0.        ]), 'recall': array([0., 0., 0., 0., 0., 0., 1., 0., 0., 0.]), 'f1_score': array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.18140383, 0.        , 0.        ,

  _warn_prf(average, modifier, msg_start, len(result))


{'confusion_matrix': array([[  0,   0,   0,   0,   0,   0, 383,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 401,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 408,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 403,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 407,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 414,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 398,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 395,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 400,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 381,   0,   0,   0]], dtype=int64), 'accuracy': 0.09974937343358396, 'precision': array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.09974937, 0.        , 0.        , 0.        ]), 'recall': array([0., 0., 0., 0., 0., 0., 1., 0., 0., 0.]), 'f1_score': array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.18140383, 0.        , 0.        ,

  _warn_prf(average, modifier, msg_start, len(result))


{'confusion_matrix': array([[  0,   0,   0,   0,   0,   0, 383,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 401,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 408,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 403,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 407,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 414,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 398,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 395,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 400,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 381,   0,   0,   0]], dtype=int64), 'accuracy': 0.09974937343358396, 'precision': array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.09974937, 0.        , 0.        , 0.        ]), 'recall': array([0., 0., 0., 0., 0., 0., 1., 0., 0., 0.]), 'f1_score': array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.18140383, 0.        , 0.        ,

  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
data_processor = DataProcessor()
load_data = data_processor.import_data()
test_size = 0.95  # Konstante Testgröße

for model_number in range(0, 3):  # Modelle 1-3 durchlaufen
    model_name = f"Model_{model_number + 1}"
    output_prefix = f"prediction_ref_MICE_D2_{model_name}_"
    model = data_processor.model(model_number)
    
    for missing_rate in [0.3, 0.6, 0.9]:
        miss_data = data_processor.gen_miss_values(missing_rate)
        inpute_values = data_processor.inpute_data("MICE")
        Y = inpute_values["Y"]
        X = inpute_values.drop(["Y"], axis=1)
        X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=42)

        y_pred = model.predict(X_test)
        y_pred_class = np.argmax(y_pred, axis=1)
        y_test = y_test[X_test.index].values
        evaluate = data_processor.evaluate(y_test, y_pred_class)
        print(evaluate)
        filename = output_prefix + str(missing_rate).replace(".", "_")
        data_processor.save_txt(filename, evaluate)



      pixel1    pixel2    pixel3    pixel4    pixel5    pixel6     pixel7  \
0        0.0  0.031037  0.000000 -0.032791 -2.522260 -4.730717  -2.530123   
1        0.0  0.000000  0.088601  0.000000  1.719057  0.000000   0.000000   
2        0.0  0.000000  0.000000  0.000000  0.000000  0.000000 -10.570498   
3        0.0 -0.000190  0.000000  0.000000  0.000000  0.000000  -2.879357   
4        0.0  0.000000  0.000000  0.000000  0.000000  0.000000   0.000000   
...      ...       ...       ...       ...       ...       ...        ...   
4195     0.0  0.000000  0.001267  0.000000 -2.400954 -5.230703   0.000000   
4196     0.0  0.000000  0.000000  0.207316  0.000000  0.000000   0.000000   
4197     0.0  0.050149  0.000000  0.316057  1.056878  0.000000   2.066470   
4198     0.0 -0.009097  0.018502  0.000000  0.000000  0.000000   0.000000   
4199     0.0 -0.065658 -0.136832  0.581149 -1.171497  1.708669  -1.673108   

        pixel8     pixel9     pixel10  ...    pixel776    pixel777  \
0    



      col_1     col_2     col_3     col_4     col_5     col_6      col_7  \
0       0.0  0.031037 -0.041089 -0.038895 -2.522260 -4.730717  -2.530123   
1       0.0  0.000000  0.084576 -0.090718  1.719057  2.369358   0.000000   
2       0.0  0.266415 -0.117007  0.181171  5.736278  0.000000 -14.251071   
3       0.0 -0.000190  0.000000  0.009096  0.789877  2.827353   4.734790   
4       0.0 -0.012321 -0.036167  0.054581 -0.216022  0.000000  -1.907344   
...     ...       ...       ...       ...       ...       ...        ...   
4195    0.0 -0.013680  0.044833  0.157372 -2.400954 -5.230703  10.352776   
4196    0.0 -0.006676  0.013039  0.207316 -0.248676 -0.555327   0.000000   
4197    0.0  0.017320 -0.023914 -0.088813  0.580807  0.000000   0.854538   
4198    0.0  0.008471 -0.019009  0.000000  0.000000  0.000000   1.144010   
4199    0.0 -0.065658 -0.136832  0.581149 -1.171497 -9.941305  -1.465308   

          col_8      col_9     col_10  ...     col_776     col_777  \
0    -11.823936  



      col_1     col_2     col_3     col_4     col_5     col_6      col_7  \
0       0.0 -0.001328  0.014123 -0.038895 -0.035139  0.678111  -0.603651   
1       0.0 -0.027246 -0.002129 -0.135176 -0.509713 -0.139682  -1.032968   
2       0.0 -0.025386  0.057290  0.169431  0.825998 -0.924464   4.516044   
3       0.0  0.021208  0.050767 -0.127770 -0.878511  0.592097 -10.701702   
4       0.0 -0.012321 -0.008818  0.180988 -0.086862 -1.864099  -3.459187   
...     ...       ...       ...       ...       ...       ...        ...   
4195    0.0  0.012630  0.015579  0.233361  1.637491 -0.668218  12.654744   
4196    0.0 -0.002724 -0.041798  0.017840  1.106784 -0.899027   2.573490   
4197    0.0 -0.006952  0.088928 -0.019443  0.513327 -0.528991   7.310513   
4198    0.0 -0.004667 -0.022323  0.024764  0.771559 -1.031625   0.345158   
4199    0.0  0.009289 -0.040995  0.005465 -0.371631 -9.941305   3.278316   

          col_8      col_9     col_10  ...     col_776    col_777    col_778  \
0      

      col_1     col_2     col_3     col_4     col_5     col_6      col_7  \
0       0.0 -0.001328  0.012860  0.105506 -0.035139  1.016845  -0.366480   
1       0.0 -0.010453 -0.025568 -0.122127 -0.307628 -0.271079  -0.766827   
2       0.0 -0.004900  0.071726  0.169431  0.299647 -0.344604   4.516044   
3       0.0  0.013584  0.051583 -0.127770 -0.878511  1.057585  -2.497749   
4       0.0 -0.012321 -0.004834  0.178060 -0.229091 -1.631893  -3.459187   
...     ...       ...       ...       ...       ...       ...        ...   
4195    0.0  0.033000  0.028996  0.271966  1.335552 -0.736256  14.478277   
4196    0.0 -0.002863 -0.003319  0.052635  0.854519 -0.670538   1.813064   
4197    0.0 -0.003803  0.054413 -0.019443  0.513327 -0.938162   9.350625   
4198    0.0  0.005557 -0.010636 -0.023421  0.771559 -1.177053   0.345158   
4199    0.0 -0.007257 -0.037921 -0.008162 -0.371631 -0.868220   7.097799   

         col_8      col_9     col_10  ...    col_776    col_777    col_778  \
0    -3.3



      col_1     col_2     col_3     col_4     col_5     col_6     col_7  \
0       0.0  0.022674  0.018728  0.050947  0.481178  0.675065  0.987734   
1       0.0 -0.007273 -0.020385 -0.046630  0.174512 -0.365259 -0.766827   
2       0.0  0.016418  0.050241  0.064562 -0.087971  0.682558  2.480042   
3       0.0 -0.025036 -0.011028 -0.075253 -0.417613  0.146776 -1.910523   
4       0.0 -0.019086  0.042374 -0.019331  0.572899 -1.631893  1.024828   
...     ...       ...       ...       ...       ...       ...       ...   
4195    0.0  0.019765  0.086557  0.213917  1.335552  0.231311  9.813466   
4196    0.0 -0.002493  0.033426  0.026750  0.407963  0.103100 -0.554725   
4197    0.0 -0.000945 -0.018800  0.047926  0.270426 -0.054112  9.350625   
4198    0.0  0.008350 -0.016991 -0.004982  0.165130 -0.600763  1.007900   
4199    0.0 -0.006630  0.006198  0.036525  0.340214 -0.719654  1.416814   

         col_8      col_9     col_10  ...    col_776    col_777    col_778  \
0    -0.766900   5.93

  _warn_prf(average, modifier, msg_start, len(result))


      col_1     col_2     col_3     col_4     col_5     col_6      col_7  \
0       0.0  0.004711  0.019481  0.038068  0.494697  0.767675  -0.130291   
1       0.0  0.002698 -0.024266 -0.114744  0.085777 -0.339234  -1.810880   
2       0.0  0.016418  0.042835  0.053633 -0.087971  0.567339   2.147324   
3       0.0 -0.021292 -0.011028 -0.075253 -0.371938  0.146776  -2.644326   
4       0.0 -0.020196  0.039735  0.034890  0.641892 -1.631893   3.145357   
...     ...       ...       ...       ...       ...       ...        ...   
4195    0.0  0.027117  0.084518  0.265230  1.495271  0.122914  10.023330   
4196    0.0 -0.002493  0.020809  0.017944  0.516658 -0.026817  -0.612416   
4197    0.0 -0.010569 -0.016296  0.045514  0.270426 -0.008289   2.262550   
4198    0.0  0.016889 -0.016991  0.002554  0.165130 -0.574165   2.895617   
4199    0.0 -0.006630  0.006312  0.026787  0.309450 -0.775646   1.416814   

         col_8      col_9     col_10  ...    col_776    col_777    col_778  \
0    -0.9

  _warn_prf(average, modifier, msg_start, len(result))


      col_1     col_2     col_3     col_4     col_5     col_6     col_7  \
0       0.0 -0.003854  0.006039  0.093034  0.219773  0.563244  1.194960   
1       0.0 -0.015515 -0.016851 -0.106564 -0.147247 -0.353810 -0.386556   
2       0.0 -0.037120 -0.049789  0.073940  0.261613  0.953060 -1.548891   
3       0.0 -0.021210  0.029643 -0.078240  0.021837  0.146776 -0.516234   
4       0.0  0.005410  0.008757  0.008585  0.389645 -0.444501 -0.219190   
...     ...       ...       ...       ...       ...       ...       ...   
4195    0.0  0.013945  0.057906  0.127025  1.158044  0.213828  4.471332   
4196    0.0 -0.002633 -0.007163  0.001892  0.691392 -0.137443 -0.612416   
4197    0.0 -0.010569  0.001542  0.028791  0.111856  0.174936  0.068759   
4198    0.0 -0.008660  0.008268  0.006922  0.202743 -0.472651  0.222007   
4199    0.0  0.012828  0.020870  0.025011  0.316711 -0.020757 -0.321065   

         col_8      col_9     col_10  ...    col_776    col_777    col_778  \
0     2.910015  12.26

  _warn_prf(average, modifier, msg_start, len(result))


In [5]:
data_processor = DataProcessor()
load_data = data_processor.import_data()
test_size = 0.95  # Konstante Testgröße

for model_number in range(0, 3):  # Modelle 1-3 durchlaufen
    model_name = f"Model_{model_number + 1}"
    output_prefix = f"prediction_ref_kNN_D2_{model_name}_"
    model = data_processor.model(model_number)
    
    for missing_rate in [0.3, 0.6, 0.9]:
        miss_data = data_processor.gen_miss_values(missing_rate)
        inpute_values = data_processor.inpute_data("kNN")

        Y = inpute_values["Y"]
        X = inpute_values.drop(["Y"], axis=1)
        X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=42)

        y_pred = model.predict(X_test)
        y_pred_class = np.argmax(y_pred, axis=1)
        y_test = y_test[X_test.index].values
        evaluate = data_processor.evaluate(y_test, y_pred_class)
        print(evaluate)
        filename = output_prefix + str(missing_rate).replace(".", "_")
        data_processor.save_txt(filename, evaluate)

NameError: name 'DataProcessor' is not defined

In [8]:
data_processor = DataProcessor()
load_data = data_processor.import_data()
test_size = 0.95  # Konstante Testgröße

for model_number in range(0, 3):  # Modelle 1-3 durchlaufen
    model_name = f"Model_{model_number + 1}"
    output_prefix = f"prediction_ref_RF_D2_{model_name}_"
    model = data_processor.model(model_number)
    
    for missing_rate in [0.3, 0.6, 0.9]:
        miss_data = data_processor.gen_miss_values(missing_rate)
        inpute_values = data_processor.inpute_data("RF")

        Y = inpute_values["Y"]
        X = inpute_values.drop(["Y"], axis=1)
        X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=42)

        y_pred = model.predict(X_test)
        y_pred_class = np.argmax(y_pred, axis=1)
        y_test = y_test[X_test.index].values
        evaluate = data_processor.evaluate(y_test, y_pred_class)
        print(evaluate)
        filename = output_prefix + str(missing_rate).replace(".", "_")
        data_processor.save_txt(filename, evaluate)

Iteration: 0
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
{'confusion_matrix': array([[292,   0,   3,  33,   4,   0,  45,   0,   6,   0],
       [  2, 376,   0,  20,   3,   0,   0,   0,   0,   0],
       [  1,   0, 247,   5, 138,   0,  14,   0,   3,   0],
       [  5,   2,   0, 378,  10,   0,   8,   0,   0,   0],
       [  0,   0,  15,  37, 347,   0,   6,   0,   2,   0],
       [  0,   0,   0,   0,   0, 387,   0,  19,   0,   8],
       [ 55,   1,  28,  36, 182,   0,  88,   0,   8,   0],
       [  0,   0,   0,   0,   0,   8,   0, 375,   1,  11],
       [  0,   0,   2,   3,   5,   1,   2,   2, 385,   0],
       [  0,   0,   0,   0,   0,   7,   0,  21,   0, 353]], dtype=int64), 'accuracy': 0.8090225563909774, 'precision': array([0.82253521, 0.99208443, 0.83728814, 0.73828125, 0.50362845,
       0.96029777, 0.5398773 , 0.89928058, 0.95061728, 0.94892473]), 'recall': array([0.76240209, 0.93765586, 0.60539216, 0.93796526, 0.85257985,
       0.93478261, 0.22110553, 0.94936709, 0.9625  

KeyboardInterrupt: 

In [7]:
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)
