In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from itertools import product

from Logistic_Regression import LogisticRegression
from Decision_Tree import DecisionTree
from Random_Forest import RandomForest
from ANN_Pytorch import ANN
from prepare_data import DataPreprocessor
from generate_data import DataGenerator

# Configuration
n_classes_dep_var = [2, 3, 4]
n_ind_vars = [3, 5, 7]
n_categorical_vars = np.arange(7)
n_classes_ind_vars = [2, 3]
n_samples = [100, 500, 1000, 10000]

# Initialize preprocessor
preprocessor = DataPreprocessor()

# Initialize results storage
results_list = []

# Perform iterations
for iteration in range(1):
    print(f"Starting iteration {iteration + 1}...")

    for n_classes_dep, n_ind, n_categorical, n_classes_ind, n_samp in product(
        n_classes_dep_var, n_ind_vars, n_categorical_vars, n_classes_ind_vars, n_samples
    ):
        # Generate data
        data_generator = DataGenerator(
            n_classes_dep_var=n_classes_dep,
            n_ind_vars=n_ind,
            n_categorical_vars=n_categorical,
            n_classes_ind_vars=n_classes_ind,
            n_samples=n_samp
        )
        data = data_generator.generate_data()

        if data is None:
            continue

        # Preprocess data
        X, y = preprocessor.preprocess(data)

        # Logistic Regression
        for link_function in ['logit', 'probit']:
            model = LogisticRegression(link_function=link_function)
            scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
            results_list.append([
                iteration + 1, n_classes_dep, n_ind, n_categorical, n_classes_ind, n_samp,
                'LogisticRegression', link_function, None, None, 1 - scores.mean(), scores.mean(), None, None, None
            ])

        # Decision Tree
        model = DecisionTree()
        scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
        results_list.append([
            iteration + 1, n_classes_dep, n_ind, n_categorical, n_classes_ind, n_samp,
            'DecisionTree', None, None, None, 1 - scores.mean(), scores.mean(), None, None, None
        ])

        # Random Forest
        model = RandomForest()
        scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
        results_list.append([
            iteration + 1, n_classes_dep, n_ind, n_categorical, n_classes_ind, n_samp,
            'RandomForest', None, None, None, 1 - scores.mean(), scores.mean(), None, None, None
        ])

        # ANN
        model = ANN(nInput=X.shape[1], nOutput=1)
        scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
        results_list.append([
            iteration + 1, n_classes_dep, n_ind, n_categorical, n_classes_ind, n_samp,
            'ANN', None, None, None, 1 - scores.mean(), scores.mean(), None, None, None
        ])

# Save results to a DataFrame
results_df = pd.DataFrame(results_list, columns=[
    'Iteration', 'Classes_Dep_Var', 'Num_Ind_Vars', 'Num_Categorical_Vars',
    'Classes_Ind_Vars', 'Sample_Size', 'Model', 'Hyperparameter_1', 'Hyperparameter_2',
    'Hyperparameter_3', 'Misclassification', 'Accuracy', 'Precision', 'Recall', 'F1_Score'
])

# Save results to a CSV file
# results_df.to_csv('model_results_cv.csv', index=False)
# print("Results saved to model_results_cv.csv.")

Starting iteration 1...


ValueError: too many values to unpack (expected 2)