### 1. CART (Classification and Regression Trees) - DecisionTree Classifier
- Sampling Technique - Train/Test Split (80:20)
- Classification Metrics - Accuracy

In [1]:
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
import numpy as np

# Load the dataset
filename = './cirrhosis.csv'
dataframe = pd.read_csv(filename)

# Separate features and target variable
X = dataframe.drop('Status', axis=1)
Y = dataframe['Status']

# Encode the target variable (Status) using label encoding
le = LabelEncoder()
Y = le.fit_transform(Y)

# Display the mapping of labels to numerical values
label_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
print("Label Mapping:")
for label, value in label_mapping.items():
    print(f"Label: {label}, Numerical Value: {value}")

# Initialize the Decision Tree Classifier with hyperparameters
max_depth = 5  # You can adjust this value
min_samples_split = 2  # You can adjust this value
min_samples_leaf = 1  # You can adjust this value
random_seed = 50

model = DecisionTreeClassifier(
    max_depth=max_depth,
    min_samples_split=min_samples_split,
    min_samples_leaf=min_samples_leaf,
    random_state=random_seed
)

# Use K-fold Cross Validation
num_folds = 5  # You can adjust the number of folds
kf = KFold(n_splits=num_folds, random_state=random_seed, shuffle=True)

accuracies = []

for fold, (train_index, test_index) in enumerate(kf.split(X, Y), 1):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    Y_train, Y_test = Y[train_index], Y[test_index]

    # Train the model
    model.fit(X_train, Y_train)

    # Evaluate the accuracy for each fold
    accuracy = model.score(X_test, Y_test)
    accuracies.append(accuracy)
    print(f"Fold {fold} - Accuracy: {accuracy:.3f}")

    # Generate the classification report for each fold
    classification_report_fold = classification_report(Y_test, model.predict(X_test))
    print(f"Fold {fold} - Classification Report:")
    print(classification_report_fold)

# Calculate and print the average accuracy
average_accuracy = np.mean(accuracies)
print("\nAverage Accuracy:", f"{average_accuracy:.3f}")


Label Mapping:
Label: C, Numerical Value: 0
Label: CL, Numerical Value: 1
Label: D, Numerical Value: 2
Fold 1 - Accuracy: 0.762
Fold 1 - Classification Report:
              precision    recall  f1-score   support

           0       0.75      0.91      0.82        45
           1       0.00      0.00      0.00         3
           2       0.92      0.64      0.75        36

    accuracy                           0.76        84
   macro avg       0.56      0.52      0.52        84
weighted avg       0.79      0.76      0.76        84

Fold 2 - Accuracy: 0.714
Fold 2 - Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.89      0.80        46
           1       0.00      0.00      0.00         4
           2       0.83      0.56      0.67        34

    accuracy                           0.71        84
   macro avg       0.52      0.48      0.49        84
weighted avg       0.73      0.71      0.71        84

Fold 3 - Accuracy: 0.

### 2. Gaussian Naive Bayes
- Sampling Technique - Train/Test Split (80:20)
- Classification Metrics - Accuracy

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

# Load the dataset
filename = './cirrhosis.csv'
dataframe = pd.read_csv(filename)

# Separate features and target variable
X = dataframe.drop('Status', axis=1)
Y = dataframe['Status']

# Impute missing values with the mean
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer = imputer.fit(X)
X = imputer.transform(X)

# Encode the target variable (Status) using label encoding
le = LabelEncoder()
Y = le.fit_transform(Y)

# Display the mapping of labels to numerical values
label_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
print("Label Mapping:")
for label, value in label_mapping.items():
    print(f"Label: {label}, Numerical Value: {value}")

# Set the random seed
seed = 7  # You can adjust this value
# Set the number of folds for K-fold Cross Validation
num_folds = 5  # You can adjust the number of folds
kf = KFold(n_splits=num_folds, random_state=seed, shuffle=True)

# Create a StandardScaler object to standardize the features
scaler = StandardScaler()

# Initialize Gaussian Naive Bayes classifier
model = GaussianNB(priors=None, var_smoothing=1e-9)  # Hyperparameters: priors and var_smoothing

accuracies = []

for fold, (train_index, test_index) in enumerate(kf.split(X, Y), 1):
    X_train, X_test = X[train_index], X[test_index]
    Y_train, Y_test = Y[train_index], Y[test_index]

    # Standardize the training data
    X_train_scaled = scaler.fit_transform(X_train)

    # Standardize the testing data
    X_test_scaled = scaler.transform(X_test)

    # Train the model on the standardized training data
    model.fit(X_train_scaled, Y_train)

    # Evaluate the accuracy for each fold
    result = model.score(X_test_scaled, Y_test)
    accuracies.append(result)
    print(f"Fold {fold} - Accuracy: {result:.3f}")

    # Generate the classification report for each fold
    classification_report_fold = classification_report(Y_test, model.predict(X_test_scaled))
    print(f"Fold {fold} - Classification Report:")
    print(classification_report_fold)

# Calculate and print the average accuracy
average_accuracy = np.mean(accuracies)
print("\nAverage Accuracy:", f"{average_accuracy:.3f}")


Label Mapping:
Label: C, Numerical Value: 0
Label: CL, Numerical Value: 1
Label: D, Numerical Value: 2
Fold 1 - Accuracy: 0.738
Fold 1 - Classification Report:
              precision    recall  f1-score   support

           0       0.74      0.89      0.81        47
           1       0.00      0.00      0.00         2
           2       0.83      0.57      0.68        35

    accuracy                           0.74        84
   macro avg       0.52      0.49      0.50        84
weighted avg       0.76      0.74      0.73        84

Fold 2 - Accuracy: 0.690
Fold 2 - Classification Report:
              precision    recall  f1-score   support

           0       0.68      0.93      0.79        42
           1       0.25      0.08      0.12        12
           2       0.78      0.60      0.68        30

    accuracy                           0.69        84
   macro avg       0.57      0.54      0.53        84
weighted avg       0.66      0.69      0.65        84

Fold 3 - Accuracy: 0.

### 3. Gradient Boosting Machines (AdaBoost)

In [12]:
from pandas import read_csv
import numpy as np
from sklearn.model_selection import KFold
from sklearn.ensemble import AdaBoostClassifier
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score, precision_recall_fscore_support

# Load the dataset
filename = './cirrhosis.csv'
dataframe = read_csv(filename)

# Separate features and target variable
X = dataframe.drop('Status', axis=1)
Y = dataframe['Status']

# Impute missing values with the mean
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
X = imputer.fit_transform(X)

# Encode the target variable (Status) using label encoding
le = LabelEncoder()
Y = le.fit_transform(Y)

# Display the mapping of labels to numerical values
label_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
print("Label Mapping:")
for label, value in label_mapping.items():
    print(f"Label: {label}, Numerical Value: {value}")

# Set the random seed
seed = 7  # You can adjust this value
# Set the number of folds for K-fold Cross Validation
num_folds = 5  # You can adjust the number of folds
kf = KFold(n_splits=num_folds, random_state=seed, shuffle=True)

# Create an AdaBoost classifier
model = AdaBoostClassifier(n_estimators=50, random_state=seed)
# Hyperparameters:
# - n_estimators: The number of weak classifiers (base estimators) to train. You can adjust this to control the complexity of the ensemble.
# - random_state: The random seed for reproducibility. You can set this to a specific value if you want consistent results.

# Initialize lists to store accuracy and classification reports for each fold
accuracies = []
classification_reports = []

for fold, (train_index, test_index) in enumerate(kf.split(X, Y), 1):
    X_train, X_test = X[train_index], X[test_index]
    Y_train, Y_test = Y[train_index], Y[test_index]

    # Train the model on the training data
    model.fit(X_train, Y_train)

    # Evaluate the accuracy for each fold
    result = model.score(X_test, Y_test)
    accuracies.append(result)
    print(f"\nFold {fold} - Accuracy: {result:.3f}")

    # Generate the classification report for each fold
    y_pred = model.predict(X_test)
    classification_report_fold = precision_recall_fscore_support(Y_test, y_pred, average='weighted')
    if None not in classification_report_fold:
        classification_reports.append(classification_report_fold)
        print(f"Fold {fold} - Classification Report:")
        print(classification_report(Y_test, y_pred, target_names=label_mapping.keys()))

# Check if there are any performance metrics in the list
if classification_reports:
    # Calculate and print the average accuracy
    average_accuracy = np.mean(accuracies)
    print("\nAverage Accuracy:", f"{average_accuracy:.3f}")

    # Print the average precision, recall, F1-score, and support over all folds
    average_metrics = np.mean([np.array(report) for report in classification_reports], axis=0)
    precision, recall, f1, support = average_metrics[:4]

    print("\nAverage Metrics:")
    print(f"Precision: {precision:.3f}")
    print(f"Recall: {recall:.3f}")
    print(f"F1-Score: {f1:.3f}")
    print(f"Support: {support:.3f}")
else:
    print("No classification reports available.")


Label Mapping:
Label: C, Numerical Value: 0
Label: CL, Numerical Value: 1
Label: D, Numerical Value: 2

Fold 1 - Accuracy: 0.833

Fold 2 - Accuracy: 0.702

Fold 3 - Accuracy: 0.750

Fold 4 - Accuracy: 0.783

Fold 5 - Accuracy: 0.759
No classification reports available.


### 4. K-Nearest Neighbors (K-NN)
- Sampling Technique - Train/Test Split (80:20)
- Classification Metrics - Accuracy

In [29]:
from pandas import read_csv
import numpy as np
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_recall_fscore_support

# Load the dataset
filename = './cirrhosis.csv'
dataframe = read_csv(filename)

# Separate features and target variable
X = dataframe.drop('Status', axis=1)
Y = dataframe['Status']

# Impute missing values with the mean
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer = imputer.fit(X)
X = imputer.transform(X)

# Encode the target variable (Status) using label encoding
le = LabelEncoder()
Y = le.fit_transform(Y)

# Display the mapping of labels to numerical values
label_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
print("Label Mapping:")
for label, value in label_mapping.items():
    print(f"Label: {label}, Numerical Value: {value}")

# Set the number of folds for K-fold Cross Validation
num_folds = 5  # You can adjust the number of folds
kf = KFold(n_splits=num_folds, random_state=seed, shuffle=True)

# Create a K-Nearest Neighbors (K-NN) classifier
model = KNeighborsClassifier(n_neighbors=5, weights='uniform', algorithm='auto')

# Initialize lists to store precision and support for each fold
precision_list = []
support_list = []

for fold, (train_index, test_index) in enumerate(kf.split(X, Y), 1):
    X_train, X_test = X[train_index], X[test_index]
    Y_train, Y_test = Y[train_index], Y[test_index]

    # Train the model on the training data
    model.fit(X_train, Y_train)

    # Generate the classification report for each fold
    metrics_fold = precision_recall_fscore_support(Y_test, model.predict(X_test), zero_division=1, average='weighted')
    precision_list.append(metrics_fold[0])

    # Calculate the support separately
    support_fold = np.sum(metrics_fold[3])
    support_list.append(support_fold)

    print(f"\nFold {fold} - Precision: {metrics_fold[0]:.3f}")

# Check if there are any performance metrics in the list
if precision_list:
    # Print the average precision over all folds
    average_precision = np.mean(precision_list)
    print("\nAverage Precision:", f"{average_precision:.3f}")
else:
    print("No precision values available.")


Label Mapping:
Label: C, Numerical Value: 0
Label: CL, Numerical Value: 1
Label: D, Numerical Value: 2

Fold 1 - Precision: 0.729

Fold 2 - Precision: 0.627

Fold 3 - Precision: 0.685

Fold 4 - Precision: 0.667

Fold 5 - Precision: 0.727

Average Precision: 0.687


### 5. Logistic Regression
- Sampling Technique - Train/Test Split (80:20)
- Classification Metrics - Accuracy

In [36]:
from pandas import read_csv
import numpy as np
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_score
import warnings

# Ignore warnings for this example
warnings.filterwarnings("ignore", category=UserWarning, module='sklearn')

# Load the dataset
filename = './cirrhosis.csv'
dataframe = read_csv(filename)

# Separate features and target variable
X = dataframe.drop('Status', axis=1)
Y = dataframe['Status']

# Impute missing values with the mean
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer = imputer.fit(X)
X = imputer.transform(X)

# Encode the target variable (Status) using label encoding
le = LabelEncoder()
Y = le.fit_transform(Y)

# Display the mapping of labels to numerical values
label_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
print("Label Mapping:")
for label, value in label_mapping.items():
    print(f"Label: {label}, Numerical Value: {value}")

# Set the number of folds for K-fold Cross Validation
num_folds = 5  # You can adjust the number of folds
kf = KFold(n_splits=num_folds, random_state=seed, shuffle=True)

# Create a Logistic Regression model with increased max_iter
model = LogisticRegression(max_iter=5000, solver='lbfgs', C=1.0)  # Increased max_iter

# Initialize lists to store precision for each fold
precision_list = []

for fold, (train_index, test_index) in enumerate(kf.split(X, Y), 1):
    X_train, X_test = X[train_index], X[test_index]
    Y_train, Y_test = Y[train_index], Y[test_index]

    # Train the model on the training data
    model.fit(X_train, Y_train)

    # Calculate precision for each fold
    precision_fold = precision_score(Y_test, model.predict(X_test), average='weighted')
    print(f"\nFold {fold} - Precision: {precision_fold:.3f}")

    # Append precision to the list
    precision_list.append(precision_fold)

# Check if there are any precision values in the list
if precision_list:
    # Print the average precision over all folds
    average_precision = np.mean(precision_list)
    print("\nAverage Precision:")
    print(f"{average_precision:.3f}")
else:
    print("No precision values available.")


Label Mapping:
Label: C, Numerical Value: 0
Label: CL, Numerical Value: 1
Label: D, Numerical Value: 2

Fold 1 - Precision: 0.839

Fold 2 - Precision: 0.628

Fold 3 - Precision: 0.827

Fold 4 - Precision: 0.771

Fold 5 - Precision: 0.797

Average Precision:
0.772


### 6. Multi-Layer Perceptron (MLP)
- Sampling Technique - Train/Test Split (80:20)
- Classification Metrics - Accuracy


In [38]:
from pandas import read_csv
import numpy as np
from sklearn.model_selection import KFold
from sklearn.neural_network import MLPClassifier
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
import warnings

# Ignore warnings for this example
warnings.filterwarnings("ignore", category=UserWarning, module='sklearn')

# Load the dataset
filename = './cirrhosis.csv'
dataframe = read_csv(filename)

# Separate features and target variable
X = dataframe.drop('Status', axis=1)
Y = dataframe['Status']

# Impute missing values with the mean
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer = imputer.fit(X)
X = imputer.transform(X)

# Encode the target variable (Status) using label encoding
le = LabelEncoder()
Y = le.fit_transform(Y)

# Display the mapping of labels to numerical values
label_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
print("Label Mapping:")
for label, value in label_mapping.items():
    print(f"Label: {label}, Numerical Value: {value}")

# Set the number of folds for K-fold Cross Validation
num_folds = 5  # You can adjust the number of folds
kf = KFold(n_splits=num_folds, random_state=seed, shuffle=True)

# Create an MLP-based model
model = MLPClassifier(hidden_layer_sizes=(65, 32), activation='relu', solver='adam', max_iter=200, random_state=seed)

# Initialize lists to store precision and support for each fold
precision_list = []
support_list = []

for fold, (train_index, test_index) in enumerate(kf.split(X, Y), 1):
    X_train, X_test = X[train_index], X[test_index]
    Y_train, Y_test = Y[train_index], Y[test_index]

    # Train the model on the training data
    model.fit(X_train, Y_train)

    # Generate classification report for each fold
    report_fold = classification_report(Y_test, model.predict(X_test), output_dict=True)
    
    # Extract precision and support from the report
    precision_list.append(report_fold['weighted avg']['precision'])
    support_fold = np.sum(report_fold['weighted avg']['support'])
    support_list.append(support_fold)

    print(f"\nFold {fold} - Precision: {report_fold['weighted avg']['precision']:.3f}")

# Check if there are any precision values in the list
if precision_list:
    # Print the average precision over all folds
    average_precision = np.average(precision_list, weights=support_list)
    print("\nAverage Precision:")
    print(f"{average_precision:.3f}")
else:
    print("No precision values available.")


Label Mapping:
Label: C, Numerical Value: 0
Label: CL, Numerical Value: 1
Label: D, Numerical Value: 2

Fold 1 - Precision: 0.720

Fold 2 - Precision: 0.487

Fold 3 - Precision: 0.748

Fold 4 - Precision: 0.497

Fold 5 - Precision: 0.718

Average Precision:
0.634


### 7. Perceptron
- Sampling Technique - Train/Test Split (80:20)
- Classification Metrics - Accuracy

In [39]:
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.linear_model import Perceptron
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
import numpy as np
import warnings

# Ignore warnings for this example
warnings.filterwarnings("ignore", category=UserWarning, module='sklearn')

# Load the dataset
filename = './cirrhosis.csv'
dataframe = read_csv(filename)

# Separate features and target variable
X = dataframe.drop('Status', axis=1)
Y = dataframe['Status']

# Impute missing values with the mean
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer = imputer.fit(X)
X = imputer.transform(X)

# Encode the target variable (Status) using label encoding
le = LabelEncoder()
Y = le.fit_transform(Y)

# Display the mapping of labels to numerical values
label_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
print("Label Mapping:")
for label, value in label_mapping.items():
    print(f"Label: {label}, Numerical Value: {value}")

# Set the number of folds for K-fold Cross Validation
num_folds = 5  # You can adjust the number of folds
kf = KFold(n_splits=num_folds, random_state=seed, shuffle=True)

# Create a Perceptron classifier
model = Perceptron(max_iter=200, random_state=seed, eta0=1.0, tol=1e-3)

# Initialize lists to store precision and support for each fold
precision_list = []
support_list = []

for fold, (train_index, test_index) in enumerate(kf.split(X, Y), 1):
    X_train, X_test = X[train_index], X[test_index]
    Y_train, Y_test = Y[train_index], Y[test_index]

    # Train the model on the training data
    model.fit(X_train, Y_train)

    # Generate classification report for each fold
    report_fold = classification_report(Y_test, model.predict(X_test), output_dict=True)
    
    # Extract precision and support from the report
    precision_list.append(report_fold['weighted avg']['precision'])
    support_fold = np.sum(report_fold['weighted avg']['support'])
    support_list.append(support_fold)

    print(f"\nFold {fold} - Precision: {report_fold['weighted avg']['precision']:.3f}")

# Check if there are any precision values in the list
if precision_list:
    # Print the average precision over all folds
    average_precision = np.average(precision_list, weights=support_list)
    print("\nAverage Precision:")
    print(f"{average_precision:.3f}")
else:
    print("No precision values available.")


Label Mapping:
Label: C, Numerical Value: 0
Label: CL, Numerical Value: 1
Label: D, Numerical Value: 2

Fold 1 - Precision: 0.686

Fold 2 - Precision: 0.579

Fold 3 - Precision: 0.758

Fold 4 - Precision: 0.639

Fold 5 - Precision: 0.706

Average Precision:
0.673


### 8. Random Forest
- Sampling Technique - Train/Test Split (80:20)
- Classification Metrics - Accuracy

In [40]:
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
import numpy as np

# Load the dataset
filename = './cirrhosis.csv'
dataframe = read_csv(filename)

# Separate features and target variable
X = dataframe.drop('Status', axis=1)
Y = dataframe['Status']

# Impute missing values with the mean
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer = imputer.fit(X)
X = imputer.transform(X)

# Encode the target variable (Status) using label encoding
le = LabelEncoder()
Y = le.fit_transform(Y)

# Display the mapping of labels to numerical values
label_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
print("Label Mapping:")
for label, value in label_mapping.items():
    print(f"Label: {label}, Numerical Value: {value}")

# Set the number of folds for K-fold Cross Validation
num_folds = 5  # You can adjust the number of folds
kf = KFold(n_splits=num_folds, random_state=seed, shuffle=True)

# Create a Random Forest classifier
rfmodel = RandomForestClassifier(n_estimators=100, random_state=seed, max_depth=None, min_samples_split=2, min_samples_leaf=1)

# Initialize lists to store precision and support for each fold
precision_list = []
support_list = []

for fold, (train_index, test_index) in enumerate(kf.split(X, Y), 1):
    X_train, X_test = X[train_index], X[test_index]
    Y_train, Y_test = Y[train_index], Y[test_index]

    # Train the model on the training data
    rfmodel.fit(X_train, Y_train)

    # Generate classification report for each fold
    report_fold = classification_report(Y_test, rfmodel.predict(X_test), output_dict=True)
    
    # Extract precision and support from the report
    precision_list.append(report_fold['weighted avg']['precision'])
    support_fold = np.sum(report_fold['weighted avg']['support'])
    support_list.append(support_fold)

    print(f"\nFold {fold} - Precision: {report_fold['weighted avg']['precision']:.3f}")

# Check if there are any precision values in the list
if precision_list:
    # Print the average precision over all folds
    average_precision = np.average(precision_list, weights=support_list)
    print("\nAverage Precision:")
    print(f"{average_precision:.3f}")
else:
    print("No precision values available.")


Label Mapping:
Label: C, Numerical Value: 0
Label: CL, Numerical Value: 1
Label: D, Numerical Value: 2

Fold 1 - Precision: 0.762

Fold 2 - Precision: 0.640

Fold 3 - Precision: 0.840

Fold 4 - Precision: 0.727

Fold 5 - Precision: 0.829

Average Precision:
0.759


### 9. Support Vector Machines (SVM)
- Sampling Technique - Train/Test Split (80:20)
- Classification Metrics - Accuracy

In [41]:
from pandas import read_csv
import numpy as np
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

# Load the dataset
filename = './cirrhosis.csv'
dataframe = read_csv(filename)

# Separate features and target variable
X = dataframe.drop('Status', axis=1)
Y = dataframe['Status']

# Impute missing values with the mean
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer = imputer.fit(X)
X = imputer.transform(X)

# Encode the target variable (Status) using label encoding
le = LabelEncoder()
Y = le.fit_transform(Y)

# Display the mapping of labels to numerical values
label_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
print("Label Mapping:")
for label, value in label_mapping.items():
    print(f"Label: {label}, Numerical Value: {value}")

# Set the number of folds for K-fold Cross Validation
num_folds = 5  # You can adjust the number of folds
kf = KFold(n_splits=num_folds, random_state=seed, shuffle=True)

# Create a K-Nearest Neighbors (K-NN) classifier
model = KNeighborsClassifier(n_neighbors=5, weights='uniform', algorithm='auto')

# Initialize lists to store precision and support for each fold
precision_list = []
support_list = []

for fold, (train_index, test_index) in enumerate(kf.split(X, Y), 1):
    X_train, X_test = X[train_index], X[test_index]
    Y_train, Y_test = Y[train_index], Y[test_index]

    # Train the model on the training data
    model.fit(X_train, Y_train)

    # Generate the classification report for each fold
    report_fold = classification_report(Y_test, model.predict(X_test), output_dict=True)
    
    # Extract precision and support from the report
    precision_list.append(report_fold['weighted avg']['precision'])
    support_fold = np.sum(report_fold['weighted avg']['support'])
    support_list.append(support_fold)

    print(f"\nFold {fold} - Precision: {report_fold['weighted avg']['precision']:.3f}")

# Check if there are any precision values in the list
if precision_list:
    # Print the average precision over all folds
    average_precision = np.average(precision_list, weights=support_list)
    print("\nAverage Precision:", f"{average_precision:.3f}")
else:
    print("No precision values available.")


Label Mapping:
Label: C, Numerical Value: 0
Label: CL, Numerical Value: 1
Label: D, Numerical Value: 2

Fold 1 - Precision: 0.705

Fold 2 - Precision: 0.484

Fold 3 - Precision: 0.685

Fold 4 - Precision: 0.667

Fold 5 - Precision: 0.691

Average Precision: 0.646
