In [None]:
from ucimlrepo import fetch_ucirepo 
from sklearn.datasets import fetch_openml
import numpy as np
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import pandas as pd
import warnings
from sklearn.exceptions import ConvergenceWarning

In [None]:
def OneHot(y_in, K):
    """One-hot encode a collection of class labels.

    Parameters
    ----------
    y_in : pandas.DataFrame, pandas.Series, numpy.ndarray or sequence
        Class labels. The input is flattened into a single column before
        encoding, so every value is treated as one label.
    K : int
        Expected number of classes. Accepted for interface compatibility but
        not used: the categories are inferred from the data
        (``categories='auto'``).

    Returns
    -------
    numpy.ndarray
        Dense array with one row per label and one column per distinct label
        value found in ``y_in``.
    """
    # pandas containers expose to_numpy(); everything else (ndarray, list,
    # tuple) goes through np.asarray so that .reshape is always available.
    if isinstance(y_in, (pd.DataFrame, pd.Series)):
        labels = y_in.to_numpy()
    else:
        labels = np.asarray(y_in)

    # OneHotEncoder expects a 2-D (n_samples, n_features) input. It encodes
    # string labels directly, so no separate LabelEncoder pass is required.
    encoder = OneHotEncoder(categories='auto', sparse_output=False)
    return encoder.fit_transform(labels.reshape(-1, 1))

In [None]:
# Fetch the dataset registered under UCI repository id 53.
iris = fetch_ucirepo(id=53)

# Data (as pandas dataframes)
iris_X = iris.data.features
iris_y = iris.data.targets

# Standardize X
scaler = StandardScaler()
iris_X = scaler.fit_transform(iris_X)

# LabelEncoder expects a 1-D label array. Flatten the targets first so that
# scikit-learn does not emit a DataConversionWarning for a column vector.
label_encoder = LabelEncoder()
int_iris_y = label_encoder.fit_transform(iris_y.to_numpy().ravel())
onehot_iris_y = OneHot(iris_y, 3)

# Print results (optional)
print("Encoded Features Shape:", iris_X.shape)
print("Targets Shape:", int_iris_y.shape)
print("OneHot Encoded Targets Shape:", onehot_iris_y.shape)

In [None]:
def encode_categorical_features(df):
    """Replace the object-dtype columns of ``df`` with one-hot encoded columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. Columns selected by ``select_dtypes(include=['object'])``
        are treated as categorical; all other columns are passed through.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the non-object columns followed by the one-hot
        columns (named via ``get_feature_names_out``). The row index of
        ``df`` is preserved. ``df`` itself is not modified.
    """
    categorical_cols = df.select_dtypes(include=['object']).columns

    # Nothing to encode: hand back an unmodified copy rather than fitting an
    # encoder on zero columns.
    if len(categorical_cols) == 0:
        return df.copy()

    encoder = OneHotEncoder(sparse_output=False)
    encoded_df = pd.DataFrame(
        encoder.fit_transform(df[categorical_cols]),
        columns=encoder.get_feature_names_out(categorical_cols),
        # Reuse the caller's index: concat(axis=1) aligns on index labels, so
        # a fresh RangeIndex would misalign rows (and introduce NaNs) whenever
        # df is not indexed 0..n-1.
        index=df.index,
    )

    return pd.concat([df.drop(categorical_cols, axis=1), encoded_df], axis=1)

In [None]:
# Fetch the dataset registered under UCI repository id 73.
mushroom = fetch_ucirepo(id=73)
mushroom_X = mushroom.data.features
mushroom_y = mushroom.data.targets

# One-hot encode the object-dtype feature columns.
mushroom_X = encode_categorical_features(mushroom_X)

# Standardize X
scaler = StandardScaler()
mushroom_X = scaler.fit_transform(mushroom_X)

# LabelEncoder expects a 1-D label array. Flatten the targets first so that
# scikit-learn does not emit a DataConversionWarning for a column vector.
label_encoder = LabelEncoder()
int_mushroom_y = label_encoder.fit_transform(mushroom_y.to_numpy().ravel())
onehot_mushroom_y = OneHot(mushroom_y, 2)


# Print results (optional)
print("Encoded Features Shape:", mushroom_X.shape)
print("Targets Shape:", int_mushroom_y.shape)
print("OneHot Encoded Targets Shape:", onehot_mushroom_y.shape)

In [None]:
# Download the dataset registered under UCI repository id 80.
optical_recognition_of_handwritten_digits = fetch_ucirepo(id=80)

# Targets: keep the raw frame, a flattened 1-D array and a one-hot matrix.
optical_recognition_of_handwritten_digits_y = optical_recognition_of_handwritten_digits.data.targets
int_digits_y = optical_recognition_of_handwritten_digits_y.to_numpy().reshape(-1)
onehot_digits_y = OneHot(optical_recognition_of_handwritten_digits_y, 10)

# Features: one-hot encode any object-dtype columns, then standardize.
optical_recognition_of_handwritten_digits_X = encode_categorical_features(
    optical_recognition_of_handwritten_digits.data.features
)
scaler = StandardScaler()
digits_X = scaler.fit_transform(optical_recognition_of_handwritten_digits_X)

# Print results (optional)
print("Encoded Features Shape:", digits_X.shape)
print("Targets Shape:", int_digits_y.shape)
print("OneHot Encoded Targets Shape:", onehot_digits_y.shape)


In [None]:
from sklearn.model_selection import KFold
def cross_validate(model, X, y, folds=5, shuffle=True, random_state=42):
    """Estimate mean accuracy (in percent) of ``model`` with K-fold cross-validation.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit(X, y)`` and ``score(X, y)``. It is refitted on
        every fold.
    X : array-like
        Feature matrix, indexed by row position.
    y : array-like
        Targets aligned with the rows of ``X``.
    folds : int, default 5
        Number of folds.
    shuffle : bool, default True
        Shuffle the rows before splitting. Sequential folds are misleading
        when the data is ordered by class, so shuffling is on by default.
        Pass ``shuffle=False`` for contiguous, unshuffled folds.
    random_state : int or None, default 42
        Seed for the shuffle; ignored when ``shuffle`` is False.

    Returns
    -------
    numpy.float64
        Mean of the per-fold scores (each already rounded to two decimals),
        itself rounded to two decimals. The per-fold scores are printed.
    """
    # Positional indexing below requires arrays, not DataFrames or lists.
    X = np.asarray(X)
    y = np.asarray(y)

    # KFold rejects a random_state when shuffle is off, so only pass it on
    # when it is actually used.
    kf = KFold(
        n_splits=folds,
        shuffle=shuffle,
        random_state=random_state if shuffle else None,
    )

    results = []
    for train_index, test_index in kf.split(X):
        model.fit(X[train_index], y[train_index])
        fold_score = model.score(X[test_index], y[test_index]) * 100
        # score() may return a built-in float, which has no .round() method.
        results.append(round(float(fold_score), 2))

    print(results)
    return np.mean(results).round(2)


In [None]:
# Each entry is [display name, feature matrix, one-hot encoded targets].
datasetlist = [
    ["Digits", digits_X, onehot_digits_y],
    ["Mushroom", mushroom_X, onehot_mushroom_y],
    ["IRIS", iris_X, onehot_iris_y],
]

In [None]:
# Hidden-layer widths to compare; every model gets two hidden layers of the
# same width.
hidden_node_sizes = [128, 64, 32]
epochs = 100  # forwarded to MLPClassifier as max_iter

# Silence scikit-learn's ConvergenceWarning from here on.
warnings.filterwarnings('ignore', category=ConvergenceWarning)

# Cross-validate one MLP per (dataset, width) pair.
for set_name, X, y in datasetlist:
    print(set_name)
    for hidden_nodes in hidden_node_sizes:
        layer_shape = (hidden_nodes,) * 2
        model = MLPClassifier(
            random_state=42,
            learning_rate_init=1e-3,
            activation='relu',
            max_iter=epochs,
            hidden_layer_sizes=layer_shape,
        )
        scores = cross_validate(model, X, y)

        # Report the mean cross-validated accuracy for this width.
        print(f"Nodes: {hidden_nodes}, Score: {scores:.2f} ")


In [None]:
# Each entry is [display name, feature matrix, integer-encoded targets].
datasetlist = [
    ["Digits", digits_X, int_digits_y],
    ["Mushroom", mushroom_X, int_mushroom_y],
    ["IRIS", iris_X, int_iris_y],
]

In [None]:
# SVM configurations to compare.
kernels = ['linear', 'poly', 'rbf']
degrees = [2, 3]  # swept only for the polynomial kernel
gammas = [0.01, 0.1, 1]  # swept only for the RBF kernel

# Cross-validate every kernel configuration on every dataset.
for set_name, X, y in datasetlist:
    print(set_name)
    for kernel in kernels:
        # Polynomial kernel: one run per degree.
        if kernel == 'poly':
            for degree in degrees:
                model = SVC(C=1, kernel=kernel, degree=degree)
                score = cross_validate(model, X, y)
                print(f"Kernel: {kernel}, Degree: {degree}, Score: {score:.2f}")
            continue

        # RBF kernel: one run per gamma.
        if kernel == 'rbf':
            for gamma in gammas:
                model = SVC(C=1, kernel=kernel, gamma=gamma)
                score = cross_validate(model, X, y)
                print(f"Kernel: {kernel}, Gamma: {gamma}, Score: {score:.2f}")
            continue

        # Any other kernel has no extra hyper-parameter to sweep.
        model = SVC(C=1, kernel=kernel)
        score = cross_validate(model, X, y)
        print(f"Kernel: {kernel}, Score: {score:.2f}")


In [None]:
def RM(X, order):
    """Build the regressor matrix P (m x K) from an input matrix.

    For an input with m samples and l features the columns of P are, in order:

    * one bias column of ones,
    * ``X ** i`` for ``i = 1 .. order`` (l columns per power),
    * ``X * s ** (i - 1)`` for ``i = 2 .. order`` (l columns per power), where
      ``s`` is the per-row sum of the features.

    Hence ``K = 1 + l * order + l * (order - 1)``.

    Parameters
    ----------
    X : array-like of shape (m, l)
        Input matrix; converted to a float array.
    order : int
        Desired order of approximation, at least 1.

    Returns
    -------
    numpy.ndarray of shape (m, K)
        Float regressor matrix.

    Raises
    ------
    ValueError
        If ``X`` is not two-dimensional or ``order`` is smaller than 1.
    """
    # Work in floating point from the start so integer inputs cannot overflow
    # when raised to higher powers.
    X = np.asarray(X, dtype=float)
    if X.ndim != 2:
        raise ValueError(f"X must be 2-dimensional (m, l), got shape {X.shape}")
    if order < 1:
        raise ValueError(f"order must be >= 1, got {order}")

    m = X.shape[0]
    # Column vector of row sums; broadcasts against every feature column.
    row_sum = X.sum(axis=1, keepdims=True)

    power_terms = [X ** i for i in range(1, order + 1)]
    cross_terms = [X * row_sum ** (i - 1) for i in range(2, order + 1)]

    return np.concatenate([np.ones((m, 1))] + power_terms + cross_terms, axis=1)

# Quick demonstration: build the order-2 regressor matrix for a 3x2 input.
order = 2
X = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])
P = RM(X, order)

In [None]:
# Orders of the RM feature expansion to compare.
orders = [1, 2, 3, 4, 5]

# Cross-validate a logistic-regression classifier on the RM features of
# every dataset, once per order.
for set_name, X, y in datasetlist:
    print(set_name)
    for order in orders:
        P = RM(X, order)
        # Logistic regression on the expanded features. max_iter is raised to
        # 1000 (the same value the plotting cell uses for this model) so the
        # solver is not cut off at the default iteration cap.
        model = LogisticRegression(max_iter=1000)
        score = cross_validate(model, P, y)
        print(f"Order: {order}, Score: {score}")


In [None]:
# Score table keyed by model family, then by configuration.
# NOTE(review): these values are hard-coded here rather than collected from
# the experiment cells, and the MLP keys (10/20/50 nodes) differ from the
# hidden_node_sizes used earlier — presumably placeholders; confirm.
results = {
    'SVM': {
        'linear': 0.85,
        'poly': 0.82,
        'rbf': 0.87,
    },
    'MLP': {
        '10 nodes': 0.80,
        '20 nodes': 0.83,
        '50 nodes': 0.85,
    },
    'RM': {
        'order 1': 0.78,
        'order 2': 0.81,
        'order 3': 0.84,
        'order 4': 0.86,
        'order 5': 0.85,
    },
}

# Print one heading per model followed by its indented configuration scores.
print("Model Comparison:")
for model in results:
    print(f"{model}:")
    scores = results[model]
    for setting in scores:
        score = scores[setting]
        print(f"  {setting}: {score}")


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, cross_val_score, KFold
# Each entry is [display name, feature matrix, integer-encoded targets].
datasetlist = [
    ["Digits", digits_X, int_digits_y],
    ["Mushroom", mushroom_X, int_mushroom_y],
    ["IRIS", iris_X, int_iris_y],
]

# Define a function to plot the average training results
def plot_avg_training_results(X, y, dataset_name):
    """Plot 5-fold train/test accuracy of logistic regression on RM features.

    For each RM order from 1 to 5 the features are expanded with ``RM``, a
    ``LogisticRegression`` model is fitted on every fold, and the training and
    testing accuracies (in percent) are recorded. Two figures are shown:

    1. one subplot per order with the per-fold accuracies;
    2. the accuracies averaged over the folds, plotted against the order.

    Parameters
    ----------
    X : array-like of shape (m, l)
        Feature matrix passed to ``RM``.
    y : array-like of length m
        Class labels aligned with the rows of ``X``.
    dataset_name : str
        Name used in the figure titles.

    Returns
    -------
    None
    """
    orders = range(1, 6)
    order_result1, order_result2 = [], []  # mean train / mean test accuracy per order
    # One subplot per order; derive the count from `orders` so they stay in sync.
    fig, axes = plt.subplots(1, len(orders), figsize=(25, 5), sharey=True)

    # Positional indexing below requires an array, not a DataFrame or list.
    y = np.asarray(y)

    # Shuffle before splitting: sequential folds are misleading when the rows
    # are ordered by class. The fixed seed gives every order the same folds.
    kf = KFold(n_splits=5, shuffle=True, random_state=42)

    for ax, order in zip(axes, orders):
        P = RM(X, order)
        model = LogisticRegression(max_iter=1000)
        train_results, test_results = [], []

        for train_index, test_index in kf.split(P):
            X_train, X_test = P[train_index], P[test_index]
            y_train, y_test = y[train_index], y[test_index]
            model.fit(X_train, y_train)
            # score() may return a built-in float, which has no .round() method.
            train_results.append(round(float(model.score(X_train, y_train) * 100), 2))
            test_results.append(round(float(model.score(X_test, y_test) * 100), 2))

        order_result1.append(np.mean(train_results))
        order_result2.append(np.mean(test_results))

        fold_numbers = range(1, len(train_results) + 1)
        ax.plot(fold_numbers, train_results, marker='o', label='Training Accuracy')
        ax.plot(fold_numbers, test_results, marker='x', label='Testing Accuracy')
        ax.set_title(f'Order: {order}')
        ax.set_xlabel('Fold')
        ax.set_ylabel('Accuracy')
        ax.legend()
        ax.grid(True)

    # This figure shows individual fold scores, not averages, so title it accordingly.
    fig.suptitle(f'Per-fold Training and Testing Accuracy for {dataset_name}', fontsize=16)
    plt.show()

    # Second figure: fold-averaged accuracy as a function of the RM order.
    plt.figure(figsize=(10, 6))
    plt.plot(orders, order_result1, marker='o', label='Average Training Accuracy')
    plt.plot(orders, order_result2, marker='x', label='Average Testing Accuracy')
    plt.title(f'Training and Testing 5-fold each Order Accuracy for {dataset_name}')
    plt.xlabel('RM Order')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.grid(True)
    plt.show()

# Plot average training results for each dataset
for set_name, X, y in datasetlist:
    print(set_name)
    plot_avg_training_results(X, y, set_name)
