In [2]:
pip install scikit-learn catboost xgboost shap lime

Note: you may need to restart the kernel to use updated packages.


In [4]:
pip install xlrd

Note: you may need to restart the kernel to use updated packages.


In [18]:
#### A1 ####
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV, ShuffleSplit
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler

# Read the dataset from CSV.
# NOTE(review): this is an absolute, machine-specific path; the cell only runs
# where this exact file exists.
csv_path = "D:/_STUDIES/ML/DCT_withoutduplicate 3 (1).csv"
df = pd.read_csv(csv_path)

# The 'LABEL' column is the target; every remaining column is a feature
y = df['LABEL']
X = df.drop(columns='LABEL')

# Hold out 20% of the rows for testing and train on the other 80%.
# The fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize features to zero mean / unit variance.
# The scaler learns its statistics from the training rows only and is then
# applied unchanged to both the training and the test rows.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Hyperparameter search space for the Perceptron
perceptron_params = dict(
    penalty=['l1', 'l2', None],      # regularization type (L1, L2, or none)
    alpha=[1e-4, 1e-3, 1e-2],        # regularization strength
    tol=[1e-3, 1e-4, 1e-2],          # stopping tolerance
    max_iter=[1000, 2000, 3000],     # upper bound on training iterations
)

# Hyperparameter search space for the MLPClassifier
mlp_params = dict(
    hidden_layer_sizes=[(50,), (100,), (50, 50)],  # one or two hidden layers
    activation=['relu', 'tanh'],                   # hidden-layer activation function
    solver=['adam', 'sgd'],                        # weight optimizer
    alpha=[1e-4, 1e-3, 1e-2],                      # L2 regularization term
    learning_rate=['constant', 'adaptive'],        # learning-rate schedule
    max_iter=[300, 500, 1000],                     # upper bound on training iterations
)

# ShuffleSplit cross-validation: 5 independent random splits of the training data,
# each holding out 20% for validation. The fixed seed gives both searches below
# the same splits on every run.
# NOTE(review): X_train was standardized on the full training set before this
# point, so each validation fold has already influenced the scaler statistics.
# Wrapping StandardScaler and the estimator in a Pipeline would remove that leak.
cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=42)

# Tune the Perceptron with RandomizedSearchCV: 10 hyperparameter combinations are
# sampled from perceptron_params and each is scored by accuracy on the cv splits.
print("Tuning Perceptron...")
perceptron = Perceptron(class_weight='balanced')  # balanced class weights
perceptron_search = RandomizedSearchCV(
    perceptron, perceptron_params, n_iter=10, scoring='accuracy', cv=cv, random_state=42, n_jobs=-1
)
perceptron_search.fit(X_train, y_train)  # cross-validated search on the training set

# Tune the MLP (Multi-Layer Perceptron) the same way.
# random_state is set on the estimator itself: MLPClassifier otherwise draws its
# weight initialization and batch shuffling from an unseeded RNG, so the chosen
# parameters, the CV score and the test report would change from run to run even
# though the data split, the CV splits and the parameter sampling are all seeded.
print("Tuning MLP...")
mlp = MLPClassifier(random_state=42)
mlp_search = RandomizedSearchCV(
    mlp, mlp_params, n_iter=10, scoring='accuracy', cv=cv, random_state=42, n_jobs=-1
)
mlp_search.fit(X_train, y_train)  # cross-validated search on the training set

# Report the winning hyperparameters and the cross-validation score of each search
for params_label, score_label, tuned_search in (
    ("Best Perceptron Parameters: ", "Best Perceptron Score: ", perceptron_search),
    ("Best MLP Parameters: ", "Best MLP Score: ", mlp_search),
):
    print(params_label, tuned_search.best_params_)
    print(score_label, tuned_search.best_score_)

# Predict the test-set labels with the best model found by each search
y_pred_perceptron = perceptron_search.predict(X_test)
y_pred_mlp = mlp_search.predict(X_test)

# Per-class precision / recall / F1 on the held-out test data.
# zero_division=0 reports 0 for any metric whose denominator is zero
# (e.g. precision of a class that was never predicted).
perceptron_report = classification_report(y_test, y_pred_perceptron, zero_division=0)
mlp_report = classification_report(y_test, y_pred_mlp, zero_division=0)

print("\nPerceptron Test Performance")
print(perceptron_report)

print("\nMLP Test Performance")
print(mlp_report)


Tuning Perceptron...
Tuning MLP...
Best Perceptron Parameters:  {'tol': 0.001, 'penalty': 'l1', 'max_iter': 1000, 'alpha': 0.0001}
Best Perceptron Score:  0.6294444444444445
Best MLP Parameters:  {'solver': 'adam', 'max_iter': 300, 'learning_rate': 'adaptive', 'hidden_layer_sizes': (50, 50), 'alpha': 0.0001, 'activation': 'tanh'}
Best MLP Score:  0.8427777777777777

Perceptron Test Performance
              precision    recall  f1-score   support

        3333       0.30      0.33      0.32         9
        3334       0.88      1.00      0.93         7
        3335       0.92      0.92      0.92        13
        3337       0.71      1.00      0.83         5
        3342       0.75      0.60      0.67        15
        3343       0.00      0.00      0.00         1
        3346       0.88      0.88      0.88         8
        3349       0.65      0.52      0.58        21
        3350       0.00      0.00      0.00         1
        3351       0.29      0.71      0.42         7
        

