In [1]:
#A2import numpy as np
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.datasets import load_iris  # Replace this with your actual dataset
from sklearn.metrics import accuracy_score

# Load your dataset (replace with your dataset)
data = load_iris()  # Using iris dataset as an example
X = data.data  # Features
y = data.target  # Labels

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define Perceptron hyperparameter grid
perceptron_params = {
    'penalty': ['l2', 'l1', 'elasticnet', None],
    'alpha': [0.0001, 0.001, 0.01, 0.1],
    'max_iter': [1000, 2000, 3000],
    'tol': [1e-3, 1e-4, 1e-5]
}

# Define MLPClassifier hyperparameter grid
mlp_params = {
    'hidden_layer_sizes': [(50,), (100,), (50, 50)],
    'activation': ['relu', 'tanh', 'logistic'],
    'solver': ['adam', 'sgd'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate': ['constant', 'adaptive'],
    'max_iter': [200, 400, 600]
}

# Initialize Perceptron and MLP
perceptron = Perceptron()
mlp = MLPClassifier()

# RandomizedSearchCV for Perceptron
random_search_perceptron = RandomizedSearchCV(perceptron, perceptron_params, n_iter=10, cv=5, random_state=42)
random_search_perceptron.fit(X_train, y_train)

# RandomizedSearchCV for MLP
random_search_mlp = RandomizedSearchCV(mlp, mlp_params, n_iter=10, cv=5, random_state=42)
random_search_mlp.fit(X_train, y_train)

# Print the best hyperparameters for both models
print("Best parameters for Perceptron:", random_search_perceptron.best_params_)
print("Best parameters for MLP:", random_search_mlp.best_params_)

# Make predictions with the best model from RandomizedSearchCV
y_pred_perceptron = random_search_perceptron.best_estimator_.predict(X_test)
y_pred_mlp = random_search_mlp.best_estimator_.predict(X_test)

# Evaluate the models using accuracy
accuracy_perceptron = accuracy_score(y_test, y_pred_perceptron)
accuracy_mlp = accuracy_score(y_test, y_pred_mlp)

print(f"Perceptron Accuracy: {accuracy_perceptron}")
print(f"MLP Accuracy: {accuracy_mlp}")




Best parameters for Perceptron: {'tol': 0.0001, 'penalty': 'elasticnet', 'max_iter': 2000, 'alpha': 0.0001}
Best parameters for MLP: {'solver': 'adam', 'max_iter': 200, 'learning_rate': 'constant', 'hidden_layer_sizes': (50,), 'alpha': 0.01, 'activation': 'tanh'}
Perceptron Accuracy: 0.5666666666666667
MLP Accuracy: 1.0




In [2]:
#A3
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load your dataset (replace with the correct file path)
file_path = r'C:\Users\Siva Adithya\Downloads\DCT_withoutduplicate 5.csv'
data = pd.read_csv(file_path)

# Filter the dataset to include only records with LABEL 3333 and 3334
data_filtered = data[data['LABEL'].isin([3333, 3334])]

# Map labels 3333 to 0 and 3334 to 1
data_filtered['LABEL'] = data_filtered['LABEL'].map({3333: 0, 3334: 1})

# Split the dataset into features and labels
X = data_filtered.drop('LABEL', axis=1)
y = data_filtered['LABEL']

# Split the dataset into training and testing sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define a dictionary of classifiers to evaluate
classifiers = {
    "SVM": SVC(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "AdaBoost": AdaBoostClassifier(),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
    "Naive Bayes": GaussianNB(),
    "CatBoost": CatBoostClassifier(silent=True)  # Set silent=True to suppress output
}

# Create a list to store the results
results = []

# Train and evaluate each classifier
for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    
    # Calculate performance metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    
    # Store the results in the list
    results.append({
        "Classifier": name,
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1-Score": f1
    })

# Convert the list of results to a DataFrame
results_df = pd.DataFrame(results)

# Display the results
print(results_df)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_filtered['LABEL'] = data_filtered['LABEL'].map({3333: 0, 3334: 1})
Parameters: { "use_label_encoder" } are not used.



      Classifier  Accuracy  Precision    Recall  F1-Score
0            SVM  0.777778   0.833333  0.777778  0.738095
1  Decision Tree  0.944444   0.952381  0.944444  0.945373
2  Random Forest  1.000000   1.000000  1.000000  1.000000
3       AdaBoost  1.000000   1.000000  1.000000  1.000000
4        XGBoost  1.000000   1.000000  1.000000  1.000000
5    Naive Bayes  0.944444   0.948718  0.944444  0.943030
6       CatBoost  1.000000   1.000000  1.000000  1.000000
