In [2]:
# Import necessary libraries
from sklearn.datasets import load_wine, load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import numpy as np

# Load wine dataset
wine = load_wine()
X_wine = wine.data
y_wine = wine.target

# Split wine dataset
X_wine_train, X_wine_test, y_wine_train, y_wine_test = train_test_split(
    X_wine, y_wine, test_size=0.2, random_state=42
)

# Load breast cancer dataset
cancer = load_breast_cancer()
X_cancer = cancer.data
y_cancer = cancer.target

# Split breast cancer dataset
X_cancer_train, X_cancer_test, y_cancer_train, y_cancer_test = train_test_split(
    X_cancer, y_cancer, test_size=0.2, random_state=42
)

# Function to train and evaluate model
def evaluate_model(model, X_train, X_test, y_train, y_test, dataset_name):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{model.__class__.__name__} accuracy on {dataset_name}: {accuracy:.4f}")
    return accuracy

# Task I: Wine Classification
print("Wine Dataset Results:")
# (a) Logistic Regression
lr = LogisticRegression(random_state=42, max_iter=1000)
evaluate_model(lr, X_wine_train, X_wine_test, y_wine_train, y_wine_test, "wine")

# (b) Support Vector Machine
svm = SVC(random_state=42)
evaluate_model(svm, X_wine_train, X_wine_test, y_wine_train, y_wine_test, "wine")

# (c) Decision Tree Classifier
dt = DecisionTreeClassifier(random_state=42)
evaluate_model(dt, X_wine_train, X_wine_test, y_wine_train, y_wine_test, "wine")

# Sub-task 1: Decision Tree with different random state
print("\nSub-task 1: Decision Tree Random State Comparison")
dt_no_random = DecisionTreeClassifier()  # No random state
dt_random_42 = DecisionTreeClassifier(random_state=42)
dt_random_100 = DecisionTreeClassifier(random_state=100)

# Train and evaluate with different random states
acc_no_random = evaluate_model(dt_no_random, X_wine_train, X_wine_test, y_wine_train, y_wine_test, "wine (no random state)")
acc_random_42 = evaluate_model(dt_random_42, X_wine_train, X_wine_test, y_wine_train, y_wine_test, "wine (random_state=42)")
acc_random_100 = evaluate_model(dt_random_100, X_wine_train, X_wine_test, y_wine_train, y_wine_test, "wine (random_state=100)")

print("\nEffect of random state change:")
print(f"Difference between no random state and random_state=42: {abs(acc_no_random - acc_random_42):.4f}")
print(f"Difference between random_state=42 and random_state=100: {abs(acc_random_42 - acc_random_100):.4f}")

# Sub-task 2: Breast Cancer Classification
print("\nSub-task 2: Breast Cancer Dataset Results:")
# Logistic Regression
lr_cancer = LogisticRegression(random_state=42, max_iter=5000)
evaluate_model(lr_cancer, X_cancer_train, X_cancer_test, y_cancer_train, y_cancer_test, "breast cancer")

# Support Vector Machine
svm_cancer = SVC(random_state=42)
evaluate_model(svm_cancer, X_cancer_train, X_cancer_test, y_cancer_train, y_cancer_test, "breast cancer")

# Decision Tree Classifier
dt_cancer = DecisionTreeClassifier(random_state=42)
evaluate_model(dt_cancer, X_cancer_train, X_cancer_test, y_cancer_train, y_cancer_test, "breast cancer")

# Sub-task 3: Random Forest on Breast Cancer
print("\nSub-task 3: Random Forest on Breast Cancer")
rf_cancer = RandomForestClassifier(random_state=42)
evaluate_model(rf_cancer, X_cancer_train, X_cancer_test, y_cancer_train, y_cancer_test, "breast cancer")

Wine Dataset Results:
LogisticRegression accuracy on wine: 0.9722


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


SVC accuracy on wine: 0.8056
DecisionTreeClassifier accuracy on wine: 0.9444

Sub-task 1: Decision Tree Random State Comparison
DecisionTreeClassifier accuracy on wine (no random state): 0.9444
DecisionTreeClassifier accuracy on wine (random_state=42): 0.9444
DecisionTreeClassifier accuracy on wine (random_state=100): 0.9444

Effect of random state change:
Difference between no random state and random_state=42: 0.0000
Difference between random_state=42 and random_state=100: 0.0000

Sub-task 2: Breast Cancer Dataset Results:
LogisticRegression accuracy on breast cancer: 0.9561
SVC accuracy on breast cancer: 0.9474
DecisionTreeClassifier accuracy on breast cancer: 0.9474

Sub-task 3: Random Forest on Breast Cancer
RandomForestClassifier accuracy on breast cancer: 0.9649


0.9649122807017544