## SCAD

In [16]:
#SCAD
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from scipy.optimize import minimize
import plotly.figure_factory as ff
import warnings
warnings.filterwarnings("ignore")

# Load the dataset
df = pd.read_csv("data.csv")

# Convert diagnosis column to binary (1 for M, 0 for B)
df["diagnosis"] = df["diagnosis"].map({"M": 1, "B": 0})

# Drop unnecessary column
df.drop("Unnamed: 32", axis=1, inplace=True)

# Standardize the features
scaler = StandardScaler()
x = df.drop("diagnosis", axis=1)
x_scaled = scaler.fit_transform(x)
y = df["diagnosis"]

# Split the data into train and test sets
x_train, x_test, y_train, y_test = train_test_split(x_scaled, y, test_size=0.2, random_state=42)

# Define the logistic loss function with SCAD penalty
def logistic_loss(w, X, y, alpha=1, gamma=2):  # SCAD parameters alpha and gamma
    n = len(y)
    yz = y * np.dot(X, w)
    loss = np.sum(np.log1p(np.exp(-yz)))
    reg = alpha * np.sum(np.where(np.abs(w) <= alpha, 0, np.where(np.abs(w) <= gamma * alpha, (gamma * alpha - np.abs(w)) / (gamma - 1), alpha / (gamma - 1))))
    return loss + reg

# Coordinate Descent
def coordinate_descent_scad(X, y, alpha=0.01, gamma=3.7, max_iter=1000, tol=1e-5):
    n, d = X.shape
    w = np.zeros(d)
    for _ in range(max_iter):
        w_prev = w.copy()
        for j in range(d):
            X_j = X[:, j]
            yz = y * np.dot(X, w)
            grad_j = -np.dot(X_j, y / (1 + np.exp(yz)))
            if w[j] == 0:
                w[j] = -grad_j * alpha / (1 + alpha)
            else:
                w[j] = np.sign(w[j]) * max(0, abs(grad_j) - alpha) / (1 + gamma)
        if np.linalg.norm(w - w_prev) < tol:
            break
    return w

# Stochastic Gradient Descent
def stochastic_gradient_descent_scad(X, y, alpha=0.01, gamma=3.7, max_iter=1000, tol=1e-5, learning_rate=0.01):
    n, d = X.shape
    w = np.zeros(d)
    for _ in range(max_iter):
        idx = np.random.permutation(n)
        for i in idx:
            X_i = X[i]
            y_i = y[i]
            yz = y_i * np.dot(X_i, w)
            grad = -X_i * y_i / (1 + np.exp(yz))
            w -= learning_rate * grad
            w = np.sign(w) * np.maximum(0, np.abs(w) - alpha / (1 + gamma))
        if np.linalg.norm(grad) < tol:
            break
    return w

# Proximal Gradient Descent
def proximal_gradient_descent_scad(X, y, alpha=0.01, gamma=3.7, max_iter=1000, tol=1e-5, learning_rate=0.01):
    n, d = X.shape
    w = np.zeros(d)
    for _ in range(max_iter):
        w_prev = w.copy()
        yz = y * np.dot(X, w)
        grad = -np.dot(X.T, y / (1 + np.exp(yz))) / n
        w -= learning_rate * grad
        w = np.sign(w) * np.maximum(0, np.abs(w) - alpha / (1 + gamma))
        if np.linalg.norm(w - w_prev) < tol:
            break
    return w

# Train models
w_cd = coordinate_descent_scad(x_train, y_train)
w_sgd = stochastic_gradient_descent_scad(x_train, np.array(y_train))
w_pgd = proximal_gradient_descent_scad(x_train, y_train)

# Initialize coefficients
initial_w = np.zeros(x_train.shape[1])

# Define the optimization function
result = minimize(logistic_loss, initial_w, args=(x_train, y_train), method='L-BFGS-B')

# Get the optimized coefficients
optimal_w = result.x

# Predict using the optimized coefficients
def predict(X, w):
    logits = np.dot(X, w)
    return np.where(logits >= 0, 1, 0)

def evaluate(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    print("Accuracy:", accuracy)
    print("Classification Report:")
    print(classification_report(y_true, y_pred))
    print("Confusion Matrix:")
    print(confusion_matrix(y_true, y_pred))
    # Create confusion matrix plot
    fig = ff.create_annotated_heatmap(
        z=confusion_matrix(y_true, y_pred),
        x=['Predicted 0', 'Predicted 1'],
        y=['Actual 0', 'Actual 1'],
        colorscale='Viridis'
    )

    fig.update_layout(
        title='Confusion Matrix',
        xaxis_title='Predicted label',
        yaxis_title='Actual label'
    )

    fig.show()

y_pred = predict(x_test, optimal_w)

y_pred_cd = predict(x_test, w_cd)
y_pred_sgd = predict(x_test, w_sgd)
y_pred_pgd = predict(x_test, w_pgd)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# Calculate other evaluation metrics
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Calculate confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

fig = ff.create_annotated_heatmap(
        z=conf_matrix,
        x=['Predicted 0', 'Predicted 1'],
        y=['Actual 0', 'Actual 1'],
        colorscale='Viridis'
    )

fig.update_layout(
    title='Confusion Matrix',
    xaxis_title='Predicted label',
    yaxis_title='Actual label'
)

fig.show()

print("Coordinate Descent:")
evaluate(y_test, y_pred_cd)

print("Stochastic Gradient Descent:")
evaluate(y_test, y_pred_sgd)

print("Proximal Gradient Descent:")
evaluate(y_test, y_pred_pgd)

Accuracy: 0.9123
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.89      0.93        71
           1       0.84      0.95      0.89        43

    accuracy                           0.91       114
   macro avg       0.90      0.92      0.91       114
weighted avg       0.92      0.91      0.91       114

Confusion Matrix:
[[63  8]
 [ 2 41]]


Coordinate Descent:
Accuracy: 0.9473684210526315
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.93      0.96        71
           1       0.89      0.98      0.93        43

    accuracy                           0.95       114
   macro avg       0.94      0.95      0.94       114
weighted avg       0.95      0.95      0.95       114

Confusion Matrix:
[[66  5]
 [ 1 42]]


Stochastic Gradient Descent:
Accuracy: 0.6842105263157895
Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.66      0.72        71
           1       0.56      0.72      0.63        43

    accuracy                           0.68       114
   macro avg       0.68      0.69      0.68       114
weighted avg       0.71      0.68      0.69       114

Confusion Matrix:
[[47 24]
 [12 31]]


Proximal Gradient Descent:
Accuracy: 0.37719298245614036
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        71
           1       0.38      1.00      0.55        43

    accuracy                           0.38       114
   macro avg       0.19      0.50      0.27       114
weighted avg       0.14      0.38      0.21       114

Confusion Matrix:
[[ 0 71]
 [ 0 43]]


## MCP

In [14]:
#MCP
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from scipy.optimize import minimize

# Load the dataset
df = pd.read_csv("data.csv")

# Convert diagnosis column to binary (1 for M, 0 for B)
df["diagnosis"] = df["diagnosis"].map({"M": 1, "B": 0})

# Drop unnecessary column
df.drop("Unnamed: 32", axis=1, inplace=True)

# Standardize the features
scaler = StandardScaler()
x = df.drop("diagnosis", axis=1)
x_scaled = scaler.fit_transform(x)
y = df["diagnosis"]

# Split the data into train and test sets
x_train, x_test, y_train, y_test = train_test_split(x_scaled, y, test_size=0.2, random_state=42)

# Define the logistic loss function with MCP penalty
def logistic_loss(w, X, y, alpha=0.001, gamma=1):  # MCP parameters alpha and gamma
    n = len(y)
    yz = y * np.dot(X, w)
    loss = np.sum(np.log1p(np.exp(-yz)))
    reg = alpha * np.sum(np.sqrt(1 + (w / (gamma * alpha))**2) - 1)
    return loss + reg

def coordinate_descent_mcp(X, y, alpha=0.001, gamma=1, max_iter=1000, tol=1e-5):
    n, d = X.shape
    w = np.zeros(d)
    for _ in range(max_iter):
        w_prev = w.copy()
        for j in range(d):
            X_j = X[:, j]
            yz = y * np.dot(X, w)
            grad_j = -np.dot(X_j, y / (1 + np.exp(yz)))
            if w[j] == 0:
                w[j] = -grad_j * alpha / (1 + alpha)
            else:
                w[j] = np.sign(w[j]) * max(0, abs(grad_j) - alpha * gamma) / ((1 + alpha * gamma) * gamma)
        if np.linalg.norm(w - w_prev) < tol:
            break
    return w

# Stochastic Gradient Descent for MCP Penalty
def stochastic_gradient_descent_mcp(X, y, alpha=0.001, gamma=1, max_iter=1000, tol=1e-5, learning_rate=0.01):
    n, d = X.shape
    w = np.zeros(d)
    for _ in range(max_iter):
        idx = np.random.permutation(n)
        for i in idx:
            X_i = X[i]
            y_i = y[i]
            yz = y_i * np.dot(X_i, w)
            grad = -X_i * y_i / (1 + np.exp(yz))
            w -= learning_rate * grad
            w = np.sign(w) * np.maximum(0, np.abs(w) - alpha * gamma) / ((1 + alpha * gamma) * gamma)
        if np.linalg.norm(grad) < tol:
            break
    return w

# Proximal Gradient Descent for MCP Penalty
def proximal_gradient_descent_mcp(X, y, alpha=0.001, gamma=1, max_iter=1000, tol=1e-5, learning_rate=0.01):
    n, d = X.shape
    w = np.zeros(d)
    for _ in range(max_iter):
        w_prev = w.copy()
        yz = y * np.dot(X, w)
        grad = -np.dot(X.T, y / (1 + np.exp(yz))) / n
        w -= learning_rate * grad
        w = np.sign(w) * np.maximum(0, np.abs(w) - alpha * gamma) / ((1 + alpha * gamma) * gamma)
        if np.linalg.norm(w - w_prev) < tol:
            break
    return w

# Train models
w_cd_mcp = coordinate_descent_mcp(x_train, y_train)
w_sgd_mcp = stochastic_gradient_descent_mcp(x_train, np.array(y_train))
w_pgd_mcp = proximal_gradient_descent_mcp(x_train, y_train)

# Initialize coefficients
initial_w = np.zeros(x_train.shape[1])

# Define the optimization function
result = minimize(logistic_loss, initial_w, args=(x_train, y_train), method='L-BFGS-B')

# Get the optimized coefficients
optimal_w = result.x

# Predict using the optimized coefficients
def predict(X, w):
    logits = np.dot(X, w)
    return np.where(logits >= 0, 1, 0)

def evaluate(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    print("Accuracy:", accuracy)
    print("Classification Report:")
    print(classification_report(y_true, y_pred))
    print("Confusion Matrix:")
    print(confusion_matrix(y_true, y_pred))
    # Create confusion matrix plot
    fig = ff.create_annotated_heatmap(
        z=confusion_matrix(y_true, y_pred),
        x=['Predicted 0', 'Predicted 1'],
        y=['Actual 0', 'Actual 1'],
        colorscale='Viridis'
    )

    fig.update_layout(
        title='Confusion Matrix',
        xaxis_title='Predicted label',
        yaxis_title='Actual label'
    )

    fig.show()

y_pred = predict(x_test, optimal_w)

# Predict using the optimized coefficients
y_pred_cd_mcp = predict(x_test, w_cd_mcp)
y_pred_sgd_mcp = predict(x_test, w_sgd_mcp)
y_pred_pgd_mcp = predict(x_test, w_pgd_mcp)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# Calculate other evaluation metrics
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Calculate confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

fig = ff.create_annotated_heatmap(
        z=conf_matrix,
        x=['Predicted 0', 'Predicted 1'],
        y=['Actual 0', 'Actual 1'],
        colorscale='Viridis'
    )

fig.update_layout(
    title='Confusion Matrix',
    xaxis_title='Predicted label',
    yaxis_title='Actual label'
)

fig.show()

# Evaluate models
print("Coordinate Descent with MCP Penalty:")
evaluate(y_test, y_pred_cd_mcp)

print("Stochastic Gradient Descent with MCP Penalty:")
evaluate(y_test, y_pred_sgd_mcp)

print("Proximal Gradient Descent with MCP Penalty:")
evaluate(y_test, y_pred_pgd_mcp)

Accuracy: 0.9298
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.92      0.94        71
           1       0.87      0.95      0.91        43

    accuracy                           0.93       114
   macro avg       0.92      0.93      0.93       114
weighted avg       0.93      0.93      0.93       114

Confusion Matrix:
[[65  6]
 [ 2 41]]


Coordinate Descent with MCP Penalty:
Accuracy: 0.9385964912280702
Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.92      0.95        71
           1       0.88      0.98      0.92        43

    accuracy                           0.94       114
   macro avg       0.93      0.95      0.94       114
weighted avg       0.94      0.94      0.94       114

Confusion Matrix:
[[65  6]
 [ 1 42]]


Stochastic Gradient Descent with MCP Penalty:
Accuracy: 0.9736842105263158
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.96      0.98        71
           1       0.93      1.00      0.97        43

    accuracy                           0.97       114
   macro avg       0.97      0.98      0.97       114
weighted avg       0.98      0.97      0.97       114

Confusion Matrix:
[[68  3]
 [ 0 43]]


Proximal Gradient Descent with MCP Penalty:
Accuracy: 0.9736842105263158
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.96      0.98        71
           1       0.93      1.00      0.97        43

    accuracy                           0.97       114
   macro avg       0.97      0.98      0.97       114
weighted avg       0.98      0.97      0.97       114

Confusion Matrix:
[[68  3]
 [ 0 43]]
