<a href="https://colab.research.google.com/github/itsyourvip/desktop-tutorial/blob/main/TWSVM_ACC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from cvxopt import solvers, matrix
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE
from sklearn.feature_selection import SelectKBest, f_classif


In [None]:
from sklearn.base import BaseEstimator, ClassifierMixin # import the BaseEstimator and ClassifierMixin

class TWSVM(ClassifierMixin, BaseEstimator):
    """Linear twin support vector machine for labels in {+1, -1}.

    Two non-parallel hyperplanes are fitted, one per class, each by solving
    a box-constrained quadratic program with cvxopt. A sample is assigned to
    the class whose hyperplane gives the smaller absolute value of
    ``x @ w + b``.

    ``ClassifierMixin`` is listed before ``BaseEstimator`` because
    scikit-learn documents that mixins belong on the left of
    ``BaseEstimator`` in the base-class list.

    Parameters
    ----------
    c1 : float, default=1.0
        Box bound used in the QP of the first (class +1) hyperplane.
    c2 : float, default=1.0
        Box bound used in the QP of the second (class -1) hyperplane.
    reg_term : float, default=1e-5
        Multiple of the identity added to ``H.T @ H`` / ``G.T @ G`` before
        they are inverted.

    Attributes
    ----------
    w1, w2 : ndarray of shape (n_features,)
        Weight vectors of the two hyperplanes (set only by a successful fit).
    b1, b2 : float
        Bias terms of the two hyperplanes (set only by a successful fit).
    classes_ : ndarray of shape (2,)
        ``[-1, 1]``, the labels ``predict`` can return. scikit-learn's
        classifier conventions expect this attribute after ``fit``.
    """

    def __init__(self, c1=1.0, c2=1.0, reg_term=1e-5):
        self.c1 = c1
        self.c2 = c2
        self.reg_term = reg_term

    @staticmethod
    def _solve_plane(own, other, c, reg_term):
        """Solve one twin-SVM QP and return ``gram^-1 @ other.T @ alpha``.

        ``own`` and ``other`` are the bias-augmented sample matrices of the
        class the plane should hug and of the opposite class. Returns a
        column vector of shape ``(n_features + 1, 1)``, or ``None`` when the
        regularised Gram matrix is ill-conditioned or cvxopt raises.
        """
        gram = own.T @ own + reg_term * np.identity(own.shape[1])
        # Refuse to work with a Gram matrix whose condition number is 1e10
        # or more (a NaN condition number is rejected as well).
        if not np.linalg.cond(gram) < 1e10:
            return None

        # gram^-1 @ other.T, computed once and reused for both the QP matrix
        # and the final hyperplane; solve() avoids forming an explicit inverse.
        projected = np.linalg.solve(gram, other.T)

        n_other = other.shape[0]
        eye = np.identity(n_other)
        P = matrix(other @ projected, tc='d')
        q = matrix(np.ones((n_other, 1)), tc='d')
        # cvxopt minimises 0.5 x'Px + q'x subject to G_qp x <= h_qp.  These
        # rows encode -x <= c and x <= 0, i.e. -c <= x <= 0: the multipliers
        # come out as the negation of a 0 <= alpha <= c solution.  predict()
        # only uses absolute values, so the resulting global sign of each
        # hyperplane does not affect the predicted labels.
        G_qp = matrix(np.vstack([-eye, eye]), tc='d')
        h_qp = matrix(
            np.vstack([c * np.ones((n_other, 1)), np.zeros((n_other, 1))]),
            tc='d',
        )

        try:
            sol = solvers.qp(P, q, G_qp, h_qp)
        except Exception as exc:  # best effort: report the failure, do not crash fit
            print(f"QP problem failed: {exc}")
            return None

        alpha = np.array(sol['x'])
        return projected @ alpha

    def fit(self, X, y):
        """Fit both hyperplanes.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples.
        y : array-like of shape (n_samples,)
            Labels; only rows with ``y == 1`` and ``y == -1`` are used.

        Returns
        -------
        self : TWSVM
            The estimator. If either QP cannot be solved a message is
            printed and the estimator is left unfitted.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        # Drop the result of any earlier fit so that a failure below can
        # never leave a mix of old and new hyperplanes behind.
        for stale in ("w1", "b1", "w2", "b2", "classes_"):
            if hasattr(self, stale):
                delattr(self, stale)

        # Matrix A (class 1 samples) and Matrix B (class -1 samples)
        A = X[y == 1]
        B = X[y == -1]

        # Define H = [A e1], G = [B e2] with e1, e2 columns of ones
        H = np.column_stack((A, np.ones((A.shape[0], 1))))
        G = np.column_stack((B, np.ones((B.shape[0], 1))))

        # First hyperplane
        plane1 = self._solve_plane(H, G, self.c1, self.reg_term)
        if plane1 is None:
            print("Failed to solve QP for the first hyperplane.")
            return self

        # Second hyperplane
        plane2 = self._solve_plane(G, H, self.c2, self.reg_term)
        if plane2 is None:
            print("Failed to solve QP for the second hyperplane.")
            return self

        # Flatten to 1-D so that predict() returns shape (n_samples,)
        # rather than an (n_samples, 1) column.
        w1_b1 = -plane1.ravel()
        w2_b2 = plane2.ravel()

        self.w1 = w1_b1[:-1]
        self.b1 = float(w1_b1[-1])
        self.w2 = w2_b2[:-1]
        self.b2 = float(w2_b2[-1])
        self.classes_ = np.array([-1, 1])

        return self

    def predict(self, X):
        """Return +1 or -1 for every row of ``X``.

        A row is labelled +1 when ``|x @ w1 + b1| < |x @ w2 + b2|`` and -1
        otherwise.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not completed successfully.
        """
        from sklearn.utils.validation import check_is_fitted

        check_is_fitted(self, ["w1", "b1", "w2", "b2"])

        X = np.asarray(X)
        dist_1 = np.abs(X @ self.w1 + self.b1)
        dist_2 = np.abs(X @ self.w2 + self.b2)

        return np.where(dist_1 < dist_2, 1, -1)

    def score(self, X, y):
        """Return the accuracy of ``predict(X)`` against ``y``."""
        y_pred = self.predict(X)
        return accuracy_score(y, y_pred)


In [None]:
# Load the Breast Cancer Wisconsin dataset
data = load_breast_cancer()
X = data.data
y = data.target

# Convert labels to +1 and -1
y = np.where(y == 1, 1, -1)

# Split BEFORE any fitted preprocessing. Fitting the scaler, PCA, SMOTE or
# the feature selector on the full dataset would leak test information into
# training and put synthetic SMOTE samples into the test set.
# stratify=y keeps the class ratio the same in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Normalize the data (statistics come from the training split only)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Apply PCA to reduce dimensionality
pca = PCA(n_components=0.95)  # retain 95% of the variance
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

# Apply SMOTE to balance the training set; the test set keeps only real samples
smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train, y_train)

# Perform feature selection; k is capped because PCA may keep fewer than 10
# components and SelectKBest rejects k larger than the number of features
selector = SelectKBest(f_classif, k=min(10, X_train.shape[1]))
X_train = selector.fit_transform(X_train, y_train)
X_test = selector.transform(X_test)


In [None]:
# Define the parameter grid
param_grid = {
    'c1': [0.1, 1, 10, 100],
    'c2': [0.1, 1, 10, 100],
    'reg_term': [1e-5, 1e-4, 1e-3],
}

# Initialize TWSVM model
twsvm = TWSVM()

# Perform grid search with 5-fold stratified cross-validation
grid_search = GridSearchCV(
    twsvm,
    param_grid,
    cv=StratifiedKFold(n_splits=5),
    scoring='accuracy',
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Best parameters
best_params = grid_search.best_params_
print(f"Best parameters: {best_params}")

# GridSearchCV uses refit=True by default, so best_estimator_ has already
# been retrained on all of X_train with the best parameters; fitting it
# again would only repeat both QP solves.
best_twsvm = grid_search.best_estimator_

# Evaluate the model on the held-out test split
accuracy = best_twsvm.score(X_test, y_test)
print(f'Accuracy: {accuracy * 100:.2f}%')


 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan]


     pcost       dcost       gap    pres   dres
 0: -2.0516e+01 -4.7473e+01  1e+03  2e+01  5e-16
 1: -8.4349e+00 -4.3853e+01  2e+02  3e+00  5e-16
 2: -5.4200e+00 -3.0106e+01  2e+01  4e-16  1e-15
 3: -6.3873e+00 -1.0269e+01  4e+00  2e-16  5e-16
 4: -7.6147e+00 -8.3158e+00  7e-01  2e-16  3e-16
 5: -7.8724e+00 -8.0597e+00  2e-01  2e-16  3e-16
 6: -7.9374e+00 -7.9848e+00  5e-02  2e-16  3e-16
 7: -7.9567e+00 -7.9634e+00  7e-03  2e-16  3e-16
 8: -7.9598e+00 -7.9600e+00  2e-04  2e-16  4e-16
 9: -7.9599e+00 -7.9599e+00  4e-06  2e-16  3e-16
Optimal solution found.
     pcost       dcost       gap    pres   dres
 0: -2.0453e+01 -4.5249e+01  9e+02  2e+01  3e-16
 1: -1.3899e+01 -4.1207e+01  6e+01  7e-01  4e-16
 2: -1.2945e+01 -2.0215e+01  7e+00  2e-16  6e-16
 3: -1.3906e+01 -1.5564e+01  2e+00  2e-16  3e-16
 4: -1.4347e+01 -1.4780e+01  4e-01  2e-16  2e-16
 5: -1.4456e+01 -1.4626e+01  2e-01  2e-16  2e-16
 6: -1.4505e+01 -1.4558e+01  5e-02  2e-16  2e-16
 7: -1.4526e+01 -1.4532e+01  7e-03  2e-16  2e-1