In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.svm import SVC
from sklearn.metrics import matthews_corrcoef, accuracy_score, precision_score, recall_score, f1_score
from sklearn.decomposition import PCA
from sklearn.base import clone
import random


class ImprovedSCSO:
    """Population-based minimiser using a sand-cat-swarm style search/attack move.

    Parameters
    ----------
    obj_func : callable
        Maps a 1-D position array to a scalar cost; lower is better.
    bounds : sequence of (low, high)
        One pair per search dimension.
    pop_size : int
        Number of individuals in the population.
    max_iter : int
        Number of iterations performed by :meth:`optimize`.

    Attributes
    ----------
    population : ndarray of shape (pop_size, dim)
        Current positions.
    fitness : ndarray of shape (pop_size,)
        Lowest cost each individual has reached so far. Moves are always
        accepted, so this may differ from the cost of the current position.
    best_pos, best_fit
        Best position found so far (an independent copy) and its cost.
    history : list
        Best cost after initialisation and after every iteration.
    """

    def __init__(self, obj_func, bounds, pop_size=200, max_iter=200):
        self.obj_func = obj_func
        self.bounds = bounds
        self.pop_size = pop_size
        self.max_iter = max_iter
        self.dim = len(bounds)
        self.population = None
        self.fitness = None
        self.best_pos = None
        self.best_fit = float('inf')
        self.history = []

    def initialize_population(self):
        """Sample the population uniformly inside the bounds and score it."""
        self.population = np.array([
            [random.uniform(b[0], b[1]) for b in self.bounds]
            for _ in range(self.pop_size)
        ])
        self.fitness = np.array([self.obj_func(pos) for pos in self.population])
        best_idx = np.argmin(self.fitness)
        # Copy the row: indexing returns a view, and the population is later
        # modified in place, which would silently overwrite the recorded best.
        self.best_pos = self.population[best_idx].copy()
        self.best_fit = self.fitness[best_idx]
        self.history.append(self.best_fit)

    def update_position(self, i, iter_num):
        """Move individual ``i`` in place for iteration ``iter_num``."""
        # 20% chance to make a random jump
        if random.random() < 0.2:
            self.population[i] = np.array([random.uniform(b[0], b[1]) for b in self.bounds])
            return

        # Two numbers are drawn, as before, so the random sequence is
        # unchanged; only the second one scales the step.
        _, r2 = random.random(), random.random()
        alpha = 2.0  # Exploration strength
        progress = iter_num / self.max_iter
        sens_range = alpha * (1 - progress)  # Linear decay

        # Adaptive search probability
        search_prob = 0.5 + 0.3 * progress
        if random.random() < search_prob:  # Searching (Exploration)
            rand_pos = self.population[random.randint(0, self.pop_size - 1)]
            new_pos = rand_pos + sens_range * r2 * (rand_pos - self.population[i])
        else:  # Attacking (Exploitation)
            new_pos = self.best_pos - sens_range * r2 * (self.best_pos - self.population[i])

        # Soft boundary handling: a coordinate on or beyond a limit is placed
        # 1%-10% of the range inside that limit instead of exactly on it.
        for j in range(self.dim):
            low, high = self.bounds[j][0], self.bounds[j][1]
            if new_pos[j] <= low:
                new_pos[j] = low + random.uniform(0.01, 0.1) * (high - low)
            elif new_pos[j] >= high:
                new_pos[j] = high - random.uniform(0.01, 0.1) * (high - low)

        self.population[i] = new_pos

    def optimize(self):
        """Run the search and return ``(best_pos, best_fit)``."""
        self.initialize_population()
        for iter_num in range(self.max_iter):
            for i in range(self.pop_size):
                self.update_position(i, iter_num)
                current_fit = self.obj_func(self.population[i])
                if current_fit < self.fitness[i]:
                    self.fitness[i] = current_fit
                    if current_fit < self.best_fit:
                        self.best_fit = current_fit
                        self.best_pos = self.population[i].copy()
                        print(f"Iter {iter_num}: New Best = {self.best_fit:.6f}")
            self.history.append(self.best_fit)
            print(f"Iteration {iter_num + 1}: Best Fitness = {self.best_fit:.6f}")
        return self.best_pos, self.best_fit

In [2]:
import numpy as np
import random
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, recall_score

class GrayWolfOptimizer:
    """Grey-wolf style minimiser: wolves move towards the three best positions seen.

    ``alpha``, ``beta`` and ``delta`` are the best, second-best and third-best
    positions found so far, with their costs in ``alpha_score``, ``beta_score``
    and ``delta_score``. ``history`` records ``alpha_score`` after each iteration.
    """

    def __init__(self, obj_func, bounds, pop_size=20, max_iter=50):
        self.obj_func = obj_func
        self.bounds = bounds
        self.pop_size = pop_size
        self.max_iter = max_iter
        self.dim = len(bounds)
        self.population = None
        self.alpha = None
        self.beta = None
        self.delta = None
        self.alpha_score = float('inf')
        self.beta_score = float('inf')
        self.delta_score = float('inf')
        self.history = []

    def initialize_population(self):
        """Draw every wolf uniformly inside the bounds, then rank the pack."""
        rows = []
        for _ in range(self.pop_size):
            rows.append([random.uniform(bound[0], bound[1]) for bound in self.bounds])
        self.population = np.array(rows)
        self.evaluate_population()

    def evaluate_population(self):
        """Score each wolf and slot it into the alpha/beta/delta ranking if it qualifies."""
        for wolf in self.population:
            fitness = self.obj_func(wolf)
            ranking = [
                (self.alpha_score, self.alpha),
                (self.beta_score, self.beta),
                (self.delta_score, self.delta),
            ]
            # First rank this wolf strictly beats, if any.
            slot = next(
                (rank for rank, (score, _) in enumerate(ranking) if fitness < score),
                None,
            )
            if slot is None:
                continue
            # Inserting pushes the lower-ranked leaders down by one place;
            # whoever falls off the end is dropped.
            ranking.insert(slot, (fitness, wolf))
            self.alpha_score, self.alpha = ranking[0]
            self.beta_score, self.beta = ranking[1]
            self.delta_score, self.delta = ranking[2]

    def update_position(self, wolf, iter_num):
        """Return the next position of ``wolf``; the input array is not modified."""
        a = 2 - iter_num * (2 / self.max_iter)  # shrinks linearly from 2 towards 0
        leaders = (self.alpha, self.beta, self.delta)
        moved = np.zeros_like(wolf)
        for j in range(self.dim):
            # Draw order matters for seeded runs: the three A values, then the three C values.
            A = [2 * a * random.random() - a for _ in range(3)]
            C = [2 * random.random() for _ in range(3)]
            x1, x2, x3 = (
                leader[j] - A[k] * abs(C[k] * leader[j] - wolf[j])
                for k, leader in enumerate(leaders)
            )
            low, high = self.bounds[j][0], self.bounds[j][1]
            moved[j] = np.clip((x1 + x2 + x3) / 3, low, high)
        return moved

    def optimize(self):
        """Run ``max_iter`` iterations and return ``(alpha, alpha_score)``."""
        self.initialize_population()
        for iter_num in range(self.max_iter):
            next_generation = [self.update_position(wolf, iter_num) for wolf in self.population]
            self.population = np.array(next_generation)
            self.evaluate_population()
            self.history.append(self.alpha_score)
            print(f"Iteration {iter_num + 1}: Best Fitness = {self.alpha_score:.6f}")
        return self.alpha, self.alpha_score

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import f1_score, recall_score
import random

class GrowthOptimization:
    """Minimiser in which each individual steps towards a randomly chosen peer.

    Parameters
    ----------
    obj_func : callable
        Maps a 1-D position array to a scalar cost; lower is better.
    bounds : sequence of (low, high)
        One pair per search dimension.
    pop_size : int
        Number of individuals.
    max_iter : int
        Number of iterations performed by :meth:`optimize`.

    Attributes
    ----------
    fitness : ndarray
        Lowest cost each individual has reached so far. Moves are always
        accepted, so this may differ from the cost of the current position.
    best_pos, best_fit
        Best position found so far (an independent copy) and its cost.
    history : list
        Best cost after initialisation and after every iteration.
    """

    def __init__(self, obj_func, bounds, pop_size=50, max_iter=100):
        self.obj_func = obj_func
        self.bounds = bounds
        self.pop_size = pop_size
        self.max_iter = max_iter
        self.dim = len(bounds)
        self.population = None
        self.fitness = None
        self.best_pos = None
        self.best_fit = float('inf')
        self.history = []

    def initialize_population(self):
        """Sample the population uniformly inside the bounds and score it."""
        self.population = np.array([
            [random.uniform(b[0], b[1]) for b in self.bounds]
            for _ in range(self.pop_size)
        ])
        self.fitness = np.array([self.obj_func(pos) for pos in self.population])
        best_idx = np.argmin(self.fitness)
        # Copy the row: indexing returns a view, and update_position() rewrites
        # population rows in place, which would corrupt the recorded best.
        self.best_pos = self.population[best_idx].copy()
        self.best_fit = self.fitness[best_idx]
        self.history.append(self.best_fit)

    def update_position(self, i, iter_num):
        """Move individual ``i`` in place towards a random peer.

        The step fraction shrinks linearly from 1 (jump onto the peer) at
        iteration 0 towards 0 as ``iter_num`` approaches ``max_iter``.
        """
        growth_factor = 1 - (iter_num / self.max_iter)

        rand_index = random.randint(0, self.pop_size - 1)
        new_pos = self.population[i] + growth_factor * (self.population[rand_index] - self.population[i])

        lower = [b[0] for b in self.bounds]
        upper = [b[1] for b in self.bounds]
        self.population[i] = np.clip(new_pos, lower, upper)

    def optimize(self):
        """Run the search and return ``(best_pos, best_fit)``."""
        self.initialize_population()
        for iter_num in range(self.max_iter):
            for i in range(self.pop_size):
                self.update_position(i, iter_num)
                current_fit = self.obj_func(self.population[i])
                if current_fit < self.fitness[i]:
                    self.fitness[i] = current_fit
                    if current_fit < self.best_fit:
                        self.best_fit = current_fit
                        self.best_pos = self.population[i].copy()
                        print(f"Iter {iter_num}: New Best = {self.best_fit:.6f}")
            self.history.append(self.best_fit)
            print(f"Iteration {iter_num + 1}: Best Fitness = {self.best_fit:.6f}")
        return self.best_pos, self.best_fit



In [4]:
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import f1_score, recall_score

class PSO:
    """Particle swarm minimiser with fixed inertia and acceleration coefficients.

    Parameters
    ----------
    obj_func : callable
        Maps a 1-D position array to a scalar cost; lower is better.
    bounds : sequence of (low, high)
        One pair per search dimension.
    pop_size : int
        Number of particles.
    max_iter : int
        Number of iterations performed by :meth:`optimize`.
    w, c1, c2 : float
        Inertia weight, cognitive coefficient and social coefficient.

    Attributes
    ----------
    pbest_pos, pbest_fit
        Best position and cost seen by each particle.
    gbest_pos, gbest_fit
        Best position seen by the swarm (an independent copy) and its cost.
    history : list
        ``gbest_fit`` after initialisation and after every iteration, matching
        the ``history`` kept by the other optimizers in this notebook.
    """

    def __init__(self, obj_func, bounds, pop_size=50, max_iter=100, w=0.5, c1=1.5, c2=1.5):
        self.obj_func = obj_func
        self.bounds = bounds
        self.pop_size = pop_size
        self.max_iter = max_iter
        self.w = w  # Inertia weight
        self.c1 = c1  # Cognitive coefficient
        self.c2 = c2  # Social coefficient
        self.dim = len(bounds)
        self.population = None
        self.velocity = None
        self.pbest_pos = None
        self.pbest_fit = None
        self.gbest_pos = None
        self.gbest_fit = float('inf')
        self.history = []

    def initialize_population(self):
        """Sample particles uniformly inside the bounds, at rest, and score them."""
        self.population = np.array([
            [random.uniform(b[0], b[1]) for b in self.bounds]
            for _ in range(self.pop_size)
        ])
        self.velocity = np.zeros((self.pop_size, self.dim))
        self.pbest_pos = self.population.copy()
        self.pbest_fit = np.array([self.obj_func(pos) for pos in self.population])
        best_idx = np.argmin(self.pbest_fit)
        # Copy so the global best does not alias a row of pbest_pos, which is
        # overwritten in place whenever that particle improves.
        self.gbest_pos = self.pbest_pos[best_idx].copy()
        self.gbest_fit = self.pbest_fit[best_idx]
        self.history.append(self.gbest_fit)

    def update_particles(self):
        """Advance every particle by one step and refresh personal/global bests."""
        # The bounds do not change during a sweep; build the limits once.
        lower = np.array([b[0] for b in self.bounds])
        upper = np.array([b[1] for b in self.bounds])
        for i in range(self.pop_size):
            # One scalar pair per particle, shared by all of its dimensions.
            r1, r2 = random.random(), random.random()
            self.velocity[i] = (
                self.w * self.velocity[i]
                + self.c1 * r1 * (self.pbest_pos[i] - self.population[i])
                + self.c2 * r2 * (self.gbest_pos - self.population[i])
            )
            # Move, then apply boundary constraints
            self.population[i] = np.clip(self.population[i] + self.velocity[i], lower, upper)

            # Evaluate fitness
            current_fit = self.obj_func(self.population[i])
            if current_fit < self.pbest_fit[i]:
                self.pbest_fit[i] = current_fit
                self.pbest_pos[i] = self.population[i].copy()
            if current_fit < self.gbest_fit:
                self.gbest_fit = current_fit
                self.gbest_pos = self.population[i].copy()

    def optimize(self):
        """Run the search and return ``(gbest_pos, gbest_fit)``."""
        self.initialize_population()
        for iter_num in range(self.max_iter):
            self.update_particles()
            self.history.append(self.gbest_fit)
            print(f"Iteration {iter_num + 1}: Best Fitness = {self.gbest_fit:.6f}")
        return self.gbest_pos, self.gbest_fit


In [5]:
class AdaptivePSO:
    """Particle swarm minimiser whose inertia weight decays linearly over the run.

    The swarm is sampled and scored in the constructor, so building an instance
    already costs ``pop_size`` objective evaluations.

    Parameters
    ----------
    obj_func : callable
        Maps a 1-D position array to a scalar cost; lower is better.
    bounds : sequence of (low, high)
        One pair per search dimension.
    pop_size : int
        Number of particles.
    max_iter : int
        Number of iterations performed by :meth:`optimize`.

    Attributes
    ----------
    personal_best, personal_best_scores
        Best position and cost seen by each particle.
    global_best, global_best_score
        Best position seen by the swarm (an independent copy) and its cached cost.
    """

    def __init__(self, obj_func, bounds, pop_size=30, max_iter=100):
        self.obj_func = obj_func
        self.bounds = bounds
        self.pop_size = pop_size
        self.max_iter = max_iter
        self.dim = len(bounds)
        self.w_max, self.w_min = 0.9, 0.4
        self.c1, self.c2 = 2.0, 2.0
        self.positions = np.random.uniform([b[0] for b in bounds], [b[1] for b in bounds], (pop_size, self.dim))
        self.velocities = np.zeros_like(self.positions)
        self.personal_best = self.positions.copy()
        self.personal_best_scores = np.array([obj_func(p) for p in self.positions])
        best_idx = np.argmin(self.personal_best_scores)
        # Keep an independent copy together with its score. A row view would
        # change whenever the underlying particle is updated, and caching the
        # score avoids re-running the objective just to compare or report.
        self.global_best = self.personal_best[best_idx].copy()
        self.global_best_score = self.personal_best_scores[best_idx]

    def optimize(self):
        """Run the search and return ``(global_best, global_best_score)``."""
        lower = np.array([b[0] for b in self.bounds])
        upper = np.array([b[1] for b in self.bounds])
        for t in range(self.max_iter):
            # Inertia falls linearly from w_max towards w_min.
            w = self.w_max - ((self.w_max - self.w_min) * t / self.max_iter)
            for i in range(self.pop_size):
                r1, r2 = np.random.rand(self.dim), np.random.rand(self.dim)
                self.velocities[i] = (
                    w * self.velocities[i] +
                    self.c1 * r1 * (self.personal_best[i] - self.positions[i]) +
                    self.c2 * r2 * (self.global_best - self.positions[i])
                )
                self.positions[i] = np.clip(self.positions[i] + self.velocities[i], lower, upper)
                score = self.obj_func(self.positions[i])
                if score < self.personal_best_scores[i]:
                    self.personal_best[i] = self.positions[i]
                    self.personal_best_scores[i] = score
                    if score < self.global_best_score:
                        self.global_best = self.positions[i].copy()
                        self.global_best_score = score
            print(f"Iter {t+1}, Best Score: {self.global_best_score:.5f}")
        return self.global_best, self.global_best_score


In [6]:
class QuantumPSO:
    """Swarm minimiser that resamples each particle around the swarm's mean position.

    The swarm is sampled and scored in the constructor, so building an instance
    already costs ``pop_size`` objective evaluations.

    Parameters
    ----------
    obj_func : callable
        Maps a 1-D position array to a scalar cost; lower is better.
    bounds : sequence of (low, high)
        One pair per search dimension.
    pop_size : int
        Number of particles.
    max_iter : int
        Number of iterations performed by :meth:`optimize`.

    Attributes
    ----------
    best, best_score
        Best position found so far (an independent copy) and its cached cost.
    """

    def __init__(self, obj_func, bounds, pop_size=30, max_iter=100):
        self.obj_func = obj_func
        self.bounds = bounds
        self.pop_size = pop_size
        self.max_iter = max_iter
        self.dim = len(bounds)
        self.positions = np.random.uniform([b[0] for b in bounds], [b[1] for b in bounds], (pop_size, self.dim))
        scores = [obj_func(p) for p in self.positions]
        best_idx = int(np.argmin(scores))
        # Copy the row and remember its score: positions are rewritten in place
        # on every iteration, so a view would not stay at the best point.
        self.best = self.positions[best_idx].copy()
        self.best_score = scores[best_idx]

    def optimize(self):
        """Run the search and return ``(best, best_score)``."""
        lower = np.array([b[0] for b in self.bounds])
        upper = np.array([b[1] for b in self.bounds])
        tiny = np.finfo(float).tiny
        for t in range(self.max_iter):
            center = np.mean(self.positions, axis=0)
            for i in range(self.pop_size):
                # Floor u away from exactly 0 so log(1 / u) stays finite.
                u = np.maximum(np.random.uniform(0, 1, self.dim), tiny)
                # NOTE(review): the offset is never negative, so particles only
                # move to the upper side of the centre in every dimension.
                # Confirm that a random sign was not intended here.
                self.positions[i] = center + np.abs(self.positions[i] - center) * np.log(1 / u)
                self.positions[i] = np.clip(self.positions[i], lower, upper)
            # Score each particle once; the first minimum wins ties.
            scores = [self.obj_func(p) for p in self.positions]
            idx = min(range(len(scores)), key=scores.__getitem__)
            if scores[idx] < self.best_score:
                self.best = self.positions[idx].copy()
                self.best_score = scores[idx]
            print(f"Iter {t+1}, Best Score: {self.best_score:.5f}")
        return self.best, self.best_score


In [7]:
class MultiSwarmPSO:
    """Runs several independent ``AdaptivePSO`` swarms and keeps the best result.

    ``pop_size`` is divided evenly (integer division) between ``num_swarms``
    swarms. The swarms are built in the constructor and optimised one after
    another, without exchanging information.
    """

    def __init__(self, obj_func, bounds, pop_size=60, num_swarms=3, max_iter=100):
        self.obj_func = obj_func
        self.bounds = bounds
        self.num_swarms = num_swarms
        self.swarm_size = pop_size // num_swarms
        self.swarms = []
        for _ in range(num_swarms):
            swarm = AdaptivePSO(obj_func, bounds, pop_size=self.swarm_size, max_iter=max_iter)
            self.swarms.append(swarm)

    def optimize(self):
        """Optimise every swarm in turn; return the best ``(position, score)`` pair.

        Returns ``(None, inf)`` when no swarm reports a score below infinity.
        """
        best_position, best_score = None, float('inf')
        for swarm in self.swarms:
            candidate_position, candidate_score = swarm.optimize()
            if candidate_score < best_score:
                best_position, best_score = candidate_position, candidate_score
        return best_position, best_score


In [8]:
import numpy as np
import random
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, recall_score

class ArtificialBeeColony:
    """Artificial-bee-colony style minimiser.

    Parameters
    ----------
    obj_func : callable
        Maps a 1-D position array to a scalar cost; lower is better.
    bounds : sequence of (low, high)
        One pair per search dimension.
    num_bees : int
        Number of food sources (candidate solutions).
    max_iter : int
        Number of iterations performed by :meth:`optimize`.
    limit : int
        A source that fails to improve more than ``limit`` times in a row is
        abandoned and re-sampled by a scout.

    Attributes
    ----------
    population, fitness, trial
        Current sources, their costs and their consecutive-failure counters.
    best_solution, best_fitness
        Best source found so far (an independent copy) and its cost.
    """

    def __init__(self, obj_func, bounds, num_bees=50, max_iter=100, limit=10):
        self.obj_func = obj_func
        self.bounds = bounds
        self.num_bees = num_bees
        self.max_iter = max_iter
        self.limit = limit
        self.dim = len(bounds)
        self.population = None
        self.fitness = None
        self.trial = None
        self.best_solution = None
        self.best_fitness = float('inf')

    def initialize_population(self):
        """Initialize bee population randomly within bounds."""
        self.population = np.array([
            [random.uniform(b[0], b[1]) for b in self.bounds]
            for _ in range(self.num_bees)
        ])
        self.fitness = np.array([self.obj_func(pos) for pos in self.population])
        self.trial = np.zeros(self.num_bees)
        self.update_best()

    def update_best(self):
        """Update the best solution found so far."""
        min_idx = np.argmin(self.fitness)
        if self.fitness[min_idx] < self.best_fitness:
            self.best_fitness = self.fitness[min_idx]
            self.best_solution = self.population[min_idx].copy()

    def _try_improve(self, i):
        """Mutate source ``i`` and keep the neighbour only if it is strictly better."""
        new_solution = self.mutate_solution(i)
        new_fitness = self.obj_func(new_solution)

        if new_fitness < self.fitness[i]:  # Greedy selection
            self.population[i] = new_solution
            self.fitness[i] = new_fitness
            self.trial[i] = 0
        else:
            self.trial[i] += 1

    def _selection_probabilities(self):
        """Return onlooker selection probabilities that favour low-cost sources.

        Costs are mapped to a positive quality with the usual ABC transform,
        ``1 / (1 + f)`` for ``f >= 0`` and ``1 + |f|`` otherwise, so that a
        lower cost gives a higher probability and negative costs stay valid.
        Falls back to a uniform distribution when the costs contain NaN or
        the qualities cannot be normalised.
        """
        fitness = np.asarray(self.fitness, dtype=float)
        if fitness.size == 0:
            return fitness
        uniform = np.full(fitness.size, 1.0 / fitness.size)
        if np.any(np.isnan(fitness)):
            return uniform
        magnitude = np.abs(fitness)
        quality = np.where(fitness >= 0, 1.0 / (1.0 + magnitude), 1.0 + magnitude)
        total = quality.sum()
        if not np.isfinite(total) or total <= 0:
            return uniform
        return quality / total

    def employed_bees_phase(self):
        """Employed bees phase: Each bee explores a new solution."""
        for i in range(self.num_bees):
            self._try_improve(i)

    def onlooker_bees_phase(self):
        """Onlooker bees phase: Select and improve best solutions."""
        # Probabilities are fixed at the start of the phase.
        probabilities = self._selection_probabilities()

        for _ in range(self.num_bees):
            i = np.random.choice(self.num_bees, p=probabilities)
            self._try_improve(i)

    def scout_bees_phase(self):
        """Scout bees phase: Replace stagnant solutions with new ones."""
        for i in range(self.num_bees):
            if self.trial[i] > self.limit:  # Stagnant solution
                self.population[i] = np.array([random.uniform(b[0], b[1]) for b in self.bounds])
                self.fitness[i] = self.obj_func(self.population[i])
                self.trial[i] = 0

    def mutate_solution(self, i):
        """Generate a new solution by modifying an existing one.

        One randomly chosen coordinate of source ``i`` is shifted relative to
        the same coordinate of a different, randomly chosen source and then
        clamped to its bounds. The stored source is not modified.
        """
        j = random.randint(0, self.dim - 1)  # Randomly select one dimension
        phi = random.uniform(-1, 1)  # Random adjustment factor
        new_solution = self.population[i].copy()

        partners = [idx for idx in range(self.num_bees) if idx != i]
        if not partners:
            # A single source has no partner to compare against.
            return new_solution
        k = random.choice(partners)

        new_solution[j] = self.population[i][j] + phi * (self.population[i][j] - self.population[k][j])

        # Ensure new values are within bounds
        new_solution[j] = max(self.bounds[j][0], min(new_solution[j], self.bounds[j][1]))

        return new_solution

    def optimize(self):
        """Run the ABC algorithm and return ``(best_solution, best_fitness)``."""
        self.initialize_population()
        for iter_num in range(self.max_iter):
            self.employed_bees_phase()
            self.onlooker_bees_phase()
            self.scout_bees_phase()
            self.update_best()
            print(f"Iteration {iter_num + 1}: Best Fitness = {self.best_fitness:.6f}")
        return self.best_solution, self.best_fitness


In [9]:
import warnings
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import os.path
                                
# Silence every warning for the rest of the session. This also hides
# convergence and deprecation warnings raised by the libraries used below.
warnings.filterwarnings("ignore")

# NOTE(review): absolute, machine-specific locations; adjust before running elsewhere.
save_path ='/home/akurdi/Desktop/SDP/new_run/'
file_path ='/home/akurdi/Desktop/SDP/new_run/all/'

print ('is folder found')
print (os.path.exists(file_path))

# Collect every CSV file found under file_path, walking sub-directories too.
orinigal_dataset_path=[]
for dirname, _, filenames in os.walk(file_path):
    print(dirname)
    for filename in filenames:
        full_path = os.path.join(dirname, filename)
        print(full_path)
        # Match the real ".csv" extension. A bare 'csv' suffix check would also
        # accept names that merely end in those letters.
        if filename.lower().endswith('.csv'):
            orinigal_dataset_path.append(full_path)
        

is folder found
True
/home/akurdi/Desktop/SDP/new_run/all/
/home/akurdi/Desktop/SDP/new_run/all/safe.csv
/home/akurdi/Desktop/SDP/new_run/all/mw1.csv
/home/akurdi/Desktop/SDP/new_run/all/pc1.csv
/home/akurdi/Desktop/SDP/new_run/all/JDT.csv
/home/akurdi/Desktop/SDP/new_run/all/zxing.csv
/home/akurdi/Desktop/SDP/new_run/all/LC.csv
/home/akurdi/Desktop/SDP/new_run/all/cm1.csv
/home/akurdi/Desktop/SDP/new_run/all/ar3.csv
/home/akurdi/Desktop/SDP/new_run/all/ar5.csv
/home/akurdi/Desktop/SDP/new_run/all/apache.csv
/home/akurdi/Desktop/SDP/new_run/all/ar1.csv
/home/akurdi/Desktop/SDP/new_run/all/jm1.csv.later
/home/akurdi/Desktop/SDP/new_run/all/PC3.csv
/home/akurdi/Desktop/SDP/new_run/all/ar4.csv
/home/akurdi/Desktop/SDP/new_run/all/PC4.csv
/home/akurdi/Desktop/SDP/new_run/all/ML.csv
/home/akurdi/Desktop/SDP/new_run/all/PDE.csv
/home/akurdi/Desktop/SDP/new_run/all/EQ.csv
/home/akurdi/Desktop/SDP/new_run/all/kc1.csv
/home/akurdi/Desktop/SDP/new_run/all/ar6.csv
/home/akurdi/Desktop/SDP/new_run

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import os
import matplotlib.pyplot as plt
import seaborn as sns
import io
from PIL import Image
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelEncoder
from sklearn import preprocessing
import numpy as np
from scipy import stats
import seaborn as sns
from sklearn import metrics
from sklearn.metrics import roc_auc_score, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, ExtraTreesClassifier
from sklearn.metrics import classification_report

print('Number of datasets:', len(orinigal_dataset_path))

# Map to store dataset names and their normalized datasets
dataSets = {}

# Seed passed to the random generators, the data splits and the estimators.
# None leaves everything unseeded, so two runs give different numbers; set an
# integer to make a run repeatable.
RANDOM_STATE = None
MAIN_DIRECTORY = "/home/akurdi/Desktop/SDP/new_run/Training_test/"
METRIC_NAMES = ["Accuracy", "Precision", "Recall", "F1-Score"]
ROUND_DIGITS = 2  # decimals kept in the reported metrics
POP_SIZE=100      # population size handed to every optimizer
MAX_ITER=200      # iteration budget handed to every optimizer

# The optimizers draw from both Python's `random` module and NumPy's global
# generator, so both must be seeded for RANDOM_STATE to have any effect.
random.seed(RANDOM_STATE)
np.random.seed(RANDOM_STATE)

def evaluate_model(model, model_name, X_test, y_test, datasetname):
    """Print the confusion matrix of a fitted model and return its test metrics.

    Args:
        model: fitted estimator exposing ``predict``.
        model_name: label used in the printout and in the result row.
        X_test: test features.
        y_test: true test labels; the confusion matrix is built for labels 0 and 1.
        datasetname: dataset label stored in the result row.

    Returns:
        dict with the dataset and model names, accuracy, the weighted-average
        precision, recall and F1, the support, and the recall of class 0 and
        class 1 (``None`` when the class is absent from the report). Metrics
        are rounded to ``ROUND_DIGITS`` decimals.
    """
    predictions = model.predict(X_test)
    matrix = confusion_matrix(y_test, predictions, labels=[0, 1])

    print(f"\nConfusion Matrix for {model_name}:")
    matrix_frame = pd.DataFrame(
        matrix,
        index=["Actual 0", "Actual 1"],
        columns=["Predicted 0", "Predicted 1"],
    )
    print(matrix_frame)

    report = classification_report(y_test, predictions, output_dict=True, zero_division=0)
    weighted = report['weighted avg']

    def class_recall(label):
        # The report is keyed by the string form of each class label.
        if label not in report:
            return None
        return round(report[label]['recall'], ROUND_DIGITS)

    result = {}
    result["dataset Name"] = datasetname
    result["Model"] = model_name
    result["Accuracy"] = round(accuracy_score(y_test, predictions), ROUND_DIGITS)
    result["Precision"] = round(weighted['precision'], ROUND_DIGITS)
    result["Recall"] = round(weighted['recall'], ROUND_DIGITS)
    result["F1-Score"] = round(weighted['f1-score'], ROUND_DIGITS)
    result["Support"] = int(weighted['support'])
    result["Class_0_Recall"] = class_recall('0')
    result["Class_1_Recall"] = class_recall('1')
    return result


# Get all dataset folders
# (not used below; kept in case later cells read it)
dataset_folders = [f for f in os.listdir(MAIN_DIRECTORY)
                  if os.path.isdir(os.path.join(MAIN_DIRECTORY, f))]


results_file = os.path.join(MAIN_DIRECTORY, "all_3_SVM_Results.csv")

# Search space shared by every optimizer, one (low, high) pair per parameter.
svm_bounds = [
    (0.01, 100),    # C
    (0.001, 1000),  # gamma
]

# (result label, optimizer class, keyword arguments). Every class takes the
# objective and the bounds as its first two positional arguments. Instances
# are built only when their turn comes because some of them already evaluate
# the objective inside their constructor.
SVM_OPTIMIZERS = [
    ("svm_ImprovedSCSO_optimized", ImprovedSCSO, {"pop_size": POP_SIZE, "max_iter": MAX_ITER}),
    ("svm_GWO_optimized", GrayWolfOptimizer, {"pop_size": POP_SIZE, "max_iter": MAX_ITER}),
    ("svm_GO_optimized", GrowthOptimization, {"pop_size": POP_SIZE, "max_iter": MAX_ITER}),
    ("svm_PSO_optimized", PSO, {"pop_size": POP_SIZE, "max_iter": MAX_ITER}),
    ("svm_AdaptivePSO_optimized", AdaptivePSO, {"pop_size": POP_SIZE, "max_iter": MAX_ITER}),
    ("svm_MultiSwarmPSO_optimized", MultiSwarmPSO, {"pop_size": POP_SIZE, "num_swarms": 5, "max_iter": MAX_ITER}),
    ("svm_ABC_optimized", ArtificialBeeColony, {"num_bees": POP_SIZE, "max_iter": MAX_ITER}),
]


def _normalise_target(frame):
    """Return ``frame`` with its label column renamed to 'defects' and cast to int.

    The first of 'class', 'defects', 'bug', 'isDefective', 'Defective', 'c'
    that is present is used. Text labels are mapped to 0/1 first.

    Raises:
        KeyError: if none of the known label columns is present.
    """
    text_labels = {
        'class': {'clean': 0, 'buggy': 1},
        'isDefective': {'clean': 0, 'buggy': 1},
        'Defective': {'N': 0, 'Y': 1},
    }
    for column in ('class', 'defects', 'bug', 'isDefective', 'Defective', 'c'):
        if column not in frame.columns:
            continue
        labels = frame[column]
        if column in text_labels:
            labels = labels.replace(text_labels[column])
        elif column == 'bug':
            # NOTE(review): assumes 'bug' may hold a defect count; any positive
            # value is treated as defective so the label stays binary like the
            # others - confirm against the data.
            labels = pd.to_numeric(labels) > 0
        frame = frame.rename(columns={column: 'defects'})
        frame['defects'] = labels.astype(int)
        return frame
    raise KeyError(
        "no known label column found; expected one of "
        "'class', 'defects', 'bug', 'isDefective', 'Defective', 'c'"
    )


def svm_objective_function(params, X_train, y_train, split_seed=None):
    """Cost of an RBF SVM with ``params = (C, gamma)``: 1 - weighted F1 on a hold-out split.

    Args:
        params: pair ``(C, gamma)``.
        X_train, y_train: data that is split 80/20 into fit and validation parts.
        split_seed: seed for that split. Pass the same value for every call of
            one optimisation run so all candidates are scored on the same
            validation rows; when None, ``RANDOM_STATE`` is used.

    Returns:
        ``1 - F1`` (lower is better), or 1.0 when the fit part contains a single class.
    """
    C, gamma = params
    seed = RANDOM_STATE if split_seed is None else split_seed
    X_tr, X_val, y_tr, y_val = train_test_split(X_train, y_train, test_size=0.20, random_state=seed)

    # Prevent crashing if only 1 class in the training split
    if len(np.unique(y_tr)) < 2:
        return 1.0  # High loss for invalid split

    # Only predict() is needed here, so probability estimates (which add an
    # internal cross-validation to every fit) are left off.
    model = SVC(kernel='rbf', C=C, gamma=gamma)
    model.fit(X_tr, y_tr)
    y_pred = model.predict(X_val)

    f1 = f1_score(y_val, y_pred, average='weighted', zero_division=0)
    return 1 - f1


for dataset_path in orinigal_dataset_path:
    dataset_name = os.path.basename(dataset_path).split('.')[0]  # Extract dataset name from file path
    print(f"Processing dataset: {dataset_name}")

    # Load the dataset
    orig_df = pd.read_csv(dataset_path)
    print(f"Original shape: {orig_df.shape}")

    # Replace missing values represented as '?' with 0
    orig_df = orig_df.replace('?', 0)

    # Remove duplicate rows (keeping first occurrence)
    orig_df = orig_df.drop_duplicates()

    # Remove rows with any NaN values (complete case analysis)
    initial_rows = len(orig_df)
    orig_df = orig_df.dropna(how='any')
    removed_rows = initial_rows - len(orig_df)
    print ('removed rows : ', removed_rows)
    print(f"Cleaned shape: {orig_df.shape}")

    # Handle various target column naming conventions
    orig_df = _normalise_target(orig_df)

    all_results = []
    print("Total counts of each class in the entire dataset:")
    print(orig_df["defects"].value_counts())

    y = orig_df["defects"]
    X = orig_df.drop(columns=['defects'])

    # Splitting into training and testing sets
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=RANDOM_STATE)
    print("Class distribution in training set:", y_train.value_counts().to_dict())
    print("Class distribution in testing set:", y_test.value_counts().to_dict())

    # Fit the scaler on the training rows only, so the test rows do not
    # influence the feature ranges. Keep the original row index so features
    # and labels stay aligned.
    scaler = preprocessing.MinMaxScaler(feature_range=(1, 2))
    X_train = pd.DataFrame(scaler.fit_transform(X_train_raw), columns=X.columns, index=X_train_raw.index)
    X_test = pd.DataFrame(scaler.transform(X_test_raw), columns=X.columns, index=X_test_raw.index)

    # Concatenating features and labels
    Testing = pd.concat([X_test, y_test], axis=1)
    Training = pd.concat([X_train, y_train], axis=1)

    # Baseline classifiers with fixed hyperparameters.
    dt = DecisionTreeClassifier(criterion='gini', random_state=RANDOM_STATE)
    knn = KNeighborsClassifier(n_neighbors=5)
    nb = GaussianNB()
    rf = RandomForestClassifier(n_estimators=800, max_depth=15, random_state=RANDOM_STATE)
    # Named xgb_model so the imported `xgb` module is not overwritten.
    xgb_model = XGBClassifier(n_estimators=800, random_state=RANDOM_STATE)
    svm = SVC(kernel='rbf', C=1.0, gamma='scale', probability=True, random_state=RANDOM_STATE)

    baselines = [
        ("Decision Tree", dt),
        ("KNN", knn),
        ("Naive Bayes", nb),
        ("Random Forest", rf),
        ("XGBoost", xgb_model),
        ("SVM", svm),
    ]
    for model_name, model in baselines:
        model.fit(X_train, y_train)
        all_results.append(evaluate_model(model, model_name, X_test, y_test, dataset_name))

    # One validation split per dataset: every candidate of every optimizer is
    # scored on the same rows, so costs are comparable between calls.
    split_seed = RANDOM_STATE if RANDOM_STATE is not None else int(np.random.randint(0, 2**31 - 1))

    def objective(params, _X=X_train, _y=y_train, _seed=split_seed):
        # Defaults bind this dataset's data to the function at definition time.
        return svm_objective_function(params, _X, _y, _seed)

    tuned_models = {}
    for model_name, optimizer_cls, optimizer_kwargs in SVM_OPTIMIZERS:
        print(f"\nOptimizing SVM hyperparameters with {optimizer_cls.__name__} for dataset: {dataset_name}")
        optimizer = optimizer_cls(objective, svm_bounds, **optimizer_kwargs)
        best_params, best_fitness = optimizer.optimize()

        # Extract best parameters
        best_C, best_gamma = best_params
        print(f"\nBest parameters found for {dataset_name} ({optimizer_cls.__name__}):")
        print(f"C: {best_C:.4f}, gamma: {best_gamma:.4f}")

        # Train the final SVM on the full training set with the tuned parameters
        tuned_svm = SVC(kernel='rbf', C=best_C, gamma=best_gamma, probability=True, random_state=RANDOM_STATE)
        tuned_svm.fit(X_train, y_train)
        tuned_models[model_name] = tuned_svm

        all_results.append(evaluate_model(tuned_svm, model_name, X_test, y_test, dataset_name))

    # Convert results to DataFrame
    results_df = pd.DataFrame(all_results)

    # Check if the results file already exists
    file_exists = os.path.isfile(results_file)

    # Save by appending (mode='a') and write header only if file doesn't exist
    results_df.to_csv(results_file, mode='a', header=not file_exists, index=False)

    # Print summary
    print(f"\nResults for {dataset_name}:")
    print(results_df.to_string(index=False))
    print(f"\nSaved to: {results_file}")

Number of datasets: 20
Processing dataset: safe
Original shape: (56, 27)
removed rows :  0
Original shape: (56, 27)
Total counts of each class in the entire dataset:
defects
0    34
1    22
Name: count, dtype: int64
Class distribution in training set: {0: 27, 1: 17}
Class distribution in testing set: {0: 7, 1: 5}

Confusion Matrix for Decision Tree:
          Predicted 0  Predicted 1
Actual 0            6            1
Actual 1            2            3

Confusion Matrix for KNN:
          Predicted 0  Predicted 1
Actual 0            4            3
Actual 1            2            3

Confusion Matrix for Naive Bayes:
          Predicted 0  Predicted 1
Actual 0            5            2
Actual 1            3            2

Confusion Matrix for Random Forest:
          Predicted 0  Predicted 1
Actual 0            6            1
Actual 1            1            4

Confusion Matrix for XGBoost:
          Predicted 0  Predicted 1
Actual 0            5            2
Actual 1            1       