In [None]:
import os
import pandas as pd

# Folder containing the original CSV files
input_folder = r"C:\Users\91934\Desktop\project minor\dataset"

# Folder to save the updated CSVs
output_folder = r"C:\Users\91934\Desktop\project minor\newdataset"
os.makedirs(output_folder, exist_ok=True)  # create folder if it doesn't exist

# Define the headers you want to add
headers = ['Fp1', 'Fp2', 'F3', 'F4', 'F7', 'F8', 'T3', 'T4', 'C3', 'C4', 'T5', 'T6', 'P3', 'P4', 'O1', 'O2', 'Fz', 'Cz', 'Pz']  # modify according to your CSV

# Loop through each CSV file
for filename in os.listdir(input_folder):
    if filename.endswith(".csv"):
        file_path = os.path.join(input_folder, filename)
        
        # Read CSV without header
        df = pd.read_csv(file_path, header=None)
        
        # Assign the new headers
        df.columns = headers
        
        # Save to new folder with the same filename
        new_file_path = os.path.join(output_folder, filename)
        df.to_csv(new_file_path, index=False)

print("Headers added and CSVs saved separately in the new folder.")


In [None]:
import os
import pandas as pd

# Folder containing the original CSV files
input_folder = r"C:\Users\91934\Desktop\project minor\newdataset"

# Folder to save the new CSV files
output_folder = os.path.join(input_folder, "left")
os.makedirs(output_folder, exist_ok=True)  # create folder if it doesn't exist

# Columns to extract
columns_to_extract = ['Fp1', 'F3', 'F7', 'Fz']

# Loop through each CSV file
for filename in os.listdir(input_folder):
    if filename.endswith(".csv"):
        file_path = os.path.join(input_folder, filename)
        
        # Read CSV
        df = pd.read_csv(file_path)
        
        # Extract only the required columns
        df_new = df[columns_to_extract]
        
        # Save the new CSV in the "left" folder
        new_file_path = os.path.join(output_folder, filename)
        df_new.to_csv(new_file_path, index=False)

print("Columns extracted and saved individually in the 'left' folder.")


In [None]:
import os
import pandas as pd

# Folder containing the original CSV files
input_folder = r"C:\Users\91934\Desktop\project minor\newdataset"

# Folder to save the new CSV files
output_folder = os.path.join(input_folder, "right")
os.makedirs(output_folder, exist_ok=True)  # create folder if it doesn't exist

# Columns to extract
columns_to_extract = ['Fp2', 'F4', 'F8', 'Fz']

# Loop through each CSV file
for filename in os.listdir(input_folder):
    if filename.endswith(".csv"):
        file_path = os.path.join(input_folder, filename)
        
        # Read CSV
        df = pd.read_csv(file_path)
        
        # Extract only the required columns
        df_new = df[columns_to_extract]
        
        # Save the new CSV in the "left" folder
        new_file_path = os.path.join(output_folder, filename)
        df_new.to_csv(new_file_path, index=False)

print("Columns extracted and saved individually in the 'left' folder.")


In [None]:
import os
import numpy as np
import pandas as pd
from scipy.signal import welch, hilbert
from scipy.stats import entropy
from pyeeg import hjorth, largest_lyauponov_exponent as lziv_complexity
from nolds import lyap_r
from itertools import combinations

# Parameters
input_folder = r"C:\Users\91934\Desktop\project minor\left"       # Folder containing original CSVs
output_folder = r"C:\Users\91934\Desktop\project minor\leftseg" # Folder to save feature CSVs
ws = 10  # window size in seconds
fs = 30  # sampling frequency

# Create output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Frequency bands (Hz)
bands = {
    'theta': (4, 8),
    'alpha': (8, 13),
    'beta': (13, 30)
}

def bandpower(data, fs, band):
    f, Pxx = welch(data, fs=fs, nperseg=len(data))
    freq_res = f[1] - f[0]
    idx_band = np.logical_and(f >= band[0], f <= band[1])
    return np.sum(Pxx[idx_band]) * freq_res

def compute_features(segment):
    features = {}
    
    # PSD Mean
    f, Pxx = welch(segment, fs=fs, nperseg=len(segment))
    features['psd_mean'] = np.mean(Pxx)
    
    # Band powers
    for b in bands:
        features[f'{b}_power'] = bandpower(segment, fs, bands[b])
    
    # Band ratios
    features['alpha_theta_ratio'] = features['alpha_power'] / (features['theta_power'] + 1e-6)
    features['alpha_beta_ratio'] = features['alpha_power'] / (features['beta_power'] + 1e-6)
    
    # Hjorth parameters
    features['hjorth_mobility'], features['hjorth_complexity'] = hjorth(segment)[1:]
    
    # Lempel-Ziv complexity
    features['lz_complexity'] = lziv_complexity(segment)
    
    # Lyapunov exponent
    try:
        features['lyapunov'] = lyap_r(segment, emb_dim=10)
    except:
        features['lyapunov'] = np.nan
    
    # Entropy
    hist, _ = np.histogram(segment, bins=50, density=True)
    features['entropy'] = entropy(hist + 1e-6)
    
    # Fractal dimension (Higuchi method)
    def higuchi_fd(x, kmax=10):
        L = []
        N = len(x)
        for k in range(1, kmax):
            Lk = []
            for m in range(k):
                Lmk = 0
                for i in range(1, int(np.floor((N - m) / k))):
                    Lmk += abs(x[m + i*k] - x[m + (i-1)*k])
                Lmk = (Lmk * (N - 1) / (np.floor((N - m)/k) * k)) / k
                Lk.append(Lmk)
            L.append(np.mean(Lk))
        lnL = np.log(L)
        lnk = np.log(1.0 / np.arange(1, kmax))
        return np.polyfit(lnk, lnL, 1)[0]
    
    features['fractal_dim'] = higuchi_fd(segment)
    
    return features

def compute_plv_matrix(segment):
    n_channels = segment.shape[1]
    plv_matrix = {}
    for ch1, ch2 in combinations(range(n_channels), 2):
        phase1 = np.angle(hilbert(segment[:, ch1]))
        phase2 = np.angle(hilbert(segment[:, ch2]))
        plv_val = np.abs(np.mean(np.exp(1j*(phase1 - phase2))))
        plv_matrix[f'plv_ch{ch1}_ch{ch2}'] = plv_val
    return plv_matrix

# Loop through CSVs
for file in os.listdir(input_folder):
    if file.endswith(".csv"):
        data = pd.read_csv(os.path.join(input_folder, file))
        data_array = data.values  # shape [samples, channels]
        n_samples, n_channels = data_array.shape
        step = ws * fs
        all_features = []
        
        for start in range(0, n_samples - step + 1, step):
            segment = data_array[start:start+step, :]
            segment_features = {}
            
            # Features per channel
            for ch in range(n_channels):
                ch_features = compute_features(segment[:, ch])
                ch_features = {f'ch{ch}_{k}': v for k,v in ch_features.items()}
                segment_features.update(ch_features)
            
            # PLV for all pairs
            plv_features = compute_plv_matrix(segment)
            segment_features.update(plv_features)
            
            segment_features['start_sample'] = start
            all_features.append(segment_features)
        
        # Save features for this CSV in output folder
        features_df = pd.DataFrame(all_features)
        save_path = os.path.join(output_folder, f"{os.path.splitext(file)[0]}_features.csv")
        features_df.to_csv(save_path, index=False)
        print(f"Saved features for {file} to {save_path}")

print("All CSV files processed and saved in the output folder.")


In [None]:
import os
import numpy as np
import pandas as pd
from scipy.signal import welch, hilbert
from scipy.stats import entropy
from pyeeg import hjorth
from nolds import lyap_r
from itertools import combinations

# Parameters
input_folder = r"C:\Users\91934\Desktop\project minor\left"       # Folder containing original CSVs
output_folder = r"C:\Users\91934\Desktop\project minor\leftseg"   # Folder to save feature CSVs
ws = 10  # window size in seconds
fs = 30  # sampling frequency

# Create output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Frequency bands (Hz)
bands = {
    'theta': (4, 8),
    'alpha': (8, 13),
    'beta': (13, 30)
}

def bandpower(data, fs, band):
    f, Pxx = welch(data, fs=fs, nperseg=len(data))
    freq_res = f[1] - f[0]
    idx_band = np.logical_and(f >= band[0], f <= band[1])
    return np.sum(Pxx[idx_band]) * freq_res

def lz_complexity(s):
    """Compute Lempel-Ziv complexity of a 1D array"""
    median = np.median(s)
    s_bin = ''.join(['1' if i > median else '0' for i in s])
    i, k, l, n = 0, 1, 1, len(s_bin)
    c = 1
    while True:
        if s_bin[i+k-1] != s_bin[l+k-1]:
            if k > l:
                c += 1
                i = 0
                l += k
                k = 1
            else:
                k += 1
            if l + k - 1 >= n:
                c += 1
                break
        else:
            k += 1
            if l + k - 1 >= n:
                c += 1
                break
    return c

def compute_features(segment):
    if len(segment) < 2:  # Safety check for too short segments
        return { 
            'psd_mean': np.nan, 'alpha_power': np.nan, 'theta_power': np.nan, 'beta_power': np.nan,
            'alpha_theta_ratio': np.nan, 'alpha_beta_ratio': np.nan,
            'hjorth_mobility': np.nan, 'hjorth_complexity': np.nan,
            'lz_complexity': np.nan, 'lyapunov': np.nan,
            'entropy': np.nan, 'fractal_dim': np.nan
        }
    
    features = {}
    
    # PSD Mean
    f, Pxx = welch(segment, fs=fs, nperseg=len(segment))
    features['psd_mean'] = np.mean(Pxx)
    
    # Band powers
    for b in bands:
        features[f'{b}_power'] = bandpower(segment, fs, bands[b])
    
    # Band ratios
    features['alpha_theta_ratio'] = features['alpha_power'] / (features['theta_power'] + 1e-6)
    features['alpha_beta_ratio'] = features['alpha_power'] / (features['beta_power'] + 1e-6)
    
    # Hjorth parameters with safe unpacking
    try:
        hj = hjorth(segment)
        features['hjorth_mobility'] = hj[1] if len(hj) > 1 else np.nan
        features['hjorth_complexity'] = hj[2] if len(hj) > 2 else np.nan
    except:
        features['hjorth_mobility'] = np.nan
        features['hjorth_complexity'] = np.nan
    
    # Lempel-Ziv complexity
    features['lz_complexity'] = lz_complexity(segment)
    
    # Lyapunov exponent
    try:
        features['lyapunov'] = lyap_r(segment, emb_dim=10)
    except:
        features['lyapunov'] = np.nan
    
    # Entropy
    hist, _ = np.histogram(segment, bins=50, density=True)
    features['entropy'] = entropy(hist + 1e-6)
    
    # Fractal dimension (Higuchi method)
    def higuchi_fd(x, kmax=10):
        L = []
        N = len(x)
        for k in range(1, kmax):
            Lk = []
            for m in range(k):
                Lmk = 0
                for i in range(1, int(np.floor((N - m) / k))):
                    Lmk += abs(x[m + i*k] - x[m + (i-1)*k])
                Lmk = (Lmk * (N - 1) / (np.floor((N - m)/k) * k)) / k
                Lk.append(Lmk)
            L.append(np.mean(Lk))
        lnL = np.log(L)
        lnk = np.log(1.0 / np.arange(1, kmax))
        return np.polyfit(lnk, lnL, 1)[0]
    
    features['fractal_dim'] = higuchi_fd(segment)
    
    return features

def compute_plv_matrix(segment):
    n_channels = segment.shape[1]
    plv_matrix = {}
    for ch1, ch2 in combinations(range(n_channels), 2):
        phase1 = np.angle(hilbert(segment[:, ch1]))
        phase2 = np.angle(hilbert(segment[:, ch2]))
        plv_val = np.abs(np.mean(np.exp(1j*(phase1 - phase2))))
        plv_matrix[f'plv_ch{ch1}_ch{ch2}'] = plv_val
    return plv_matrix

# Loop through CSVs
for file in os.listdir(input_folder):
    if file.endswith(".csv"):
        data = pd.read_csv(os.path.join(input_folder, file))
        data_array = data.values  # shape [samples, channels]
        n_samples, n_channels = data_array.shape
        step = ws * fs
        all_features = []
        
        if n_samples < step:  # Skip if CSV is shorter than window
            print(f"Skipping {file}: too short for windowing")
            continue
        
        for start in range(0, n_samples - step + 1, step):
            segment = data_array[start:start+step, :]
            segment_features = {}
            
            # Features per channel
            for ch in range(n_channels):
                ch_features = compute_features(segment[:, ch])
                ch_features = {f'ch{ch}_{k}': v for k,v in ch_features.items()}
                segment_features.update(ch_features)
            
            # PLV for all pairs
            plv_features = compute_plv_matrix(segment)
            segment_features.update(plv_features)
            
            segment_features['start_sample'] = start
            all_features.append(segment_features)
        
        # Save features for this CSV in output folder
        features_df = pd.DataFrame(all_features)
        save_path = os.path.join(output_folder, f"{os.path.splitext(file)[0]}_features.csv")
        features_df.to_csv(save_path, index=False)
        print(f"Saved features for {file} to {save_path}")

print("All CSV files processed and saved in the output folder.")


In [None]:
import os
import numpy as np
import pandas as pd
from scipy.signal import welch, hilbert
from scipy.stats import entropy
from pyeeg import hjorth
from nolds import lyap_r
from itertools import combinations

# Parameters
input_folder = r"C:\Users\91934\Desktop\project minor\right"       # Folder containing original CSVs
output_folder = r"C:\Users\91934\Desktop\project minor\rightseg"   # Folder to save feature CSVs
ws = 10  # window size in seconds
fs = 30  # sampling frequency

# Create output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Frequency bands (Hz)
bands = {
    'theta': (4, 8),
    'alpha': (8, 13),
    'beta': (13, 30)
}

def bandpower(data, fs, band):
    f, Pxx = welch(data, fs=fs, nperseg=len(data))
    freq_res = f[1] - f[0]
    idx_band = np.logical_and(f >= band[0], f <= band[1])
    return np.sum(Pxx[idx_band]) * freq_res

def lz_complexity(s):
    """Compute Lempel-Ziv complexity of a 1D array"""
    median = np.median(s)
    s_bin = ''.join(['1' if i > median else '0' for i in s])
    i, k, l, n = 0, 1, 1, len(s_bin)
    c = 1
    while True:
        if s_bin[i+k-1] != s_bin[l+k-1]:
            if k > l:
                c += 1
                i = 0
                l += k
                k = 1
            else:
                k += 1
            if l + k - 1 >= n:
                c += 1
                break
        else:
            k += 1
            if l + k - 1 >= n:
                c += 1
                break
    return c

def compute_features(segment):
    if len(segment) < 2:  # Safety check for too short segments
        return { 
            'psd_mean': np.nan, 'alpha_power': np.nan, 'theta_power': np.nan, 'beta_power': np.nan,
            'alpha_theta_ratio': np.nan, 'alpha_beta_ratio': np.nan,
            'hjorth_mobility': np.nan, 'hjorth_complexity': np.nan,
            'lz_complexity': np.nan, 'lyapunov': np.nan,
            'entropy': np.nan, 'fractal_dim': np.nan
        }
    
    features = {}
    
    # PSD Mean
    f, Pxx = welch(segment, fs=fs, nperseg=len(segment))
    features['psd_mean'] = np.mean(Pxx)
    
    # Band powers
    for b in bands:
        features[f'{b}_power'] = bandpower(segment, fs, bands[b])
    
    # Band ratios
    features['alpha_theta_ratio'] = features['alpha_power'] / (features['theta_power'] + 1e-6)
    features['alpha_beta_ratio'] = features['alpha_power'] / (features['beta_power'] + 1e-6)
    
    # Hjorth parameters with safe unpacking
    try:
        hj = hjorth(segment)
        features['hjorth_mobility'] = hj[1] if len(hj) > 1 else np.nan
        features['hjorth_complexity'] = hj[2] if len(hj) > 2 else np.nan
    except:
        features['hjorth_mobility'] = np.nan
        features['hjorth_complexity'] = np.nan
    
    # Lempel-Ziv complexity
    features['lz_complexity'] = lz_complexity(segment)
    
    # Lyapunov exponent
    try:
        features['lyapunov'] = lyap_r(segment, emb_dim=10)
    except:
        features['lyapunov'] = np.nan
    
    # Entropy
    hist, _ = np.histogram(segment, bins=50, density=True)
    features['entropy'] = entropy(hist + 1e-6)
    
    # Fractal dimension (Higuchi method)
    def higuchi_fd(x, kmax=10):
        L = []
        N = len(x)
        for k in range(1, kmax):
            Lk = []
            for m in range(k):
                Lmk = 0
                for i in range(1, int(np.floor((N - m) / k))):
                    Lmk += abs(x[m + i*k] - x[m + (i-1)*k])
                Lmk = (Lmk * (N - 1) / (np.floor((N - m)/k) * k)) / k
                Lk.append(Lmk)
            L.append(np.mean(Lk))
        lnL = np.log(L)
        lnk = np.log(1.0 / np.arange(1, kmax))
        return np.polyfit(lnk, lnL, 1)[0]
    
    features['fractal_dim'] = higuchi_fd(segment)
    
    return features

def compute_plv_matrix(segment):
    n_channels = segment.shape[1]
    plv_matrix = {}
    for ch1, ch2 in combinations(range(n_channels), 2):
        phase1 = np.angle(hilbert(segment[:, ch1]))
        phase2 = np.angle(hilbert(segment[:, ch2]))
        plv_val = np.abs(np.mean(np.exp(1j*(phase1 - phase2))))
        plv_matrix[f'plv_ch{ch1}_ch{ch2}'] = plv_val
    return plv_matrix

# Loop through CSVs
for file in os.listdir(input_folder):
    if file.endswith(".csv"):
        data = pd.read_csv(os.path.join(input_folder, file))
        data_array = data.values  # shape [samples, channels]
        n_samples, n_channels = data_array.shape
        step = ws * fs
        all_features = []
        
        if n_samples < step:  # Skip if CSV is shorter than window
            print(f"Skipping {file}: too short for windowing")
            continue
        
        for start in range(0, n_samples - step + 1, step):
            segment = data_array[start:start+step, :]
            segment_features = {}
            
            # Features per channel
            for ch in range(n_channels):
                ch_features = compute_features(segment[:, ch])
                ch_features = {f'ch{ch}_{k}': v for k,v in ch_features.items()}
                segment_features.update(ch_features)
            
            # PLV for all pairs
            plv_features = compute_plv_matrix(segment)
            segment_features.update(plv_features)
            
            segment_features['start_sample'] = start
            all_features.append(segment_features)
        
        # Save features for this CSV in output folder
        features_df = pd.DataFrame(all_features)
        save_path = os.path.join(output_folder, f"{os.path.splitext(file)[0]}_features.csv")
        features_df.to_csv(save_path, index=False)
        print(f"Saved features for {file} to {save_path}")

print("All CSV files processed and saved in the output folder.")


In [None]:
import os
import pandas as pd

# Folders
left_folder = r"C:\Users\91934\Desktop\project minor\leftseg"
right_folder = r"C:\Users\91934\Desktop\project minor\rightseg"
output_folder = r"C:\Users\91934\Desktop\project minor\asymmetry_all"
os.makedirs(output_folder, exist_ok=True)

# List CSV files
left_files = [f for f in os.listdir(left_folder) if f.endswith('.csv')]
right_files = [f for f in os.listdir(right_folder) if f.endswith('.csv')]

# Function to compute asymmetry: Asymmetry = (L + R) / (L - R)
def compute_asymmetry(left_val, right_val):
    return (left_val + right_val) / (left_val - right_val + 1e-6)

# Loop through files
for file in left_files:
    if file in right_files:  # match by name
        left_df = pd.read_csv(os.path.join(left_folder, file))
        right_df = pd.read_csv(os.path.join(right_folder, file))

        # Ensure same number of segments
        n_segments = min(len(left_df), len(right_df))

        asymmetry_list = []

        for i in range(n_segments):
            segment_dict = {'segment': i}
            # Loop through all columns (features) in the left CSV
            for col in left_df.columns:
                if col in right_df.columns and col != 'start_sample':  # ignore segment index column if exists
                    left_val = left_df.iloc[i][col]
                    right_val = right_df.iloc[i][col]
                    segment_dict[f'{col}_asym'] = compute_asymmetry(left_val, right_val)
            asymmetry_list.append(segment_dict)

        # Save to CSV
        asym_df = pd.DataFrame(asymmetry_list)
        save_path = os.path.join(output_folder, f"{os.path.splitext(file)[0]}_asymmetry.csv")
        asym_df.to_csv(save_path, index=False)
        print(f"Saved asymmetry for {file} to {save_path}")


In [14]:
import os
import pandas as pd

# Folder containing your CSV files
folder_path = r"C:\Users\91934\Desktop\project minor\asymmetry_all"

# Output master CSV file
output_file = r"C:\Users\91934\Desktop\project minor\asymmetrymaster.csv"

# List to store DataFrames
df_list = []

# Loop through all files in the folder
for file_name in os.listdir(folder_path):
    if file_name.endswith(".csv"):
        file_path = os.path.join(folder_path, file_name)
        df = pd.read_csv(file_path)
        # Optional: add a column to track source file
        df['source_file'] = file_name
        df_list.append(df)

# Concatenate all DataFrames
master_df = pd.concat(df_list, ignore_index=True)

# Save to master CSV
master_df.to_csv(output_file, index=False)

print(f"Master CSV created with {len(master_df)} rows at: {output_file}")


Master CSV created with 3399 rows at: C:\Users\91934\Desktop\project minor\asymmetrymaster.csv


In [27]:
import pandas as pd

# Path to your master CSV
master_csv = r"C:\Users\91934\Desktop\project minor\asymmetrymaster.csv"

# Path for the new CSV with classification
output_csv = r"C:\Users\91934\Desktop\project minor\classified.csv"

# Load the master dataset
df = pd.read_csv(master_csv)

# Function to classify a row
def classify_row(row):
    # Example using ch0 and one PLV; adjust thresholds as needed
    alpha_cond = row['ch0_alpha_power_asym'] > 0
    beta_cond = row['ch0_beta_power_asym'] > 0
    theta_cond = row['ch0_theta_power_asym'] <= 0  # approx zero or negative

    # If all conditions met, classify as 0, else 1
    return 0 if all([alpha_cond, beta_cond,theta_cond]) else 1

# Apply the function to all rows
df['class_label'] = df.apply(classify_row, axis=1)

# Save to a new CSV file
df.to_csv(output_csv, index=False)

print(f"Classification added and saved to: {output_csv}")


Classification added and saved to: C:\Users\91934\Desktop\project minor\classified.csv


In [28]:
df=pd.read_csv(r"C:\Users\91934\Desktop\project minor\classified.csv")
print(df['class_label'].value_counts())  # Count of each class

class_label
1    1823
0    1576
Name: count, dtype: int64


In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load your dataset
csv_file = r"C:\Users\91934\Desktop\project minor\classified.csv"
df = pd.read_csv(csv_file)
df.dropna(inplace=True)  # Remove rows with missing values

# Features and target
X = df.drop(columns=['class_label', 'source_file', 'segment'])  # drop non-feature columns
y = df['class_label']

# Optional: scale features (important for SVM)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

# Create SVM classifier (RBF kernel)
svm_clf = SVC(kernel='rbf', C=1.0, gamma='scale', probability=True, random_state=42)

# Train the classifier
svm_clf.fit(X_train, y_train)

# Make predictions
y_pred = svm_clf.predict(X_test)

# Evaluate
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


ValueError: Found array with 0 sample(s) (shape=(0, 54)) while a minimum of 1 is required by StandardScaler.

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load dataset
csv_file = r"C:\Users\91934\Desktop\project minor\classified.csv"
df = pd.read_csv(csv_file)
df.dropna(inplace=True)  # Remove rows with missing values

# Features and target
X = df.drop(columns=['class_label', 'source_file', 'segment'])
y = df['class_label']

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

# Define SVM and hyperparameter grid
svm = SVC(probability=True, random_state=42)
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 0.01, 0.1, 1],
    'kernel': ['rbf', 'linear', 'poly']
}

# Grid search with 5-fold cross-validation
grid_search = GridSearchCV(svm, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Best estimator
best_svm = grid_search.best_estimator_
print("Best parameters:", grid_search.best_params_)

# Make predictions
y_pred = best_svm.predict(X_test)

# Evaluate
print("Test Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


ValueError: Found array with 0 sample(s) (shape=(0, 54)) while a minimum of 1 is required by StandardScaler.

In [5]:
df=pd.read_csv(r"C:\Users\91934\Desktop\project minor\classified.csv")
df.isnull().sum()

segment                          0
ch0_psd_mean_asym                0
ch0_theta_power_asym             0
ch0_alpha_power_asym             0
ch0_beta_power_asym              0
ch0_alpha_theta_ratio_asym       0
ch0_alpha_beta_ratio_asym        0
ch0_hjorth_mobility_asym         0
ch0_hjorth_complexity_asym    3399
ch0_lz_complexity_asym           0
ch0_lyapunov_asym               39
ch0_entropy_asym                 0
ch0_fractal_dim_asym            37
ch1_psd_mean_asym                0
ch1_theta_power_asym             0
ch1_alpha_power_asym             0
ch1_beta_power_asym              0
ch1_alpha_theta_ratio_asym       0
ch1_alpha_beta_ratio_asym        0
ch1_hjorth_mobility_asym         0
ch1_hjorth_complexity_asym    3399
ch1_lz_complexity_asym           0
ch1_lyapunov_asym               39
ch1_entropy_asym                 0
ch1_fractal_dim_asym            37
ch2_psd_mean_asym                0
ch2_theta_power_asym             0
ch2_alpha_power_asym             0
ch2_beta_power_asym 

In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load dataset
csv_file = r"C:\Users\91934\Desktop\project minor\classified.csv"
df = pd.read_csv(csv_file)



# Encode class labels if not numeric
le = LabelEncoder()
df['class_label'] = le.fit_transform(df['class_label'])
# Replace missing values with median
df.fillna(df.median(), inplace=True)
# Features and target
X = df.drop(columns=['class_label', 'source_file', 'segment'])
y = df['class_label']

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

# Define SVM and hyperparameter grid
svm = SVC(probability=True, random_state=42)
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 0.01, 0.1, 1],
    'kernel': ['rbf', 'linear', 'poly']
}

# Grid search with 5-fold cross-validation
grid_search = GridSearchCV(svm, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Best estimator
best_svm = grid_search.best_estimator_
print("Best parameters:", grid_search.best_params_)

# Make predictions
y_pred = best_svm.predict(X_test)

# Evaluate
print("Test Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


TypeError: Cannot convert [['s00_features_asymmetry.csv' 's00_features_asymmetry.csv'
  's00_features_asymmetry.csv' ... 's32_features_asymmetry.csv'
  's32_features_asymmetry.csv' 's32_features_asymmetry.csv']] to numeric

In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load dataset
csv_file = r"C:\Users\91934\Desktop\project minor\classified.csv"
df = pd.read_csv(csv_file)



# Encode class labels if not numeric
if df['class_label'].dtype == 'object':
    le = LabelEncoder()
    df['class_label'] = le.fit_transform(df['class_label'])

# Replace missing values with median
df.fillna(df.median(), inplace=True)

# Select features: numeric columns only, excluding target and non-numeric metadata
X = df.drop(columns=['class_label', 'source_file', 'segment'])
X = X.select_dtypes(include=['float64', 'int64'])  # ensure only numeric features
y = df['class_label']

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

# Define SVM and hyperparameter grid
svm = SVC(probability=True, random_state=42)
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 0.01, 0.1, 1],
    'kernel': ['rbf', 'linear', 'poly']
}

# Grid search with 5-fold cross-validation
grid_search = GridSearchCV(svm, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Best estimator
best_svm = grid_search.best_estimator_
print("Best parameters:", grid_search.best_params_)

# Make predictions
y_pred = best_svm.predict(X_test)

# Evaluate
print("Test Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


TypeError: Cannot convert [['s00_features_asymmetry.csv' 's00_features_asymmetry.csv'
  's00_features_asymmetry.csv' ... 's32_features_asymmetry.csv'
  's32_features_asymmetry.csv' 's32_features_asymmetry.csv']] to numeric

In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load dataset
csv_file = r"C:\Users\91934\Desktop\project minor\classified.csv"
df = pd.read_csv(csv_file)

# Encode class labels if not numeric
if df['class_label'].dtype == 'object':
    le = LabelEncoder()
    df['class_label'] = le.fit_transform(df['class_label'])

# Select numeric feature columns only (exclude non-numeric metadata)
X = df.drop(columns=['class_label', 'source_file', 'segment'])
X = X.select_dtypes(include=['float64', 'int64'])  # only numeric features

# Replace missing values in numeric features with median
X.fillna(X.median(), inplace=True)

# Target
y = df['class_label']

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

# Define SVM and hyperparameter grid
svm = SVC(probability=True, random_state=42)
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 0.01, 0.1, 1],
    'kernel': ['rbf', 'linear', 'poly']
}

# Grid search with 5-fold cross-validation
grid_search = GridSearchCV(svm, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Best estimator
best_svm = grid_search.best_estimator_
print("Best parameters:", grid_search.best_params_)

# Make predictions
y_pred = best_svm.predict(X_test)

# Evaluate
print("Test Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))



  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


ValueError: 
All the 240 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
240 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\91934\AppData\Roaming\Python\Python313\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\91934\AppData\Roaming\Python\Python313\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "C:\Users\91934\AppData\Roaming\Python\Python313\site-packages\sklearn\svm\_base.py", line 197, in fit
    X, y = validate_data(
           ~~~~~~~~~~~~~^
        self,
        ^^^^^
    ...<5 lines>...
        accept_large_sparse=False,
        ^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "C:\Users\91934\AppData\Roaming\Python\Python313\site-packages\sklearn\utils\validation.py", line 2961, in validate_data
    X, y = check_X_y(X, y, **check_params)
           ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\91934\AppData\Roaming\Python\Python313\site-packages\sklearn\utils\validation.py", line 1370, in check_X_y
    X = check_array(
        X,
    ...<12 lines>...
        input_name="X",
    )
  File "C:\Users\91934\AppData\Roaming\Python\Python313\site-packages\sklearn\utils\validation.py", line 1107, in check_array
    _assert_all_finite(
    ~~~~~~~~~~~~~~~~~~^
        array,
        ^^^^^^
    ...<2 lines>...
        allow_nan=ensure_all_finite == "allow-nan",
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "C:\Users\91934\AppData\Roaming\Python\Python313\site-packages\sklearn\utils\validation.py", line 120, in _assert_all_finite
    _assert_all_finite_element_wise(
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^
        X,
        ^^
    ...<4 lines>...
        input_name=input_name,
        ^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "C:\Users\91934\AppData\Roaming\Python\Python313\site-packages\sklearn\utils\validation.py", line 169, in _assert_all_finite_element_wise
    raise ValueError(msg_err)
ValueError: Input X contains NaN.
SVC does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values


In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

# Load dataset
csv_file = r"C:\Users\91934\Desktop\project minor\classified.csv"
df = pd.read_csv(csv_file)

# Encode class labels if not numeric
if df['class_label'].dtype == 'object':
    le = LabelEncoder()
    df['class_label'] = le.fit_transform(df['class_label'])

# Select numeric feature columns only (exclude target and metadata)
X = df.drop(columns=['class_label', 'source_file', 'segment','hjorth'], errors='ignore')
X = X.select_dtypes(include=['float64', 'int64'])  # keep only numeric
y = df['class_label']

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Build pipeline: imputation -> scaling -> SVM
pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
    ('svm', SVC(probability=True, random_state=42))
])

# Define hyperparameter grid for GridSearch
param_grid = {
    'svm__C': [0.1, 1, 10, 100],
    'svm__gamma': ['scale', 0.01, 0.1, 1],
    'svm__kernel': ['rbf', 'linear', 'poly']
}

# Grid search with 5-fold cross-validation
grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Best model
best_model = grid_search.best_estimator_
print("Best parameters:", grid_search.best_params_)
print("Best CV accuracy:", grid_search.best_score_)

# Make predictions
y_pred = best_model.predict(X_test)

# Evaluate
print("Test Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


KeyboardInterrupt: 