▀█▀ ▀▄▀ ▀█▀    ▀█▀ ▄▀▄    ██▀ ▀▄▀ ▄▀▀ ██▀ █   
 █  █ █  █      █  ▀▄▀    █▄▄ █ █ ▀▄▄ █▄▄ █▄▄ 
**Text to Excel + Time**


In [None]:
import pandas as pd
import os

# Convert each raw recording Raw/NNN.txt into Raw/NNN.xlsx, keeping the five
# finger channels and adding a reconstructed "Time (ms)" column.

# Directory the workbooks are written to (the same folder the .txt files are
# read from).
output_directory = "Raw/"

# One name per whitespace-separated value on a data line.  Only the first five
# (the finger channels) survive; the last four are dropped further down.
columns = ["1-Thumb", "2-Index", "3-Middle", "4-Ring", "5-Pinky", "Column6", "Column7", "Column8", "Column9"]

# Sampling rate used to rebuild the time axis.
# NOTE(review): presumably samples per second -- confirm against the recorder.
sampling_rate = 42
time_increment_ms = 1000 / sampling_rate

for file_number in range(1, 60):
    filename = f"{file_number:03d}.txt"

    # Step 1: read the text file, skipping its first two lines
    with open(f"Raw/{filename}") as inf:
        lines = inf.readlines()[2:]
    data = [line.strip().split() for line in lines]

    # Step 2: build the DataFrame (every line must split into len(columns) values)
    df = pd.DataFrame(data, columns=columns)

    # Step 3: convert every column to numeric; unparsable cells become NaN
    df = df.apply(pd.to_numeric, errors="coerce")

    # Step 4: drop the last 4 columns (rows are trimmed after the time axis is built)
    df = df.iloc[:, :-4]

    # Step 5: time stamp of every sample in whole milliseconds (truncated),
    # built in one assignment instead of one .loc write per row
    num_samples = len(df)
    df["Time (ms)"] = [int(i * time_increment_ms) for i in range(num_samples)]

    # Step 6: discard the first 370 and the last 100 samples
    df = df.iloc[370:-100]

    # Save to an Excel file with the same base name
    output_filename = os.path.join(output_directory, os.path.splitext(filename)[0] + ".xlsx")
    df.to_excel(output_filename, sheet_name="Sheet1", index=False)
    print(f"Saved {output_filename}")

print("Conversion completed successfully!")


█▀▄ ▄▀▄ ▀█▀ ▄▀▄    █▄ ▄█ ▄▀▄ █▄ ▄█ ██▀ █▄ █ ▀█▀ 
█▄▀ █▀█  █  █▀█    █ ▀ █ ▀▄▀ █ ▀ █ █▄▄ █ ▀█  █  
**Adding Data Moments**

In [None]:
import numpy as np
import os
import pandas as pd
import scipy.signal as signal
import plotly.express as px
import statistics as sts
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import preprocessing_module as prep
# Add per-finger "data moment" columns (segment mean, rolling variance,
# rolling skew marker) to every workbook in Raw/ and save to DataMoments/.

# Finger channels expected in every workbook
columns = ['1-Thumb', '2-Index', '3-Middle', '4-Ring', '5-Pinky']
columns_exist = ['1-Thumb', '2-Index', '3-Middle', '4-Ring', '5-Pinky']

# Derived column names, one list per statistic
columns_mean = [f"{column}_mean" for column in columns_exist]
columns_std = [f"{column}_std" for column in columns_exist]
columns_var = [f"{column}_var" for column in columns_exist]
columns_der1 = [f"{column}_der1" for column in columns_exist]
columns_skew = [f"{column}_skew" for column in columns_exist]
columns_ewm = [f"{column}_ewm" for column in columns_exist]

# Each recording is cut into num_parts equal segments; the first `delay`
# samples of a segment are left out of that segment's mean.
num_parts = 7
delay = 210

# Input / output locations and the [rin, rout) range of file numbers
output_directory = "DataMoments/"
input_directory = "Raw/"
rin = 1
rout = 60

# to_excel does not create missing folders
os.makedirs(output_directory, exist_ok=True)

for file_number in range(rin, rout):
    filename = f"{file_number:03d}.xlsx"
    filepath = os.path.join(input_directory, filename)

    # Read the workbook once.  df2 is a full-length scratch copy whose finger
    # columns get overwritten with segment means; df is the frame that is saved.
    source = pd.read_excel(filepath, sheet_name="Sheet1")
    part_size = len(source) // num_parts
    df2 = source.copy()
    # Make the finger columns float so writing a mean into them never needs an
    # implicit dtype upcast.
    df2[columns_exist] = df2[columns_exist].astype(float)

    # Drop the first 370 rows; .copy() so the new columns are added to an
    # independent frame rather than to a slice of `source`.
    df = source.iloc[370:].copy()
    df["Label"] = ""

    # __________________Mean
    # .loc slices are label based and include both end points.
    for column in columns_exist:
        # Compute all segment means before any value is overwritten.
        segment_means = [
            sts.mean(df2.loc[i * part_size + delay: (i + 1) * part_size, column])
            for i in range(num_parts)
        ]
        for i, mean in enumerate(segment_means):
            df2.loc[i * part_size: (i + 1) * part_size, column] = mean
        # Index-aligned copy: df picks up the rows it kept (370 onwards).
        df[f"{column}_mean"] = df2[column]

    # _____________________Variance
    # A 3-sample rolling window leaves the first two rows NaN; back-fill them.
    # (The original called .bfill() and threw the result away.)
    for column in columns_exist:
        df[f"{column}_var"] = df[column].rolling(window=3).var().bfill()

    # _____________________Skew
    # df2[column] now holds the segment means written above (rows past the
    # last segment keep their raw values), so this is the rolling skew of that
    # step signal, moved 110 rows earlier and made non-negative.
    # NOTE(review): the original also called .bfill() here and discarded the
    # result, so this column was never back-filled.  That is kept on purpose:
    # filling the NaN gaps would stretch every marker backwards and change
    # what any later peak search on this column sees -- confirm before adding.
    for column in columns_exist:
        df[f"{column}_skew"] = df2[column].rolling(window=3).skew().shift(-110).abs()

    output_filename = os.path.join(output_directory, os.path.splitext(filename)[0] + ".xlsx")
    df.to_excel(output_filename, sheet_name="Sheet1", index=False)
    print(f"Saved {output_filename}")

print("Conversion completed successfully!")


▄▀▀ ▄▀▄ ▄▀▀ █   ██▀ 
▄█▀ █▀█ ▀▄▄ █▄▄ █▄▄ 
**Scaling Data**

In [None]:
import numpy as np
import os
import pandas as pd
import scipy.signal as signal
import plotly.express as px
import statistics as sts
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import preprocessing_module as prep
# Add MinMax / Standardized / RobustScaler versions of every finger channel to
# each workbook in DataMoments/ and save the result to Scaled/.

# Finger channels expected in every workbook
columns = ['1-Thumb', '2-Index', '3-Middle', '4-Ring', '5-Pinky']
columns_exist = ['1-Thumb', '2-Index', '3-Middle', '4-Ring', '5-Pinky']

# Derived column names, one list per statistic (columns_der1 is left empty,
# as in the original cell)
columns_mean = [f"{column}_mean" for column in columns_exist]
columns_std = [f"{column}_std" for column in columns_exist]
columns_var = [f"{column}_var" for column in columns_exist]
columns_der1 = []
columns_skew = [f"{column}_skew" for column in columns_exist]
columns_MinMax = [f"{column}_MinMax" for column in columns_exist]

pd.options.mode.copy_on_write = True

# [rin, rout) range of file numbers to process
rin = 1
rout = 60

MeanNormalization_path = "Scaled/Mean Normalization"
MinMaxScaler_path = "Scaled/MinMax scaler"
robustscaler_path = "Scaled/robustscaler"
standarization_path = "Scaled/standardized"
num_parts = 7
delay = 210

# Input / output locations
output_directory = "Scaled/"
input_directory = "DataMoments/"

# to_excel does not create missing folders
os.makedirs(output_directory, exist_ok=True)

for file_number in range(rin, rout):
    filename = f"{file_number:03d}.xlsx"
    filepath = os.path.join(input_directory, filename)

    # Read the workbook once; the original loaded it four times and only ever
    # used the first copy.
    df = pd.read_excel(filepath, sheet_name="Sheet1")

    # The three passes are kept separate so the new columns are appended in
    # the same order as before: all MinMax, all Standardized, all RobustScaler.

    # Min-max scaling
    for column in columns:
        df[f"{column}_MinMax"] = prep.min_max_scaling(df[column])

    # "Standardized" column
    # NOTE(review): this calls prep.mean_normalization although the column is
    # named "_Standardized" -- confirm which scaler is intended.
    for column in columns:
        df[f"{column}_Standardized"] = prep.mean_normalization(df[column])

    # Robust scaling
    for column in columns:
        df[f"{column}_RobustScaler"] = prep.robust_scaler(df[column])

    # The "_Sacled" spelling is what the feature-extraction step opens, so it
    # must stay as it is unless both places are changed together.
    output_filename = os.path.join(output_directory, os.path.splitext(filename)[0] + "_Sacled.xlsx")
    df.to_excel(output_filename, sheet_name="Sheet1", index=False)
    print(f"Saved {output_filename}")

    

█▀ ██▀ ▄▀▄ ▀█▀ █ █ █▀▄ ██▀ ▄▀▀ 
█▀ █▄▄ █▀█  █  ▀▄█ █▀▄ █▄▄ ▄█▀ 
**Feature Extraction**

In [None]:
import numpy as np
import os
import pandas as pd
import scipy.signal as signal
import plotly.express as px
import statistics as sts
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import preprocessing_module as prep


# Build one feature table per scaled workbook: for each of the seven gestures,
# the finger closure flags plus the per-section mean, median and peak of every
# finger's MinMax-scaled signal.

# Finger channels expected in every workbook
columns = ['1-Thumb', '2-Index', '3-Middle', '4-Ring', '5-Pinky']

# Derived column names, one list per statistic.  (The original appended each
# list to itself instead of appending the column name.)
columns_mean = [f"{column}_mean" for column in columns]
columns_std = [f"{column}_std" for column in columns]
columns_var = [f"{column}_var" for column in columns]
columns_der1 = []
columns_skew = [f"{column}_skew" for column in columns]
columns_MinMax = [f"{column}_MinMax" for column in columns]
columns_Standardized = [f"{column}_Standardized" for column in columns]
columns_RobustScaler = [f"{column}_RobustScaler" for column in columns]
columns_closure = [f"{column}_closure" for column in columns]

input_directory = "Scaled/"
output_directory = "Features/"
rin = 1
rout = 60
num_parts = 7

# Gesture labels, one per output row
Gesturs_Label = ['g1', 'g2', 'g3', 'g4', 'g5', 'g6', 'g7']
# Closure flags: one row per finger (same order as `columns`), one entry per
# gesture (same order as Gesturs_Label).
Gestures_Closures = [
    [0, 1, 0, 0, 0, 0, 1],
    [0, 1, 1, 1, 1, 1, 1],
    [0, 1, 0, 1, 1, 1, 0],
    [0, 1, 0, 0, 1, 1, 0],
    [0, 1, 0, 0, 0, 1, 1],
]

# Column layout of the saved feature table
columns_Features = [
    "1-Thumb-closure", "2-Index-closure", "3-Middle-closure", "4-Ring-closure", "5-Pinky-closure",
    "1-Thumb-mean", "2-Index-mean", "3-Middle-mean", "4-Ring-mean", "5-Pinky-mean",
    "1-Thumb-median", "2-Index-median", "3-Middle-median", "4-Ring-median", "5-Pinky-median",
    "1-Thumb-max", "2-Index-max", "3-Middle-max", "4-Ring-max", "5-Pinky-max",

    "Gesture",  # Column for gesture label
]


def _section_stat(frame, col, func):
    """Return prep.calculate_section_sts for one finger's MinMax column.

    The fixed arguments (height=0, move=-80, mode="skew") are the ones every
    call in the original cell used; only the finger and `func` vary.
    """
    return prep.calculate_section_sts(
        df=frame, col=f"{col}_MinMax", col2=col,
        height=0, move=-80, mode="skew", func=func,
    )


# to_excel does not create missing folders
os.makedirs(output_directory, exist_ok=True)

# ---------------------------------------------------------
for file_number in range(rin, rout):
    filename = f"{file_number:03d}_Sacled.xlsx"
    filepath = os.path.join(input_directory, filename)
    df = pd.read_excel(filepath, sheet_name="Sheet1")
    df_copy = df.copy()

    # Per-section means, one entry per finger
    Means = []
    for col in columns:
        # NOTE(review): the returned indices were never used; the call is
        # kept in its original position in case the helper has side effects.
        prep.find_peaks_indices(1, df, col, "skew")
        Means.append(_section_stat(df_copy, col, prep.pmean))

    # Per-section medians, one entry per finger
    Medians = [_section_stat(df_copy, col, prep.pmedian) for col in columns]

    # Per-section peak amplitudes, one entry per finger.  The original tested
    # col == "1-Thumb" in all five branches, so every "*-max" feature column
    # was filled with the thumb's value.
    Maxs = [_section_stat(df_copy, col, prep.peak_amplitude) for col in columns]

    # One row per gesture, laid out in the order of columns_Features
    rows = []
    for i, label in enumerate(Gesturs_Label):
        row = [finger_closures[i] for finger_closures in Gestures_Closures]
        row.extend(finger_means[i] for finger_means in Means)
        row.extend(finger_medians[i] for finger_medians in Medians)
        row.extend(finger_maxs[i] for finger_maxs in Maxs)
        row.append(label)
        rows.append(row)

    # Build the table in one go instead of growing it row by row with .loc
    df_features = pd.DataFrame(rows, columns=columns_Features)

    output_filename = os.path.join(output_directory, f"{file_number:03d}_Featured.xlsx")  # Unique filename
    df_features.to_excel(output_filename, sheet_name="Sheet1", index=False)
    print(f"Saved {output_filename}")

▄▀▀ ▄▀▄ █▄ ▄█ ██▄ █ █▄ █ ██▀ 
▀▄▄ ▀▄▀ █ ▀ █ █▄█ █ █ ▀█ █▄▄ 
**Combine all feature files into one unified file**

In [None]:
import os
import pandas as pd

# Stack the per-recording feature tables (Features/NNN_Featured.xlsx) into a
# single workbook, Features/Features3.xlsx.

# Input and output locations
input_directory = "Features/"
output_directory = "Features/"
output_filename = os.path.join(output_directory, "Features3.xlsx")

# Read every per-recording table first ...
frames = []
for file_number in range(1, 60):
    filename = f"{file_number:03d}_Featured.xlsx"
    filepath = os.path.join(input_directory, filename)
    frames.append(pd.read_excel(filepath, sheet_name="Sheet1"))

# ... then concatenate once.  Growing a frame with pd.concat inside the loop
# re-copies all earlier rows on every pass and starts from an empty frame.
combined_df = pd.concat(frames, ignore_index=True)

# Round the columns from position 5 onwards to 3 decimals.  DataFrame.round
# leaves non-numeric columns untouched, so a text column inside the slice is
# unaffected.
combined_df.iloc[:, 5:21] = combined_df.iloc[:, 5:21].round(3)

# Save the combined data to the destination Excel file
with pd.ExcelWriter(output_filename, mode="w", engine="openpyxl") as writer:
    combined_df.to_excel(writer, sheet_name="Combined_Data", index=False)

print(f"Data saved to {output_filename}")


▄▀▄ █   ▄▀  ▄▀▄ █▀▄ █ ▀█▀ █▄█ █▄ ▄█ ▄▀▀ 
█▀█ █▄▄ ▀▄█ ▀▄▀ █▀▄ █  █  █ █ █ ▀ █ ▄█▀ 
**Algorithms**


In [None]:
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import xgboost as xgb
import lightgbm as lgb
from sklearn.model_selection import StratifiedKFold
import catboost
import seaborn as sns
import os
# Cross-validate a set of classifiers on the combined feature table and report
# per-classifier average accuracy plus one confusion matrix per fold.

# Cap the CPU count reported to loky (the joblib backend); adjust to the machine.
os.environ["LOKY_MAX_CPU_COUNT"] = "4"

# Number of folds for the stratified k-fold cross-validation
n_folds = 30

# (display name, estimator) pairs to evaluate
classifiers = [
    ("SVM", SVC()),
    ("KNN", KNeighborsClassifier()),
    ("Decision Tree", DecisionTreeClassifier()),
    ("Random Forest", RandomForestClassifier()),
    ("Neural Network", MLPClassifier(
        max_iter=1000,  # Still set a high max_iter for safety
        early_stopping=True,
        validation_fraction=0.1,  # Portion of data for validation
        n_iter_no_change=10,  # Number of epochs without improvement
    )),
    ("Logistic Regression", LogisticRegression()),
    ("Naive Bayes", GaussianNB()),
    ("AdaBoost", AdaBoostClassifier(algorithm='SAMME')),
    ("Gradient Boosting", GradientBoostingClassifier()),
    ("Stochastic Gradient Descent", SGDClassifier()),
    # ("XGBoost", xgb.XGBClassifier()),  # You'll need to install XGBoost
    ("LightGBM", lgb.LGBMClassifier()),  # You'll need to install LightGBM
    ("CatBoost", catboost.CatBoostClassifier()),  # You'll need to install CatBoost
]

# results[(classifier name, file id)] -> list of (accuracy, confusion matrix),
# one entry per fold.  Only one feature file is evaluated, so the id is 1.
results = {}
file_id = 1

# Load the combined feature table
df = pd.read_excel("Features/Features3.xlsx")

# Features and target.
# NOTE(review): the slice starts at position 6; if the table has five closure
# columns followed by the statistics, position 5 (the first statistic) is left
# out of X -- confirm whether 5:20 was intended.
X = df.iloc[:, 6:20]
y = df["1-Thumb-closure"]

# Fix the label order so every fold's confusion matrix has the same shape,
# even when a class is missing from that fold's test split and predictions.
class_labels = sorted(y.unique())

kf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=43)
for train_index, test_index in kf.split(X, y):
    # Split the data into training and testing sets
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    for name, classifier in classifiers:
        # Fit on this fold's training split and score on its test split
        classifier.fit(X_train, y_train)
        y_pred = classifier.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        cm = confusion_matrix(y_test, y_pred, labels=class_labels)
        results.setdefault((name, file_id), []).append((accuracy, cm))

# Print the results and plot confusion matrices
for (name, i), accuracy_cm_tuples in results.items():
    average_accuracy = sum(acc for acc, cm in accuracy_cm_tuples) / len(accuracy_cm_tuples)
    print(f"Classifier: {name}, File: {i}, Average Accuracy: {average_accuracy}")

    # Plot confusion matrices for each fold
    for j, (acc, cm) in enumerate(accuracy_cm_tuples):
        print(f"  Fold {j + 1} Confusion Matrix:")
        print(cm)
        plt.figure(figsize=(2, 1))  # Adjust figure size as needed
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
        plt.title(f"Confusion Matrix - {name} - File {i} - Fold {j + 1}")
        plt.xlabel("Predicted Label")
        plt.ylabel("True Label")
        plt.show()
        print("-" * 40)