# Import Basic Libraries and Modules

In [1]:
import pandas as pd
import numpy as np
import pickle
import os
from All_Featurizers import *
from All_ML_Models import *

import warnings

# Suppress all warnings
warnings.filterwarnings('ignore')

# Inputs & Parameters

In [2]:
# ---------------------------------------------------------------------------
# Featurizer groups
# ---------------------------------------------------------------------------
# Each key names a group of featurizer functions; `featurizer_style` (set at
# the bottom of this cell) selects which group the notebook runs.
#
# Wider set of groups, currently disabled and kept for reference:
# featurizer_map={'RDKit':[RDKit_Descriptors_Featurizer, RDKit_FPGenerator_Featurizer, RDKit_AtomPairGenerator_Featurizer, RDKit_TopologicalTorsionGenerator_Featurizer],
#                 'Descriptastorus': [Descriptastorus_MorganCounts_Featurizer, Descriptastorus_ChiralMorganCounts_Featurizer, Descriptastorus_FeatureCounts_Featurizer, Descriptastorus_AtomPairCounts_Featurizer, Descriptastorus_RDKitFPBits_Featurizer, Descriptastorus_RDKitFPUnbranched_Featurizer, Descriptastorus_RDKit2D_Featurizer, Descriptastorus_RDKit2DNormalized_Featurizer],
#                 'DGL': [DGL_Mol2Bigraph_Featurizer, DGL_Mol2CompleteGraph_Featurizer],
#                 'Pytorch': [PytorchGeometric_Featurizer],
#                 'Hyperparameter': [RDKit_Descriptors_Featurizer],
#                 'All_Unique': [RDKit_Descriptors_Featurizer, RDKit_FPGenerator_Featurizer, RDKit_AtomPairGenerator_Featurizer, Descriptastorus_MorganCounts_Featurizer, Descriptastorus_RDKit2DNormalized_Featurizer, DGL_Mol2CompleteGraph_Featurizer, PytorchGeometric_Featurizer],
#                 'DeepChem': [DeepChem_ConvMol_Featurizer, DeepChem_Weave_Featurizer, DeepChem_CircularFP_Featurizer, DeepChem_MACCS_Featurizer, DeepChem_MolGraphConv_Featurizer, DeepChem_DMPNN_Featurizer, DeepChem_PAGTN_Featurizer]
#                }
featurizer_map = {
    'RDKit': [
        RDKit_Descriptors_Featurizer,
        RDKit_FPGenerator_Featurizer,
        RDKit_AtomPairGenerator_Featurizer,
    ],
    'Descriptastorus': [
        Descriptastorus_MorganCounts_Featurizer,
        Descriptastorus_RDKit2DNormalized_Featurizer,
    ],
    'Pytorch': [
        PytorchGeometric_Featurizer,
    ],
    'Hyperparameter': [
        RDKit_Descriptors_Featurizer,
    ],
    'All_Unique': [
        RDKit_Descriptors_Featurizer,
        RDKit_FPGenerator_Featurizer,
        RDKit_AtomPairGenerator_Featurizer,
        Descriptastorus_MorganCounts_Featurizer,
        Descriptastorus_RDKit2DNormalized_Featurizer,
        PytorchGeometric_Featurizer,
    ],
    'DeepChem': [
        DeepChem_CircularFP_Featurizer,
        DeepChem_MACCS_Featurizer,
        DeepChem_PAGTN_Featurizer,
    ],
}


# Data-splitter and classifier-type maps, currently disabled:
# data_splitter_map= {'Train_Test_Data_Splitter': [Train_Test_Data_Splitter],
#                     'K_Fold_Data_Splitter': [K_Fold_Data_Splitter],
#                     'SK_Fold_Data_Splitter': [SK_Fold_Data_Splitter],
#                     'GK_Fold_Data_Splitter': [GK_Fold_Data_Splitter],
#                     'LOOCV_Data_Splitter': [LOOCV_Data_Splitter],
#                    }
#
#
# classification_model_map= {'Binary_Classifier': [Binary_Classifier],
#                           'MultiClass_Classifier': [MultiClass_Classifier],
#                           'Probabilistic_Classifier': [Probabilistic_Classifier]
#                           }

# ---------------------------------------------------------------------------
# ML model groups
# ---------------------------------------------------------------------------
# Same idea as featurizer_map: `ml_model_style` selects which list of model
# functions is trained.
ml_model_map = {
    'ML_Model_RF_Regression': [ML_Model_RF_Regression],
    'ML_Model_RF_Classification': [ML_Model_RF_Classification],
    'ML_Model_SVM_Classification': [ML_Model_SVM_Classification],
    'ML_Model_kNN_Classification': [ML_Model_kNN_Classification],
    'ML_Model_GB_Classification': [ML_Model_GB_Classification],
    'ML_Model_LR_Classification': [ML_Model_LR_Classification],
    'All_Classification_Models': [
        ML_Model_RF_Classification,
        ML_Model_SVM_Classification,
        ML_Model_kNN_Classification,
        ML_Model_GB_Classification,
        ML_Model_LR_Classification,
    ],
}


# ---------------------------------------------------------------------------
# Input / output workbooks
# ---------------------------------------------------------------------------
# Workbook + worksheet holding the SMILES / composition table
smiles_file = r"C:\Users\grvkr\Box\Gaurav Kumar\Purdue_Work\SAR_NM\Data\Data_Combined\Cell_Viability\InVitro_SMILES.xlsx"
smiles_sheet_name = "SMILES"
# Workbook + worksheet holding the target property
target_file = r"C:\Users\grvkr\Box\Gaurav Kumar\Purdue_Work\SAR_NM\Data\Data_Combined\Cell_Viability\InVitro_Cell_Viability.xlsx"
target_sheet_name = "4 bins"
# Workbook that receives every report sheet written by this notebook
output_file = r"C:\Users\grvkr\Box\Gaurav Kumar\Purdue_Work\SAR_NM\Results\Data_Combined\Cell_Viability\Classification Report 4 bins.xlsx"

# Worksheet names inside output_file
cr_sheet_name = 'Classification Report'      # sheet for classification report
accuracy_sheet_name = "Class Accuracies"     # per-class accuracy
precision_sheet_name = "Class Precision"     # per-class precision
recall_sheet_name = "Class Recall"           # per-class recall
f1score_sheet_name = "Class F1-Score"        # per-class F1-score
ypred_sheet_name = 'Predicted Y Test'        # sheet for predicted y test
ypred_full_sheet_name = 'Predicted Y Full'   # sheet for predicted y full


# ---------------------------------------------------------------------------
# Active selection: keys into featurizer_map / ml_model_map
# ---------------------------------------------------------------------------
featurizer_style = 'All_Unique'
ml_model_style = 'All_Classification_Models'



# Load dataset

In [3]:
def _read_required_sheet(workbook_path, sheet_name):
    """Read one worksheet of an Excel workbook into a DataFrame.

    When `sheet_name` is a string that the workbook does not contain, a
    ValueError is raised that names the workbook and lists the worksheets
    that do exist, so a mistyped or renamed sheet can be fixed without
    opening the file by hand.

    Parameters
    ----------
    workbook_path : path to the Excel workbook.
    sheet_name : worksheet to read (forwarded to pandas.read_excel).

    Returns
    -------
    pandas.DataFrame with the worksheet contents.

    Raises
    ------
    ValueError
        If `sheet_name` is a string not present in the workbook.
    """
    with pd.ExcelFile(workbook_path) as workbook:
        # Only string names can be validated against sheet_names; other
        # selectors (e.g. integer positions) are passed through unchanged.
        if isinstance(sheet_name, str) and sheet_name not in workbook.sheet_names:
            raise ValueError(
                f"Worksheet named '{sheet_name}' not found in '{workbook_path}'. "
                f"Available worksheets: {workbook.sheet_names}"
            )
        return pd.read_excel(workbook, sheet_name=sheet_name)


smiles_df = _read_required_sheet(smiles_file, smiles_sheet_name)
target_df = _read_required_sheet(target_file, target_sheet_name)

Nrows, Ncolumns = smiles_df.shape
# One column is set aside and the rest are counted in pairs.
# NOTE(review): presumably one identifier column followed by two columns per
# constituent (e.g. SMILES + composition) — confirm against the workbook layout.
Nconstituents = (Ncolumns - 1) // 2


ValueError: Worksheet named '4 bins' not found

# Implementation >>>>

# Featurization

In [None]:
# Featurized inputs (X) and targets (y), keyed by featurizer function name
dataX_dict = {}
datay_dict = {}

# Run every featurizer of the selected group and keep both of its outputs
for ff in featurizer_map[featurizer_style]:
    dataX, datay = ff(smiles_df, target_df, Nrows, Nconstituents)
    featurizer_name = ff.__name__
    dataX_dict[featurizer_name], datay_dict[featurizer_name] = dataX, datay


# Persist both dictionaries; the file name carries the featurizer_style key
pickle_dir = r"C:\Users\grvkr\Box\Gaurav Kumar\Purdue_Work\SAR_NM\Scripts\ML_Framework\Data_Combined\Cell_Viability_pickle_files"
for prefix, payload in (("dataX_dict", dataX_dict), ("datay_dict", datay_dict)):
    filename = f"{prefix}_4bins_{featurizer_style}.pkl"
    with open(rf"{pickle_dir}\{filename}", 'wb') as file:
        pickle.dump(payload, file)


In [None]:
# # Build the pickle filename from the featurizer_style variable
# filename = f"dataX_dict_4bins_{featurizer_style}.pkl"
# # Load the variable from the file
# with open(rf"C:\Users\grvkr\Box\Gaurav Kumar\Purdue_Work\SAR_NM\Scripts\ML_Framework\Data_Combined\Cell_Viability_pickle_files\{filename}", 'rb') as file:
#     dataX_dict= pickle.load(file)

# filename = f"datay_dict_4bins_{featurizer_style}.pkl"
# # Load the variable from the file
# with open(rf"C:\Users\grvkr\Box\Gaurav Kumar\Purdue_Work\SAR_NM\Scripts\ML_Framework\Data_Combined\Cell_Viability_pickle_files\{filename}", 'rb') as file:
#     datay_dict= pickle.load(file)

# Data splitting

In [None]:
from sklearn.model_selection import train_test_split

# Train/test split settings
test_ratio = 0.2   # passed to train_test_split as test_size
num_random = 42    # passed to train_test_split as random_state

# Split results, keyed by featurizer function name
dataX_train_dict, dataX_test_dict = {}, {}
datay_train_dict, datay_test_dict = {}, {}

# Split the featurized data of every featurizer in the selected group
for ff in featurizer_map[featurizer_style]:
    featurizer_name = ff.__name__
    split_parts = train_test_split(
        dataX_dict[featurizer_name],
        datay_dict[featurizer_name],
        test_size=test_ratio,
        random_state=num_random,
    )
    dataX_train, dataX_test, datay_train, datay_test = split_parts

    dataX_train_dict[featurizer_name] = dataX_train
    dataX_test_dict[featurizer_name] = dataX_test
    datay_train_dict[featurizer_name] = datay_train
    datay_test_dict[featurizer_name] = datay_test

# Persist the four dictionaries, one pickle file each
pickle_dir = r"C:\Users\grvkr\Box\Gaurav Kumar\Purdue_Work\SAR_NM\Scripts\ML_Framework\Data_Combined\Cell_Viability_pickle_files"
split_outputs = (
    ("dataX_train_dict", dataX_train_dict),
    ("datay_train_dict", datay_train_dict),
    ("dataX_test_dict", dataX_test_dict),
    ("datay_test_dict", datay_test_dict),
)
for prefix, payload in split_outputs:
    filename = f"{prefix}_4bins_{featurizer_style}.pkl"
    with open(rf"{pickle_dir}\{filename}", 'wb') as file:
        pickle.dump(payload, file)


In [None]:
# NOTE(review): the loaders below read from Activity_pickle_files, whereas the
# save cells in this notebook write to Cell_Viability_pickle_files — confirm the
# folder before re-enabling.
# filename = f"dataX_train_dict_4bins_{featurizer_style}.pkl"
# # Load the variable from the file
# with open(rf"C:\Users\grvkr\Box\Gaurav Kumar\Purdue_Work\SAR_NM\Scripts\ML_Framework\Data_Combined\Activity_pickle_files\{filename}", 'rb') as file:
#     dataX_train_dict= pickle.load(file)

# filename = f"datay_train_dict_4bins_{featurizer_style}.pkl"
# # Load the variable from the file
# with open(rf"C:\Users\grvkr\Box\Gaurav Kumar\Purdue_Work\SAR_NM\Scripts\ML_Framework\Data_Combined\Activity_pickle_files\{filename}", 'rb') as file:
#     datay_train_dict= pickle.load(file)

# filename = f"dataX_test_dict_4bins_{featurizer_style}.pkl"
# # Load the variable from the file
# with open(rf"C:\Users\grvkr\Box\Gaurav Kumar\Purdue_Work\SAR_NM\Scripts\ML_Framework\Data_Combined\Activity_pickle_files\{filename}", 'rb') as file:
#     dataX_test_dict= pickle.load(file)

# filename = f"datay_test_dict_4bins_{featurizer_style}.pkl"
# # Load the variable from the file
# with open(rf"C:\Users\grvkr\Box\Gaurav Kumar\Purdue_Work\SAR_NM\Scripts\ML_Framework\Data_Combined\Activity_pickle_files\{filename}", 'rb') as file:
#     datay_test_dict= pickle.load(file)

# ML model training

In [None]:
# Settings handed to every model function (each receives all three)
num_estimators = 200
num_neighbors = 5
num_random = 42


# Predictions, nested as {ml model name: {featurizer name: predictions}}
datay_pred_dict = {}        # first value returned by each model function
datay_pred_full_dict = {}   # second value returned by each model function

# Train every model of the selected group on every featurizer's data
for ff in featurizer_map[featurizer_style]:
    featurizer_name = ff.__name__
    for mm in ml_model_map[ml_model_style]:
        ml_model_name = mm.__name__

        datay_pred, datay_pred_full = mm(
            dataX_dict[featurizer_name],
            dataX_train_dict[featurizer_name],
            dataX_test_dict[featurizer_name],
            datay_train_dict[featurizer_name],
            datay_test_dict[featurizer_name],
            num_estimators,
            num_neighbors,
            num_random,
        )

        # setdefault creates the per-model inner dict on first use
        datay_pred_dict.setdefault(ml_model_name, {})[featurizer_name] = datay_pred
        datay_pred_full_dict.setdefault(ml_model_name, {})[featurizer_name] = datay_pred_full

# Persist both prediction dictionaries, one pickle file each
pickle_dir = r"C:\Users\grvkr\Box\Gaurav Kumar\Purdue_Work\SAR_NM\Scripts\ML_Framework\Data_Combined\Cell_Viability_pickle_files"
for prefix, payload in (("datay_pred_dict", datay_pred_dict), ("datay_pred_full_dict", datay_pred_full_dict)):
    filename = f"{prefix}_4bins_{featurizer_style}.pkl"
    with open(rf"{pickle_dir}\{filename}", 'wb') as file:
        pickle.dump(payload, file)




In [None]:
# NOTE(review): the loaders below read from Activity_pickle_files, whereas the
# save cells in this notebook write to Cell_Viability_pickle_files — confirm the
# folder before re-enabling.
# filename = f"datay_pred_dict_4bins_{featurizer_style}.pkl"
# # Load the variable from the file
# with open(rf"C:\Users\grvkr\Box\Gaurav Kumar\Purdue_Work\SAR_NM\Scripts\ML_Framework\Data_Combined\Activity_pickle_files\{filename}", 'rb') as file:
#     datay_pred_dict= pickle.load(file)

# filename = f"datay_pred_full_dict_4bins_{featurizer_style}.pkl"
# # Load the variable from the file
# with open(rf"C:\Users\grvkr\Box\Gaurav Kumar\Purdue_Work\SAR_NM\Scripts\ML_Framework\Data_Combined\Activity_pickle_files\{filename}", 'rb') as file:
#     datay_pred_full_dict= pickle.load(file)

# Write Classification Report to Excel File

In [None]:
from sklearn.metrics import classification_report, accuracy_score

# One flat record (dict) per (ml model, featurizer) pair; the DataFrame is
# built once after the loops instead of concatenating one-row frames.
report_records = []

for mm in ml_model_map[ml_model_style]:
    ml_model_name = mm.__name__  # Get the function name
    for ff in featurizer_map[featurizer_style]:
        featurizer_name = ff.__name__  # Get the function name

        predictions = datay_pred_dict[ml_model_name][featurizer_name]
        y_true = datay_test_dict[featurizer_name]

        # Report as a nested dict, then as a frame whose rows are the report
        # entries (class labels and summary rows) and whose columns are metrics
        report_dict = classification_report(y_true, predictions, output_dict=True)
        report_df = pd.DataFrame(report_dict).transpose()

        # Long format: one line per (report entry, metric) combination
        flattened_report = report_df.reset_index().melt(id_vars=['index'])
        flattened_report.columns = ['Label', 'Metric', 'Value']

        # Column headers keep the "<metric>_<report entry>" layout of the
        # existing sheet, e.g. "precision_0" or "recall_macro avg"
        record = {"ML Model Name": ml_model_name, "Featurizer Name": featurizer_name}
        for label, metric, value in zip(flattened_report['Label'],
                                        flattened_report['Metric'],
                                        flattened_report['Value']):
            record[f"{metric}_{label}"] = value
        report_records.append(record)

new_data_df = pd.DataFrame(report_records)

# Append to the rows already stored in the report sheet, if there are any
output_exists = os.path.exists(output_file)
combined_df = new_data_df
if output_exists:
    with pd.ExcelFile(output_file, engine='openpyxl') as xls:
        if cr_sheet_name in xls.sheet_names:
            existing_data_df = pd.read_excel(xls, sheet_name=cr_sheet_name)
            combined_df = pd.concat([existing_data_df, new_data_df], ignore_index=True)

# Append mode needs an existing workbook, and if_sheet_exists is only accepted
# in append mode, so a missing workbook is created with a plain write instead.
writer_options = {'mode': 'a', 'if_sheet_exists': 'replace'} if output_exists else {'mode': 'w'}
with pd.ExcelWriter(output_file, engine='openpyxl', **writer_options) as writer:
    combined_df.to_excel(writer, sheet_name=cr_sheet_name, index=False)



print("Finished")




# Write Predicted Values to Excel File

In [None]:
def _predictions_frame(truth_column, truth_values, predictions_by_model):
    """Assemble the true values and every model's predictions side by side.

    Parameters
    ----------
    truth_column : header for the column holding the true values.
    truth_values : true target values; they become the first column.
    predictions_by_model : nested dict {ml model name: {featurizer name:
        predictions}}; every entry becomes a column named
        "<ml model name>_<featurizer name>".

    Returns
    -------
    pandas.DataFrame with a fresh 0..n-1 index.
    """
    frame = pd.DataFrame()
    frame[truth_column] = truth_values
    for model_name, per_featurizer in predictions_by_model.items():
        for feat_name, predicted in per_featurizer.items():
            frame[f"{model_name}_{feat_name}"] = predicted
    # The sheet is written without an index, so rows are matched by position.
    # Dropping any index inherited from truth_values keeps the column-wise
    # concat with previously stored data positional as well.
    return frame.reset_index(drop=True)


def _add_columns_to_sheet(new_columns_df, sheet_name):
    """Write `new_columns_df` to `sheet_name` of output_file.

    Columns already stored in that sheet are kept and the new ones are placed
    to their right. Other sheets of the workbook are left as they are. The
    workbook is created when it does not exist yet.

    Returns
    -------
    pandas.DataFrame that was written to the sheet.
    """
    output_exists = os.path.exists(output_file)
    combined = new_columns_df
    if output_exists:
        with pd.ExcelFile(output_file, engine='openpyxl') as xls:
            # Look for the very sheet that is read next
            if sheet_name in xls.sheet_names:
                existing = pd.read_excel(xls, sheet_name=sheet_name)
                combined = pd.concat([existing, new_columns_df], axis=1)
    # Append mode needs an existing workbook, and if_sheet_exists is only
    # accepted in append mode, so a missing workbook gets a plain write.
    writer_options = {'mode': 'a', 'if_sheet_exists': 'replace'} if output_exists else {'mode': 'w'}
    with pd.ExcelWriter(output_file, engine='openpyxl', **writer_options) as writer:
        combined.to_excel(writer, sheet_name=sheet_name, index=False)
    return combined


# The true values are taken from the last featurizer of the selected group,
# named explicitly here instead of relying on a loop variable left behind by
# an earlier cell.
# NOTE(review): presumably the targets are the same for every featurizer
# (every split uses the same test_size and random_state) — confirm.
reference_featurizer_name = featurizer_map[featurizer_style][-1].__name__

# Test-set predictions
new_output_df = _predictions_frame('y Test', datay_test_dict[reference_featurizer_name], datay_pred_dict)
combined_df = _add_columns_to_sheet(new_output_df, ypred_sheet_name)

# Full-dataset predictions
new_output_full_df = _predictions_frame('y Full', datay_dict[reference_featurizer_name], datay_pred_full_dict)
combined_df = _add_columns_to_sheet(new_output_full_df, ypred_full_sheet_name)




# Calculate Per-Class Accuracy, Precision, Recall, and F1-Score and Write to Excel File

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Per-class scores, nested as {ml model name: {featurizer name: {class: score}}}
accuracy_dict = {}
precision_dict = {}
recall_dict = {}
f1score_dict = {}

# Each result dictionary paired with the scoring function that fills it
metric_scorers = [
    (accuracy_dict, accuracy_score),
    (precision_dict, precision_score),
    (recall_dict, recall_score),
    (f1score_dict, f1_score),
]

for mm in ml_model_map[ml_model_style]:
    ml_model_name = mm.__name__  # Get the function name
    for results, _ in metric_scorers:
        results[ml_model_name] = {}

    for ff in featurizer_map[featurizer_style]:
        featurizer_name = ff.__name__  # Get the function name
        predicted_class = np.asarray(datay_pred_dict[ml_model_name][featurizer_name]).ravel()
        true_class = np.asarray(datay_test_dict[featurizer_name]).ravel()

        for results, _ in metric_scorers:
            results[ml_model_name][featurizer_name] = {}

        # Only classes present in the true test labels are scored
        for cls in np.unique(true_class):
            # One-vs-rest view: 1 for the current class, 0 for all other classes
            y_true_binary = (true_class == cls).astype(int)
            y_pred_binary = (predicted_class == cls).astype(int)
            for results, scorer in metric_scorers:
                results[ml_model_name][featurizer_name][cls] = scorer(y_true_binary, y_pred_binary)


def _per_class_frame(metric_dict):
    """Flatten {model: {featurizer: {class: score}}} into one row per pair.

    Returns a pandas.DataFrame whose first two columns are 'ML Model Name'
    and 'Featurizer Name', followed by one column per class.
    """
    records = [
        {'ML Model Name': model_name, 'Featurizer Name': feat_name, **per_class}
        for model_name, per_featurizer in metric_dict.items()
        for feat_name, per_class in per_featurizer.items()
    ]
    return pd.DataFrame(records)


accuracy_df = _per_class_frame(accuracy_dict)
precision_df = _per_class_frame(precision_dict)
recall_df = _per_class_frame(recall_dict)
f1score_df = _per_class_frame(f1score_dict)


# Write every metric to its own sheet, below any rows the sheet already holds
metric_sheets = [
    (accuracy_sheet_name, accuracy_df),
    (precision_sheet_name, precision_df),
    (recall_sheet_name, recall_df),
    (f1score_sheet_name, f1score_df),
]
for sheet_name, metric_df in metric_sheets:
    output_exists = os.path.exists(output_file)
    combined_df = metric_df
    if output_exists:
        with pd.ExcelFile(output_file, engine='openpyxl') as xls:
            if sheet_name in xls.sheet_names:
                existing_data_df = pd.read_excel(xls, sheet_name=sheet_name)
                # Concatenate existing data with new data
                combined_df = pd.concat([existing_data_df, metric_df], ignore_index=True)

    # Append mode needs an existing workbook, and if_sheet_exists is only
    # accepted in append mode, so a missing workbook gets a plain write.
    writer_options = {'mode': 'a', 'if_sheet_exists': 'replace'} if output_exists else {'mode': 'w'}
    with pd.ExcelWriter(output_file, engine='openpyxl', **writer_options) as writer:
        combined_df.to_excel(writer, sheet_name=sheet_name, index=False)