In [15]:
import pandas as pd
from sklearn.metrics import roc_auc_score, confusion_matrix

def _rename_missing_protein_columns(columns: pd.Index) -> list:
    """Return the column labels with nameless protein columns given a placeholder."""
    placeholder = 'Protein_name_is_missing'
    # Labels of the form 'nan.<j>' are treated as missing names, exactly as the
    # original nested loop did (j ranges over the number of columns).
    mangled = {'nan.%s' % j for j in range(len(columns))}
    # Build a fresh list instead of writing into `columns.values`: an Index is
    # meant to be immutable and in-place edits can leave its lookups stale.
    return [
        placeholder if label in mangled else label
        for label in columns.fillna(placeholder)
    ]


def _drop_outliers(values: pd.Series, max_z: float) -> pd.Series:
    """Keep only the values whose absolute z-score is at most ``max_z``."""
    spread = values.std()
    # With fewer than two values (std is NaN) or identical values (std is 0)
    # every z-score would be NaN and the whole group would be thrown away.
    if pd.isna(spread) or spread == 0:
        return values
    z_scores = (values - values.mean()) / spread
    return values[z_scores.abs() <= max_z]


def _score_protein(cases: pd.Series, controls: pd.Series) -> tuple:
    """Return ``(auc, sensitivity, specificity)`` for one protein."""
    if cases.empty or controls.empty:
        # roc_auc_score needs both classes; report NaN for this protein
        # instead of aborting the analysis of all the others.
        missing = float('nan')
        return missing, missing, missing

    # Combine the cases and controls into label / measurement lists.
    labels = [1] * len(cases) + [0] * len(controls)
    variables = cases.tolist() + controls.tolist()

    # Compute the AUC-ROC score; below 0.5 means lower values indicate a case,
    # so the score is reported for the inverted direction.
    auc_roc = roc_auc_score(labels, variables)
    inverted = bool(auc_roc < 0.5)
    if inverted:
        auc_roc = 1 - auc_roc

    # Classify around the pooled mean, using the same direction as the
    # reported AUC so the three numbers describe the same classifier.
    threshold = sum(variables) / len(variables)
    predicted = [int((val >= threshold) != inverted) for val in variables]
    # labels=[0, 1] pins the matrix to 2x2 so the unpacking cannot fail.
    tn, fp, fn, tp = confusion_matrix(labels, predicted, labels=[0, 1]).ravel()

    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    return auc_roc, sensitivity, specificity


def AUC_Proteins(excel_file, remove_outliers: bool, outlier_z: float = 2.0) -> pd.DataFrame:
    """Compute AUC, sensitivity and specificity for each protein column.

    Rows whose 'Group' value equals 1 are cases and rows whose value equals 0
    are controls; rows with any other value are ignored. Within each group,
    missing measurements are dropped and repeated values are collapsed before
    scoring.

    NOTE(review): the first column is never analysed, as in the original
    implementation; confirm that no protein is stored there. The 'Group'
    column is skipped wherever it appears.
    NOTE(review): collapsing repeated measurement values (drop_duplicates) is
    kept from the original but changes the sample the AUC is computed on;
    confirm it is intended.

    Parameters
    ----------
    excel_file
        Anything accepted by ``pandas.read_excel`` (the first sheet is read),
        or an already-loaded DataFrame with the same layout (it is copied,
        not modified).
    remove_outliers : bool
        When true, values more than ``outlier_z`` standard deviations from
        their own group's mean are discarded before scoring.
    outlier_z : float, default 2.0
        Absolute z-score cut-off used when ``remove_outliers`` is true.

    Returns
    -------
    pandas.DataFrame
        One row per analysed column with the columns 'Protein', 'AUC value',
        'Sensitivity' and 'Specificity'. The AUC is reported as
        ``max(auc, 1 - auc)``; sensitivity and specificity use the pooled mean
        of the retained values as the cut-off, in the same direction as the
        reported AUC. All three are NaN when a group has no usable values.

    Raises
    ------
    KeyError
        If the data has no column named 'Group'.
    """
    # Read Excel data (or take a copy of a frame the caller already loaded).
    if isinstance(excel_file, pd.DataFrame):
        data = excel_file.copy()
    else:
        data = pd.read_excel(excel_file)

    # Handle missing protein names
    data.columns = _rename_missing_protein_columns(data.columns)

    column_labels = list(data.columns)
    if 'Group' not in column_labels:
        raise KeyError(
            "'Group' column not found; available columns: %s" % column_labels
        )
    group_pos = column_labels.index('Group')
    group = data.iloc[:, group_pos]

    # Boolean masks select rows by position, so the result does not depend on
    # the frame having a default 0..n-1 index.
    case_rows = (group == 1).to_numpy()
    control_rows = (group == 0).to_numpy()

    results = []
    for pos in range(1, len(column_labels)):
        if pos == group_pos:
            continue  # the label column is not a protein

        measurements = data.iloc[:, pos]
        cases = measurements[case_rows].dropna().drop_duplicates()
        controls = measurements[control_rows].dropna().drop_duplicates()

        if remove_outliers:
            # Remove outliers for cases and controls
            cases = _drop_outliers(cases, outlier_z)
            controls = _drop_outliers(controls, outlier_z)

        auc_roc, sensitivity, specificity = _score_protein(cases, controls)
        results.append((column_labels[pos], auc_roc, sensitivity, specificity))

    return pd.DataFrame(
        results, columns=['Protein', 'AUC value', 'Sensitivity', 'Specificity']
    )

In [16]:
# Example usage: score every protein column of one workbook.
# The workbook must contain a column named 'Group' (AUC_Proteins looks it up
# by that exact name); the recorded run of this cell failed with
# KeyError: 'Group', i.e. the sheet that was read had no such column.
excel_file_path ='Rocsample.xlsx'
# True drops values more than 2 standard deviations from their group's mean
# before scoring; False keeps every measurement.
remove_outliers_flag = True  # Set this according to your requirement
# Unused: AUC_Proteins takes no sheet argument, so the first sheet is read.
#sheet_name = "1"

# result_df has one row per protein: 'Protein', 'AUC value', 'Sensitivity', 'Specificity'.
result_df = AUC_Proteins(excel_file_path, remove_outliers_flag)
print(result_df)

KeyError: 'Group'