# Categorical Labeling based on pre-determined thresholds

# Activity 2 bins

In [1]:
import pandas as pd
import numpy as np

# Function to read data from Excel
def read_data(file_path, sheet_name):
    """Load one worksheet of an Excel workbook into a DataFrame.

    Prints a preview of the first five rows, plus a warning when the
    resulting frame is empty.

    Args:
        file_path: Path of the workbook to open.
        sheet_name: Name of the worksheet to read.

    Returns:
        The loaded DataFrame, or None if any exception was raised while
        reading or previewing the sheet (the error is printed, not raised).
    """
    try:
        frame = pd.read_excel(file_path, sheet_name=sheet_name, engine='openpyxl')
        print(f"Data read from sheet '{sheet_name}' (first 5 rows):")
        print(frame.head())
        if frame.empty:
            print(f"Warning: The sheet '{sheet_name}' is empty or contains only headers.")
    except Exception as err:
        # Best-effort read: report the problem and let the caller handle None.
        print(f"Error reading '{sheet_name}' from {file_path}: {err}")
        return None
    return frame



# Function to classify data based on thresholds
def classify_data(data, thresholds):
    """Label every cell of a DataFrame as "low" or "high".

    Args:
        data: DataFrame of numeric values to label.
        thresholds: Sequence whose first element is the cut-off; only
            ``thresholds[0]`` is used.

    Returns:
        A DataFrame of the same shape where each value strictly below
        ``thresholds[0]`` becomes "low" and every other value becomes
        "high". Missing cells (NaN/None/NA) are passed through unchanged.
    """
    def classify(value):
        # A missing value fails every `<` comparison and would otherwise be
        # mislabeled as "high"; keep it missing instead.
        if pd.isna(value):
            return value
        if value < thresholds[0]:
            return "low"
        return "high"

    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1; look the
    # method up on the class so a column named "map" cannot shadow it.
    elementwise = getattr(pd.DataFrame, "map", None) or pd.DataFrame.applymap
    return elementwise(data, classify)

# Function to save classified labels to a new sheet in the same Excel file
def save_classified_data(classified_data, file_path, sheet_name):
    """Write the label table to a sheet of an existing Excel workbook.

    The workbook is opened in append mode and a sheet with the same name is
    replaced; the DataFrame index is not written.

    Args:
        classified_data: DataFrame of labels to store.
        file_path: Path of the workbook to append to.
        sheet_name: Name of the sheet that receives the labels.

    Returns:
        None. Success or failure is reported via print; exceptions are
        caught and not re-raised.
    """
    writer_options = {"engine": 'openpyxl', "mode": 'a', "if_sheet_exists": 'replace'}
    try:
        with pd.ExcelWriter(file_path, **writer_options) as workbook:
            classified_data.to_excel(workbook, sheet_name=sheet_name, index=False)
        print(f"Classified data successfully saved to sheet '{sheet_name}' in {file_path}")
    except Exception as err:
        print(f"Error saving classified data to {file_path}: {err}")

# Main function
def main():
    """Label the Activity "Min-Max Normalized" sheet with two bins and save it.

    Reads the normalized sheet, classifies every value against a single
    pre-determined cut-off, prints a preview and writes the labels to the
    "Min-Max 2 bins PD" sheet of the same workbook.
    """
    workbook_path = r'C:\Users\grvkr\Box\Gaurav Kumar\Purdue_Work\SAR_NM\Data\Siegwart_group_Data8_2023\Activity\InVitro_Activity.xlsx'  # Workbook read from and written to
    source_sheet = "Min-Max Normalized"  # Sheet holding the normalized values
    target_sheet = "Min-Max 2 bins PD"  # Sheet receiving the labels

    # Pre-determined (hard-coded) cut-off separating the two bins.
    cutoffs = [0.5]

    normalized = read_data(workbook_path, source_sheet)
    if normalized is None or normalized.empty:
        print("Data could not be read or is empty. Exiting.")
        return

    labels = classify_data(normalized, cutoffs)
    print("Classified Data (first 5 rows):")
    print(labels.head())

    save_classified_data(labels, workbook_path, target_sheet)

if __name__ == "__main__":
    main()


Data read from sheet 'Min-Max Normalized' (first 5 rows):
   Normalized Data
0         0.000000
1         0.444444
2         0.000000
3         0.444444
4         0.000000
Classified Data (first 5 rows):
  Normalized Data
0             low
1             low
2             low
3             low
4             low
Classified data successfully saved to sheet 'Min-Max 2 bins PD' in C:\Users\grvkr\Box\Gaurav Kumar\Purdue_Work\SAR_NM\Data\Siegwart_group_Data8_2023\Activity\InVitro_Activity.xlsx


# Activity 4 bins

In [2]:
import pandas as pd
import numpy as np

# Function to read data from Excel
def read_data(file_path, sheet_name):
    """Read a named worksheet from an Excel file and preview it.

    The first five rows are printed, followed by a warning if the frame
    turns out to be empty.

    Args:
        file_path: Location of the Excel workbook.
        sheet_name: Worksheet to load.

    Returns:
        The DataFrame that was read, or None when reading or previewing
        raised an exception (the error message is printed).
    """
    loaded = None
    try:
        sheet = pd.read_excel(file_path, sheet_name=sheet_name, engine='openpyxl')
        print(f"Data read from sheet '{sheet_name}' (first 5 rows):")
        print(sheet.head())
        if sheet.empty:
            print(f"Warning: The sheet '{sheet_name}' is empty or contains only headers.")
        # Only hand the frame back once the whole preview step succeeded.
        loaded = sheet
    except Exception as exc:
        print(f"Error reading '{sheet_name}' from {file_path}: {exc}")
    return loaded



# Function to classify data based on thresholds
def classify_data(data, thresholds):
    """Label every cell of a DataFrame with one of four ordered bins.

    Args:
        data: DataFrame of numeric values to label.
        thresholds: Sequence of three cut-offs, checked in order.

    Returns:
        A DataFrame of the same shape. A value becomes "low" if it is below
        ``thresholds[0]``, else "low-mid" if below ``thresholds[1]``, else
        "mid-high" if below ``thresholds[2]``, else "high". Missing cells
        (NaN/None/NA) are passed through unchanged.
    """
    lower_labels = ("low", "low-mid", "mid-high")

    def classify(value):
        # A missing value fails every `<` comparison and would otherwise be
        # mislabeled as "high"; keep it missing instead.
        if pd.isna(value):
            return value
        for position, label in enumerate(lower_labels):
            if value < thresholds[position]:
                return label
        return "high"

    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1; look the
    # method up on the class so a column named "map" cannot shadow it.
    elementwise = getattr(pd.DataFrame, "map", None) or pd.DataFrame.applymap
    return elementwise(data, classify)

# Function to save classified labels to a new sheet in the same Excel file
def save_classified_data(classified_data, file_path, sheet_name):
    """Store the classified labels in a sheet of the given workbook.

    Opens the workbook with the openpyxl engine in append mode, replacing a
    sheet that already carries ``sheet_name``, and writes the frame without
    its index.

    Args:
        classified_data: DataFrame holding the labels.
        file_path: Workbook to append the sheet to.
        sheet_name: Target sheet name.

    Returns:
        None. The outcome is printed; any exception is caught and reported
        rather than propagated.
    """
    try:
        xlsx_writer = pd.ExcelWriter(
            file_path,
            engine='openpyxl',
            mode='a',
            if_sheet_exists='replace',
        )
        with xlsx_writer:
            classified_data.to_excel(xlsx_writer, sheet_name=sheet_name, index=False)
        print(f"Classified data successfully saved to sheet '{sheet_name}' in {file_path}")
    except Exception as exc:
        print(f"Error saving classified data to {file_path}: {exc}")

# Main function
def main():
    """Label the Activity "Min-Max Normalized" sheet with four bins and save it.

    Reads the normalized sheet, classifies every value against three
    pre-determined cut-offs, prints a preview and writes the labels to the
    "Min-Max 4 bins PD" sheet of the same workbook.
    """
    file_path = r'C:\Users\grvkr\Box\Gaurav Kumar\Purdue_Work\SAR_NM\Data\Siegwart_group_Data8_2023\Activity\InVitro_Activity.xlsx'  # Input file path
    normalized_sheet_name = "Min-Max Normalized"  # Sheet containing normalized data
    classified_sheet_name = "Min-Max 4 bins PD"  # Sheet to save classified labels

    # Read normalized data
    data = read_data(file_path, normalized_sheet_name)
    if data is None or data.empty:
        print("Data could not be read or is empty. Exiting.")
        return

    # Pre-determined (hard-coded) cut-offs; no clustering is performed here.
    thresholds = [0.2, 0.5, 0.8]

    # Classify data against the fixed thresholds
    classified_data = classify_data(data, thresholds)
    print("Classified Data (first 5 rows):")
    print(classified_data.head())

    # Save classified labels
    save_classified_data(classified_data, file_path, classified_sheet_name)

if __name__ == "__main__":
    main()


Data read from sheet 'Min-Max Normalized' (first 5 rows):
   Normalized Data
0         0.000000
1         0.444444
2         0.000000
3         0.444444
4         0.000000
Classified Data (first 5 rows):
  Normalized Data
0             low
1         low-mid
2             low
3         low-mid
4             low
Classified data successfully saved to sheet 'Min-Max 4 bins PD' in C:\Users\grvkr\Box\Gaurav Kumar\Purdue_Work\SAR_NM\Data\Siegwart_group_Data8_2023\Activity\InVitro_Activity.xlsx


# Cell Viability 2 bins

In [1]:
import pandas as pd
import numpy as np

# Function to read data from Excel
def read_data(file_path, sheet_name):
    """Fetch a worksheet from an Excel workbook as a DataFrame.

    A five-row preview is printed after loading, and an extra warning is
    printed when the frame is empty.

    Args:
        file_path: Workbook path.
        sheet_name: Name of the sheet to load.

    Returns:
        The DataFrame on success; None if an exception occurred at any
        point (the exception text is printed instead of being raised).
    """
    preview_title = f"Data read from sheet '{sheet_name}' (first 5 rows):"
    empty_warning = f"Warning: The sheet '{sheet_name}' is empty or contains only headers."
    try:
        table = pd.read_excel(file_path, sheet_name=sheet_name, engine='openpyxl')
        print(preview_title)
        print(table.head())
        if table.empty:
            print(empty_warning)
        return table
    except Exception as problem:
        print(f"Error reading '{sheet_name}' from {file_path}: {problem}")
        return None



# Function to classify data based on thresholds
def classify_data(data, thresholds):
    """Label every cell of a DataFrame as "mid-high" or "high".

    Args:
        data: DataFrame of numeric values to label.
        thresholds: Sequence whose first element is the cut-off; only
            ``thresholds[0]`` is used.

    Returns:
        A DataFrame of the same shape where each value strictly below
        ``thresholds[0]`` becomes "mid-high" and every other value becomes
        "high". Missing cells (NaN/None/NA) are passed through unchanged.
    """
    def classify(value):
        # A missing value fails every `<` comparison and would otherwise be
        # mislabeled as "high"; keep it missing instead.
        if pd.isna(value):
            return value
        return "mid-high" if value < thresholds[0] else "high"

    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1; look the
    # method up on the class so a column named "map" cannot shadow it.
    elementwise = getattr(pd.DataFrame, "map", None) or pd.DataFrame.applymap
    return elementwise(data, classify)

# Function to save classified labels to a new sheet in the same Excel file
def save_classified_data(classified_data, file_path, sheet_name):
    """Append the label DataFrame to an Excel workbook as a sheet.

    The writer uses the openpyxl engine in append mode and replaces any
    sheet already named ``sheet_name``. The index column is left out.

    Args:
        classified_data: Labels to write.
        file_path: Path of the workbook being updated.
        sheet_name: Sheet that will hold the labels.

    Returns:
        None. A confirmation or an error message is printed; exceptions do
        not propagate to the caller.
    """
    success_message = f"Classified data successfully saved to sheet '{sheet_name}' in {file_path}"
    try:
        with pd.ExcelWriter(file_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as out:
            classified_data.to_excel(out, sheet_name=sheet_name, index=False)
        print(success_message)
    except Exception as failure:
        print(f"Error saving classified data to {file_path}: {failure}")

# Main function
def main():
    """Label the Cell Viability "Min-Max Normalized" sheet with two bins.

    Reads the normalized sheet, classifies every value against a single
    pre-determined cut-off, prints a preview and writes the labels to the
    "Min-Max 2 bins PD" sheet of the same workbook.
    """
    file_path = r'C:\Users\grvkr\Box\Gaurav Kumar\Purdue_Work\SAR_NM\Data\Anderson_group_Data6_2013\Cell_Viability\InVitro_Cell_Viability.xlsx'  # Input file path
    normalized_sheet_name = "Min-Max Normalized"  # Sheet containing normalized data
    classified_sheet_name = "Min-Max 2 bins PD"  # Sheet to save classified labels

    # Read normalized data
    data = read_data(file_path, normalized_sheet_name)
    if data is None or data.empty:
        print("Data could not be read or is empty. Exiting.")
        return

    # Pre-determined (hard-coded) cut-off; no clustering is performed here.
    thresholds = [0.70]

    # Classify data against the fixed threshold
    classified_data = classify_data(data, thresholds)
    print("Classified Data (first 5 rows):")
    print(classified_data.head())

    # Save classified labels
    save_classified_data(classified_data, file_path, classified_sheet_name)

if __name__ == "__main__":
    main()


Data read from sheet 'Min-Max Normalized' (first 5 rows):
   Normalized Data
0         0.604167
1         0.708333
2         0.807292
3         0.843750
4         0.708333
Classified Data (first 5 rows):
  Normalized Data
0        mid-high
1            high
2            high
3            high
4            high
Classified data successfully saved to sheet 'Min-Max 2 bins PD' in C:\Users\grvkr\Box\Gaurav Kumar\Purdue_Work\SAR_NM\Data\Anderson_group_Data6_2013\Cell_Viability\InVitro_Cell_Viability.xlsx


# Cell Viability 4 bins

In [2]:
import pandas as pd
import numpy as np

# Function to read data from Excel
def read_data(file_path, sheet_name):
    """Open an Excel workbook and return one of its sheets as a DataFrame.

    After loading, the head of the frame is printed; an empty frame
    additionally triggers a printed warning.

    Args:
        file_path: Path to the Excel workbook.
        sheet_name: Which worksheet to read.

    Returns:
        The resulting DataFrame, or None when an exception was raised
        during reading or previewing (it is printed, not re-raised).
    """
    try:
        contents = pd.read_excel(file_path, sheet_name=sheet_name, engine='openpyxl')
        print(f"Data read from sheet '{sheet_name}' (first 5 rows):")
        print(contents.head())
        has_no_rows = contents.empty
        if has_no_rows:
            print(f"Warning: The sheet '{sheet_name}' is empty or contains only headers.")
    except Exception as error:
        print(f"Error reading '{sheet_name}' from {file_path}: {error}")
        contents = None
    return contents



# Function to classify data based on thresholds
def classify_data(data, thresholds):
    """Label every cell of a DataFrame with one of four ordered bins.

    Args:
        data: DataFrame of numeric values to label.
        thresholds: Sequence of three cut-offs, checked in order.

    Returns:
        A DataFrame of the same shape. A value becomes "low" if it is below
        ``thresholds[0]``, else "low-mid" if below ``thresholds[1]``, else
        "mid-high" if below ``thresholds[2]``, else "high". Missing cells
        (NaN/None/NA) are passed through unchanged.
    """
    lower_labels = ("low", "low-mid", "mid-high")

    def classify(value):
        # A missing value fails every `<` comparison and would otherwise be
        # mislabeled as "high"; keep it missing instead.
        if pd.isna(value):
            return value
        for position, label in enumerate(lower_labels):
            if value < thresholds[position]:
                return label
        return "high"

    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1; look the
    # method up on the class so a column named "map" cannot shadow it.
    elementwise = getattr(pd.DataFrame, "map", None) or pd.DataFrame.applymap
    return elementwise(data, classify)

# Function to save classified labels to a new sheet in the same Excel file
def save_classified_data(classified_data, file_path, sheet_name):
    """Persist the classified labels into a sheet of an Excel workbook.

    The workbook is opened for appending via openpyxl; an existing sheet
    with the same name is replaced, and the index is not written.

    Args:
        classified_data: DataFrame of labels.
        file_path: Workbook that receives the sheet.
        sheet_name: Name of the destination sheet.

    Returns:
        None. Prints a success message, or the error if anything raised;
        exceptions are swallowed after being reported.
    """
    append_settings = dict(engine='openpyxl', mode='a', if_sheet_exists='replace')
    try:
        with pd.ExcelWriter(file_path, **append_settings) as sink:
            classified_data.to_excel(sink, sheet_name=sheet_name, index=False)
        print(f"Classified data successfully saved to sheet '{sheet_name}' in {file_path}")
    except Exception as caught:
        print(f"Error saving classified data to {file_path}: {caught}")

# Main function
def main():
    """Label the Cell Viability "Min-Max Normalized" sheet with four bins.

    Reads the normalized sheet, classifies every value against three
    pre-determined cut-offs, prints a preview and writes the labels to the
    "Min-Max 4 bins PD" sheet of the same workbook.
    """
    file_path = r'C:\Users\grvkr\Box\Gaurav Kumar\Purdue_Work\SAR_NM\Data\Anderson_group_Data6_2013\Cell_Viability\InVitro_Cell_Viability.xlsx'  # Input file path
    normalized_sheet_name = "Min-Max Normalized"  # Sheet containing normalized data
    classified_sheet_name = "Min-Max 4 bins PD"  # Sheet to save classified labels

    # Read normalized data
    data = read_data(file_path, normalized_sheet_name)
    if data is None or data.empty:
        print("Data could not be read or is empty. Exiting.")
        return

    # Pre-determined (hard-coded) cut-offs; no clustering is performed here.
    # NOTE(review): if the sheet really holds min-max values in [0, 1], nothing
    # can fall below -0.40, so the "low" bin would never be assigned - confirm
    # that this first cut-off is intended.
    thresholds = [-0.40, 0.62, 0.70]

    # Classify data against the fixed thresholds
    classified_data = classify_data(data, thresholds)
    print("Classified Data (first 5 rows):")
    print(classified_data.head())

    # Save classified labels
    save_classified_data(classified_data, file_path, classified_sheet_name)

if __name__ == "__main__":
    main()


Data read from sheet 'Min-Max Normalized' (first 5 rows):
   Normalized Data
0         0.604167
1         0.708333
2         0.807292
3         0.843750
4         0.708333
Classified Data (first 5 rows):
  Normalized Data
0         low-mid
1            high
2            high
3            high
4            high
Classified data successfully saved to sheet 'Min-Max 4 bins PD' in C:\Users\grvkr\Box\Gaurav Kumar\Purdue_Work\SAR_NM\Data\Anderson_group_Data6_2013\Cell_Viability\InVitro_Cell_Viability.xlsx
