# In [2]:
import os
import pandas as pd

def read_and_label_file(file_path: str, label: str) -> pd.DataFrame:
    """Load a header-less Excel sheet, transpose it, and tag each row.

    The sheet is read with ``header=None`` and transposed. The first row of
    the transposed table (i.e. the sheet's first column) becomes the column
    header, and the first column of the transposed table (i.e. the sheet's
    first row) is discarded. A constant ``'Label'`` column holding *label*
    is then added.

    Args:
        file_path: Path of the Excel workbook to read.
        label: Value stored in the ``'Label'`` column of every row.

    Returns:
        The transposed, labelled DataFrame. If the file does not exist, an
        error message is printed and an empty DataFrame is returned.
    """
    # Only the read can raise FileNotFoundError, so keep the try body to it.
    try:
        raw = pd.read_excel(file_path, header=None)  # Read without headers
    except FileNotFoundError:
        print(f"Error: The file {file_path} was not found.")
        return pd.DataFrame()  # Callers detect the failure via `.empty`

    df = raw.T
    df.columns = df.iloc[0]  # Promote the first transposed row to the header

    # Remove the header row and the first column *by position*. Dropping the
    # column by label (df.drop(df.columns[0], axis=1)) would also remove every
    # other column that happens to share the first column's name.
    # .copy() makes the slice independent so adding 'Label' is unambiguous.
    df = df.iloc[1:, 1:].copy()
    df['Label'] = label
    return df

# Input directory and the workbooks to process; labels[i] is the tag that
# gets applied to file_names[i].
data_dir = 'data'
file_names = ['late917.xlsx', 'soon917.xlsx', 'late1725.xlsx', 'soon1725.xlsx']
labels = ['L', 'S', 'L', 'S']

# Pass 1: transpose and label every workbook, then save it next to the
# original with a "transposed_" prefix. Workbooks that yield an empty
# DataFrame are skipped and not recorded.
transposed_file_names = []
for file_name, label in zip(file_names, labels):
    file_path = os.path.join(data_dir, file_name)
    df = read_and_label_file(file_path, label)
    if df.empty:
        continue
    transposed_file_name = "transposed_" + file_name
    transposed_file_path = os.path.join(data_dir, transposed_file_name)
    df.to_excel(transposed_file_path, index=False)
    transposed_file_names.append(transposed_file_name)

# Pass 2: load the saved transposed workbooks back in, in the same order
# they were written.
dataframes = []
for transposed_file_name in transposed_file_names:
    transposed_file_path = os.path.join(data_dir, transposed_file_name)
    df = pd.read_excel(transposed_file_path)
    dataframes.append(df)

# Pass 3: build the combined workbooks, but only when all four inputs made
# it through. In that case dataframes[:2] are the two "917" files and
# dataframes[2:] the two "1725" files (same order as file_names).
if len(dataframes) != 4:
    print("Error: Not all files were successfully read. Combined datasets will not be created.")
else:
    combined_917 = pd.concat(dataframes[:2], ignore_index=True)
    combined_917.to_excel(
        os.path.join(data_dir, 'combined_917.xlsx'),
        index=False,
    )

    combined_1725 = pd.concat(dataframes[2:], ignore_index=True)
    combined_1725.to_excel(
        os.path.join(data_dir, 'combined_1725.xlsx'),
        index=False,
    )

    combined_all = pd.concat([combined_917, combined_1725], ignore_index=True)
    combined_all.to_excel(
        os.path.join(data_dir, 'combined_all.xlsx'),
        index=False,
    )
