In [1]:
import os
import pandas as pd

In [2]:
def sort_files_by_first_number(folder_path):
    """
    Sort the lines of every .txt file in a folder by their leading integer.

    Each file is rewritten in place with its lines ordered by the integer
    value of the first whitespace-separated token. Lines are stripped of
    surrounding whitespace and terminated with a single newline.
    Whitespace-only lines are dropped, since they carry no data and have no
    first token to sort on.

    If any non-blank line does not start with an integer, a message is
    printed and that file is left untouched.

    Parameters
    ----------
    folder_path : str
        Directory whose ``.txt`` files are sorted in place.
    """
    for file_name in os.listdir(folder_path):
        if not file_name.endswith(".txt"):
            continue
        file_path = os.path.join(folder_path, file_name)

        # Read the lines from the file, skipping blank ones: ``"".split()[0]``
        # would raise IndexError, which the ValueError handler below does not
        # catch.
        with open(file_path, "r") as f:
            lines = [line for line in f if line.strip()]

        try:
            # Sort the lines based on the first number in each line
            # (list.sort is stable, so equal keys keep their file order).
            lines.sort(key=lambda line: int(line.split()[0]))
        except ValueError:
            # If there's a ValueError, print the file name and continue with
            # the next file; nothing has been written back at this point.
            print(f"ValueError in file {file_name}")
            continue

        # Write the sorted lines back to the file
        with open(file_path, "w") as f:
            f.writelines(line.strip() + "\n" for line in lines)


In [3]:
def create_group_answer_sheet(folder_path):
    """
    Summarise every .txt file in a folder as one row of a CSV answer sheet.

    Each non-blank line of a file is expected to hold a class number in
    column 0 and a confidence value in column 5. For each of three class
    groups the class with the highest confidence is picked (ties go to the
    larger class number) and translated to an answer string; a group with no
    non-zero confidence yields an empty string.

    The result is written to ``0.group_answer_sheet.csv`` inside
    ``folder_path`` with columns ``filename, _01_, _04_, _05_06_``.

    Parameters
    ----------
    folder_path : str
        Directory containing the ``.txt`` files; also receives the CSV.

    Notes
    -----
    Files are processed in sorted filename order so the row order of the CSV
    is deterministic (``os.listdir`` makes no ordering guarantee).
    Files that raise ``PermissionError`` are reported and skipped.
    """
    columns = ['filename', '_01_', '_04_', '_05_06_']
    # One class group per answer column, in column order.
    class_groups = ([2, 3, 8], [4, 5, 9], [6, 7, 11])
    # Answer string written for the winning class of a group.
    answer_for_class = {2: 'X', 3: 'O', 4: 'X', 5: 'O', 6: 'X', 7: 'O', 8: '', 9: '', 11: 'p6'}

    rows = []
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith('.txt'):
            continue

        try:
            # class number -> confidence; when a class appears on several
            # lines the last line wins, as in the original implementation.
            confidence_by_class = {}
            with open(os.path.join(folder_path, filename), 'r') as file:
                for line in file:
                    values = line.split()
                    if not values:
                        # Blank line: nothing to parse.
                        continue
                    confidence_by_class[int(values[0])] = float(values[5])
        except PermissionError:
            print(f"Permission denied: {filename}")
            continue

        row = [filename]
        for classes in class_groups:
            # Tuples compare by confidence first, then by class number.
            best_confidence, best_class = max(
                (confidence_by_class.get(c, 0), c) for c in classes
            )
            row.append(answer_for_class[best_class] if best_confidence != 0 else '')
        rows.append(row)

    # Build the frame once; DataFrame.append was removed in pandas 2.0 and
    # growing a frame row by row is quadratic anyway.
    df = pd.DataFrame(rows, columns=columns)

    # Save the dataframe to a CSV file
    df.to_csv(os.path.join(folder_path, '0.group_answer_sheet.csv'), index=False)


In [4]:
import pandas as pd

def generate_answer_sheet(folder_path):
    """
    Turn the per-file group sheet into per-group answers.

    Reads ``0.group_answer_sheet.csv`` from ``folder_path``, adds the columns
    ``answer1``, ``answer2`` and ``answer3`` (empty by default) and writes the
    result to ``0.answer_sheet_total.csv`` in the same folder.

    Rows are processed in consecutive groups of three; the answers are stored
    on the first row of each group:

    * ``answer1`` - majority of 'O'/'X' in ``_01_``; a tie resolves to 'X'.
    * ``answer2`` - majority of 'O'/'X' in ``_04_``; a tie resolves to 'O'.
    * ``answer3`` - 'p6' if any ``_05_06_`` value is 'p6', otherwise the
      majority of 'O'/'X' with ties resolving to 'O'.

    An answer stays empty when the group contains neither 'O' nor 'X'.

    Afterwards rows are scanned in blocks of six: if any ``_05_06_`` value in
    the block is 'p6', ``answer3`` is set to 'p6' on the first and fourth row
    of that block.

    A trailing group with fewer than three (or six) rows is evaluated over the
    rows that exist instead of raising ``IndexError``.

    Parameters
    ----------
    folder_path : str
        Directory holding the input CSV; also receives the output CSV.
    """
    def _vote(values, tie_winner):
        # Majority of 'O' vs 'X'; '' when neither occurs. Blank cells come
        # back from read_csv as NaN and simply never match either letter.
        o_count = values.count('O')
        x_count = values.count('X')
        if o_count == 0 and x_count == 0:
            return ''
        if o_count == x_count:
            return tie_winner
        return 'O' if o_count > x_count else 'X'

    # Open the CSV file and create a dataframe
    df = pd.read_csv(os.path.join(folder_path, '0.group_answer_sheet.csv'))

    # Create empty columns for the answers
    df['answer1'] = ''
    df['answer2'] = ''
    df['answer3'] = ''

    n_rows = len(df)

    # Loop through the dataframe in groups of three rows. Slicing (rather
    # than indexing i+1 / i+2) tolerates a short final group.
    for start in range(0, n_rows, 3):
        group = df.iloc[start:start + 3]

        df.at[start, 'answer1'] = _vote(group['_01_'].tolist(), 'X')
        df.at[start, 'answer2'] = _vote(group['_04_'].tolist(), 'O')

        values = group['_05_06_'].tolist()
        df.at[start, 'answer3'] = 'p6' if 'p6' in values else _vote(values, 'O')

    # Loop through the dataframe in blocks of six rows (two groups of three)
    # and propagate 'p6' to the first row of both groups.
    for start in range(0, n_rows, 6):
        values = df['_05_06_'].iloc[start:start + 6].tolist()
        if 'p6' in values:
            df.at[start, 'answer3'] = 'p6'
            # Guard the second group: writing to a missing label with .at
            # would silently append a new row.
            if start + 3 < n_rows:
                df.at[start + 3, 'answer3'] = 'p6'

    # Save the updated dataframe to a new CSV file
    df.to_csv(os.path.join(folder_path, '0.answer_sheet_total.csv'), index=False)


In [None]:
import pandas as pd

def generate_answer_sheet(folder_path):
    """
    Variant of the answer-sheet generator that writes ``0.answer_sheet_total2.csv``.

    NOTE(review): this cell defines the same name as an earlier cell in this
    notebook; once it runs it replaces that definition. Confirm which variant
    is meant to be called, or give this one a distinct name.

    Reads ``0.group_answer_sheet.csv`` from ``folder_path``, adds the columns
    ``answer1``, ``answer2`` and ``answer3`` (empty by default) and writes the
    result to ``0.answer_sheet_total2.csv`` in the same folder.

    Rows are processed in consecutive groups of three; the answers are stored
    on the first row of each group:

    * ``answer1`` - majority of 'O'/'X' in ``_01_``; a tie resolves to 'X'.
    * ``answer2`` - majority of 'O'/'X' in ``_04_``; a tie resolves to 'O'.
    * ``answer3`` - 'p6' if any ``_05_06_`` value is 'p6', otherwise the
      majority of 'O'/'X' with ties resolving to 'O'.

    Afterwards rows are scanned in blocks of six: if any ``_05_06_`` value in
    the block is 'p6' or 'O', ``answer3`` is set to 'p6' on the first and
    fourth row of that block.

    A trailing group with fewer than three (or six) rows is evaluated over the
    rows that exist instead of raising ``IndexError``.

    Parameters
    ----------
    folder_path : str
        Directory holding the input CSV; also receives the output CSV.
    """
    def _vote(values, tie_winner):
        # Majority of 'O' vs 'X'; '' when neither occurs. Blank cells come
        # back from read_csv as NaN and simply never match either letter.
        o_count = values.count('O')
        x_count = values.count('X')
        if o_count == 0 and x_count == 0:
            return ''
        if o_count == x_count:
            return tie_winner
        return 'O' if o_count > x_count else 'X'

    # Open the CSV file and create a dataframe
    df = pd.read_csv(os.path.join(folder_path, '0.group_answer_sheet.csv'))

    # Create empty columns for the answers
    df['answer1'] = ''
    df['answer2'] = ''
    df['answer3'] = ''

    n_rows = len(df)

    # Loop through the dataframe in groups of three rows. Slicing (rather
    # than indexing i+1 / i+2) tolerates a short final group.
    for start in range(0, n_rows, 3):
        group = df.iloc[start:start + 3]

        df.at[start, 'answer1'] = _vote(group['_01_'].tolist(), 'X')
        df.at[start, 'answer2'] = _vote(group['_04_'].tolist(), 'O')

        values = group['_05_06_'].tolist()
        df.at[start, 'answer3'] = 'p6' if 'p6' in values else _vote(values, 'O')

    # Loop through the dataframe in blocks of six rows (two groups of three).
    for start in range(0, n_rows, 6):
        values = df['_05_06_'].iloc[start:start + 6].tolist()
        # The original had two branches ('p6' present / 'O' present) with the
        # same effect; the second was unreachable as written because of a
        # syntax error (``df.at[i. 'answer3']``).
        # NOTE(review): treating any 'O' as 'p6' is what the cell encodes -
        # confirm that this is the intended rule.
        if 'p6' in values or 'O' in values:
            df.at[start, 'answer3'] = 'p6'
            # Guard the second group: writing to a missing label with .at
            # would silently append a new row.
            if start + 3 < n_rows:
                df.at[start + 3, 'answer3'] = 'p6'

    # Save the updated dataframe to a new CSV file
    df.to_csv(os.path.join(folder_path, '0.answer_sheet_total2.csv'), index=False)


In [5]:
# Folder of .txt files to process; the CSV outputs are written into the same folder.
# NOTE(review): absolute, machine-specific path - consider moving it to a config cell.
folder_path = 'C:/Users/GnTRnD/Desktop/LSR/yolov5_lsr/runs/detect/K-city-Assesment14/labels'

# Run the steps in order: the .txt files are sorted in place, then summarised
# into 0.group_answer_sheet.csv, which generate_answer_sheet then reads.
sort_files_by_first_number(folder_path)
create_group_answer_sheet(folder_path)
generate_answer_sheet(folder_path)
# NOTE(review): extract_and_modify_data is not defined in any cell visible here;
# confirm it is defined elsewhere, otherwise this line raises NameError on a fresh kernel.
extract_and_modify_data(folder_path)