In [None]:
# Step 1: Import necessary libraries
import json
import os
from collections import defaultdict


In [None]:
# Step 2: Load the JSON file with digit predictions
# The next step iterates this object as image id -> column name -> list of
# digit records, each carrying 'cell_number', 'x_min', 'y_min' and 'prediction'.
json_file_path = 'all_digit_objects_with_predictions_organized.json'

# JSON text is UTF-8 by specification. Pin the encoding so the read does not
# silently depend on the platform's locale default (e.g. cp1252 on Windows).
with open(json_file_path, 'r', encoding='utf-8') as json_file:
    loaded_metadata = json.load(json_file)

print("Loaded metadata successfully.")


In [None]:
# Step 3: Collapse the per-digit predictions into one string per cell

# Result layout: image -> column -> cell -> concatenated digit string
all_strings = defaultdict(lambda: defaultdict(lambda: defaultdict(str)))

for image_id, columns in loaded_metadata.items():
    for col_name, digit_list in columns.items():
        # Bucket each digit under its cell as an (x_min, y_min, prediction) triple
        digits_by_cell = defaultdict(list)
        for entry in digit_list:
            cell = entry['cell_number']
            triple = (entry['x_min'], entry['y_min'], entry['prediction'])
            digits_by_cell[cell].append(triple)

        for cell, triples in digits_by_cell.items():
            # Order by position only (left to right, then top to bottom). The
            # prediction is deliberately kept out of the sort key so that digits
            # sharing a position stay in their original input order.
            triples.sort(key=lambda t: (t[0], t[1]))

            # Read the predictions off in that order to form the cell's text
            all_strings[image_id][col_name][cell] = ''.join(str(t[2]) for t in triples)

print("Strings constructed successfully.")


In [None]:
# Step 4: Persist the per-cell strings (image -> column -> cell -> text) as JSON
output_file_path = 'all_digit_strings.json'

with open(output_file_path, 'w') as json_file:
    # Serialise to a 4-space-indented document and write it out in one call
    json_file.write(json.dumps(all_strings, indent=4))

print(f"Strings saved successfully to {output_file_path}.")


In [None]:
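# NOTE: Superseded first draft of the Excel export in the next cell (this version
# lacks the per-image separator rows and the 'Row Number' column). It is kept
# for reference only and can be deleted.
# import json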
# import pandas as pd
# 
# # Step 1: Load the metadata JSON file
# input_file_path = 'all_digit_strings.json'
# with open(input_file_path, 'r') as json_file:
#     loaded_metadata = json.load(json_file)
# 
# # Step 2: Initialize an empty list to store row dictionaries
# rows = []
# 
# # Step 3: Populate the list with dictionaries representing each row
# for image_id, columns_data in loaded_metadata.items():
#     # Create a set to track row numbers across all columns
#     row_numbers = set()
# 
#     # Collect all unique row numbers from the cell keys (e.g., A1, C2, D3)
#     for col_name, cell_data in columns_data.items():
#         row_numbers.update(int(cell[1:]) for cell in cell_data.keys())
# 
#     # Create rows for each row number found
#     for row_num in sorted(row_numbers):
#         row_data = {'Image': image_id, 'A': None, 'B': None, 'C': None, 'D': None, 'E': None}
# 
#         for col_name, cell_data in columns_data.items():
#             cell_key = f"{col_name}{row_num}"
#             if cell_key in cell_data:
#                 row_data[col_name] = cell_data[cell_key]
# 
#         rows.append(row_data)
# 
# # Step 4: Convert the list of rows into a DataFrame
# df = pd.DataFrame(rows)
# 
# # Step 5: Save the DataFrame to an Excel file
# output_excel_path = 'digit_strings_output_dataframe_final.xlsx'
# df.to_excel(output_excel_path, index=False)
# 
# print(f"Data saved successfully to {output_excel_path}")


In [None]:
import json
import pandas as pd

# Step 1: Load the per-cell digit strings (image -> column -> cell key -> text)
# NOTE: this rebinds `loaded_metadata`, which earlier held the raw per-digit
# predictions; re-run the earlier load cell before re-running string extraction.
input_file_path = 'all_digit_strings.json'
# Pin UTF-8 (the encoding JSON mandates) instead of relying on the locale default.
with open(input_file_path, 'r', encoding='utf-8') as json_file:
    loaded_metadata = json.load(json_file)

# Steps 2-3: Flatten the image -> column -> cell mapping into spreadsheet rows


def build_sheet_rows(strings_by_image):
    """Convert ``{image_id: {col_name: {cell_key: text}}}`` into a list of row dicts.

    For every image, one separator row carrying ``"Image ID: <id>"`` is emitted,
    followed by one row per distinct row number found in that image's cell keys,
    in ascending order. Columns 'A'-'E' are always present (``None`` when the
    cell is missing); any other column name found in the data is added as an
    extra key on the rows where it occurs.

    A cell key is expected to be the column name followed by an integer row
    number (e.g. ``A1``). The row number is taken from whatever follows the
    column-name prefix, so multi-character column names work, and the value is
    stored directly under the parsed row number, so non-canonical spellings such
    as ``A01`` are not lost. When both ``A1`` and ``A01`` exist, ``A1`` wins.
    Keys that do not start with their column name fall back to dropping the
    first character; they register the row but contribute no value.

    Raises:
        ValueError: if the part of a cell key after the prefix is not an integer.
    """
    sheet_rows = []

    for image_id, columns_data in strings_by_image.items():
        # Separator row that labels the block of rows belonging to this image
        sheet_rows.append({'Row Number': '', 'Image': f"Image ID: {image_id}", 'A': '', 'B': '', 'C': '', 'D': '', 'E': ''})

        # row number -> {column name: cell text}, filled in a single pass
        cells_by_row = {}
        for col_name, cell_data in columns_data.items():
            for cell_key, text in cell_data.items():
                has_prefix = bool(col_name) and cell_key.startswith(col_name)
                row_num = int(cell_key[len(col_name):] if has_prefix else cell_key[1:])
                row_cells = cells_by_row.setdefault(row_num, {})

                # Prefer the canonical spelling; accept a non-canonical one
                # (e.g. zero-padded) only when no canonical key exists.
                canonical_key = f"{col_name}{row_num}"
                if cell_key == canonical_key or (has_prefix and canonical_key not in cell_data):
                    row_cells[col_name] = text

        for row_num in sorted(cells_by_row):
            row_data = {'Row Number': row_num, 'Image': '', 'A': None, 'B': None, 'C': None, 'D': None, 'E': None}
            row_data.update(cells_by_row[row_num])
            sheet_rows.append(row_data)

    return sheet_rows


rows = build_sheet_rows(loaded_metadata)

# Step 4: Materialise the collected row dictionaries as a single table
output_excel_path = 'digit_strings_output_dataframe_final.xlsx'
df = pd.DataFrame(rows)

# Step 5: Write the table to the workbook; index=False keeps pandas'
# positional index out of the sheet
df.to_excel(output_excel_path, index=False)

print(f"Data saved successfully to {output_excel_path}")
