In [3]:
import os
import array
import numpy as np
import pandas as pd
import imageio  # Use imageio for saving image
from concurrent.futures import ThreadPoolExecutor # for parallelization
import shutil

In [14]:
## Convert every .asm file in the `train` folder to a PNG image, in parallel (thread pool).

from concurrent.futures import ThreadPoolExecutor

# Input folder holding the .asm files and output folder for the generated PNGs.
asm_dir = 'train'  # Folder containing .asm files
output_dir = 'asm_images'  # Folder to save the converted PNG images

# Create the output directory if it doesn't exist. `exist_ok=True` makes the
# call safe to re-run and avoids the check-then-create race of testing
# os.path.exists() first.
os.makedirs(output_dir, exist_ok=True)

def convert_asm_to_png(filename):
    """Render one .asm file from ``asm_dir`` as a grayscale PNG in ``output_dir``.

    The file's raw bytes are laid out row by row in a near-square image whose
    width is ``int(file_size ** 0.5)``; trailing bytes that do not fill a
    complete row are dropped.

    Parameters
    ----------
    filename : str
        Name of a file inside ``asm_dir``. Names that do not end in ``.asm``
        are ignored.

    Returns
    -------
    None
        The image is written to disk and a progress line is printed. An empty
        file cannot form an image, so it is reported and skipped.
    """
    if not filename.endswith('.asm'):
        return

    asm_file_path = os.path.join(asm_dir, filename)
    ln = os.path.getsize(asm_file_path)  # File size in bytes

    # A zero-byte file would give width == 0 and a ZeroDivisionError below.
    if ln == 0:
        print(f"Skipped {filename}: file is empty")
        return

    # Image width is the integer square root of the size; `rem` is the number
    # of trailing bytes that do not fill a whole row.
    width = int(ln ** 0.5)
    rem = ln % width

    # Read the usable bytes into an unsigned 8-bit array.
    a = array.array("B")
    with open(asm_file_path, 'rb') as f:
        a.fromfile(f, ln - rem)

    # View the bytes as a (rows, width) uint8 matrix, one byte per pixel.
    g = np.frombuffer(a, dtype=np.uint8).reshape(-1, width)

    # Swap only the extension; str.replace would also rewrite any other
    # ".asm" occurring earlier in the name.
    stem, _ = os.path.splitext(filename)
    output_image_path = os.path.join(output_dir, stem + '.png')
    imageio.imwrite(output_image_path, g)

    print(f"Converted {filename} to {output_image_path}")

# Get all .asm files from the directory
asm_files = [filename for filename in os.listdir(asm_dir) if filename.endswith('.asm')]

# Convert the files in parallel. Each task is submitted individually and its
# future is kept: executor.map() only re-raises a worker's exception when its
# result iterator is consumed, so discarding that iterator hides failures.
with ThreadPoolExecutor() as executor:
    conversion_futures = {
        filename: executor.submit(convert_asm_to_png, filename)
        for filename in asm_files
    }

# Leaving the `with` block waits for every task, so all futures are done here.
failed_conversions = {}
for filename, future in conversion_futures.items():
    error = future.exception()
    if error is not None:
        failed_conversions[filename] = error

# Report every file whose conversion raised, instead of dropping the error.
for filename, error in failed_conversions.items():
    print(f"Failed to convert {filename}: {error!r}")


Converted 0eN9lyQfwmTVk7C2ZoYp.asm to asm_images\0eN9lyQfwmTVk7C2ZoYp.png
Converted 04hSzLv5s2TDYPlcgpHB.asm to asm_images\04hSzLv5s2TDYPlcgpHB.png
Converted 0hZEqJ5eMVjU21HAG7Ii.asm to asm_images\0hZEqJ5eMVjU21HAG7Ii.png
Converted 0aVNj3qFgEZI6Akf4Kuv.asm to asm_images\0aVNj3qFgEZI6Akf4Kuv.png
Converted 0ItXlAUOhK8ZYdDf7HW4.asm to asm_images\0ItXlAUOhK8ZYdDf7HW4.png
Converted 0DbLeKSoxu47wjqVHsi9.asm to asm_images\0DbLeKSoxu47wjqVHsi9.png
Converted 08BX5Slp2I1FraZWbc6j.asm to asm_images\08BX5Slp2I1FraZWbc6j.png
Converted 0K4sTCLtrIJ5SinQbe7u.asm to asm_images\0K4sTCLtrIJ5SinQbe7u.png
Converted 0LAXajqhQy7po16dw8Tx.asm to asm_images\0LAXajqhQy7po16dw8Tx.png
Converted 0mgFnqeLAMr7jthUYRTv.asm to asm_images\0mgFnqeLAMr7jthUYRTv.png
Converted 0Iv6U2hbcP1xeBitW5Oo.asm to asm_images\0Iv6U2hbcP1xeBitW5Oo.png
Converted 0I4ZVvngsAatm8fzD3pk.asm to asm_images\0I4ZVvngsAatm8fzD3pk.png
Converted 0PlfqyKM1JtYZx2me5FU.asm to asm_images\0PlfqyKM1JtYZx2me5FU.png
Converted 0M7aSiE9csDzkmfKheVt.asm to 

This is just for fun haha

In [4]:
# Define the paths
asm_images_folder = './asm_images'
train_folder = './train_asm_image'
test_folder = './test_asm_image'
train_labels_path = './train_labels.csv'
test_labels_path = './test_labels.csv'

# Create train and test directories if they don't exist
os.makedirs(train_folder, exist_ok=True)
os.makedirs(test_folder, exist_ok=True)

# Load the CSV files into DataFrames
train_labels_df = pd.read_csv(train_labels_path)
test_labels_df = pd.read_csv(test_labels_path)

# Get the list of IDs from train and test CSV files
train_ids = train_labels_df['Id'].tolist()
test_ids = test_labels_df['Id'].tolist()

# Sets give constant-time membership tests; checking each file against the
# lists would scan every ID for every image.
train_id_set = set(train_ids)
test_id_set = set(test_ids)

moved_to_train = 0
moved_to_test = 0

# Iterate through the images in the asm_images folder
for filename in os.listdir(asm_images_folder):
    # Split off the extension; only the trailing ".png" is removed, so an ID
    # that happens to contain ".png" elsewhere is left intact.
    file_id, extension = os.path.splitext(filename)
    if extension != '.png':
        continue  # Not a converted image; leave it where it is.

    source_path = os.path.join(asm_images_folder, filename)
    # An ID listed in both CSVs goes to the train folder (train is checked first).
    if file_id in train_id_set:
        # Move the file to the train_asm_image folder
        shutil.move(source_path, os.path.join(train_folder, filename))
        moved_to_train += 1
    elif file_id in test_id_set:
        # Move the file to the test_asm_image folder
        shutil.move(source_path, os.path.join(test_folder, filename))
        moved_to_test += 1

# Report how many files were actually moved, so an empty or already-processed
# source folder is visible in the output.
print(f"Moved {moved_to_train} image(s) to {train_folder} and {moved_to_test} image(s) to {test_folder}.")
print("Files have been moved to the respective directories.")


Files have been moved to the respective directories.
