In [None]:
import os
import glob

def process_txt_files(directory_path):
    """
    Mark empty label files and print per-class counts for a directory.

    Every ``*.txt`` file directly inside ``directory_path`` is read as UTF-8
    and stripped of surrounding whitespace, then handled as follows:

    - empty content: the file is overwritten with the marker ``2``;
    - content that is exactly ``2``: treated as a file that was already
      marked by an earlier run (not rewritten), so running the function
      again reports the same numbers;
    - anything else: the file is left untouched and only its first
      character is inspected (``0`` is counted as smoke, ``1`` as fire).

    Files that cannot be read, decoded or written are reported on stdout
    and skipped; they are not included in the processed total.

    Args:
        directory_path: Directory containing the ``.txt`` files
            (sub-directories are not searched).

    Returns:
        None. The summary is printed to stdout.
    """
    empty_marker = '2'

    # Escape the directory part so characters such as "[" or "*" in the
    # folder name are matched literally instead of acting as wildcards.
    pattern = os.path.join(glob.escape(os.fspath(directory_path)), "*.txt")
    txt_files = sorted(glob.glob(pattern))

    processed_count = 0
    empty_count = 0
    non_empty_count_fire = 0
    non_empty_count_smoke = 0
    non_empty_count_other = 0
    non_empty_count = 0

    for file_path in txt_files:
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read().strip()

            if not content:
                with open(file_path, 'w', encoding='utf-8') as file:
                    file.write(empty_marker)
                empty_count += 1
            elif content == empty_marker:
                # Already marked by a previous run: count it as empty again
                # rather than as an unclassified non-empty file.
                empty_count += 1
            else:
                first_char = content[0]
                if first_char == '0':
                    non_empty_count_smoke += 1
                elif first_char == '1':
                    non_empty_count_fire += 1
                else:
                    non_empty_count_other += 1
                non_empty_count += 1

            processed_count += 1

        except (OSError, UnicodeError) as e:
            print(f"File processing error {file_path}: {e}")

    print("Processing is completed!")
    print(f"Total files processed: {processed_count}")
    print(f"Empty files (2 recorded): {empty_count}")
    print(f"Non-empty files (all): {non_empty_count}")
    print(f"Non-empty files (the fire): {non_empty_count_fire}")
    print(f"Non-empty files (the smoke): {non_empty_count_smoke}")
    if non_empty_count_other:
        # Only shown when something unexpected was found, so the usual
        # summary keeps its original shape.
        print(f"Non-empty files (other labels): {non_empty_count_other}")

In [None]:
# Run the label pass over dataset/label; the function prints its own summary.
process_txt_files(directory_path=os.path.join("dataset", "label"))

Processing is completed!
Total files processed: 21527
Empty files (2 recorded): 9838
Non-empty files (all): 11689
Non-empty files (the fire): 4656
Non-empty files (the smoke): 7033


In [None]:
import csv

def _parse_label_content(content):
    """Convert the stripped text of one label file into ``(number, coords)``."""
    coords = [0.0, 0.0, 0.0, 0.0]

    if not content:
        # Empty file: class 2 with all-zero coordinates.
        return 2, coords

    parts = content.split()

    try:
        number = int(float(parts[0]))
    except (ValueError, OverflowError):
        # Unparseable class token (text, "nan", "inf") falls back to 0.
        # NOTE(review): 0 is also a regular class value - confirm this
        # fallback is what downstream code expects.
        number = 0

    # Tokens 2-5 are the coordinates; missing or non-numeric ones stay 0.0.
    for index, token in enumerate(parts[1:5]):
        try:
            coords[index] = float(token)
        except ValueError:
            pass

    return number, coords


def extract_data_to_csv(directory_path, output_csv="labels-for-images.csv"):
    """
    Collect the labels from all ``.txt`` files of a directory into one CSV file.

    Each file contributes a single row
    ``filename, detect, coord1, coord2, coord3, coord4`` where ``filename`` is
    the file name without its extension. Only the first five
    whitespace-separated tokens of a file are used, so any further content
    (for example additional lines) is ignored. Empty files are written as
    class ``2`` with zero coordinates.

    Rows are written in sorted path order so the CSV is identical between
    runs. Files that cannot be read or decoded are reported on stdout and
    left out of the CSV.

    Args:
        directory_path: Directory containing the ``.txt`` files
            (sub-directories are not searched).
        output_csv: Path of the CSV file to create or overwrite.

    Returns:
        None. The CSV is written to ``output_csv`` and a summary is printed.
    """
    # Escape the directory part so characters such as "[" or "*" in the
    # folder name are matched literally instead of acting as wildcards.
    pattern = os.path.join(glob.escape(os.fspath(directory_path)), "*.txt")
    txt_files = sorted(glob.glob(pattern))

    data_rows = []
    error_count = 0

    for file_path in txt_files:
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read().strip()
        except (OSError, UnicodeError) as e:
            error_count += 1
            print(f"File processing error {file_path}: {e}")
            continue

        filename = os.path.splitext(os.path.basename(file_path))[0]
        number, coords = _parse_label_content(content)
        data_rows.append([filename, number] + coords)

    with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['filename', 'detect', 'coord1', 'coord2', 'coord3', 'coord4'])
        writer.writerows(data_rows)

    print("Complete!")
    print(f"Save result in: {output_csv}")
    print(f"Total files processed: {len(data_rows)}")
    print(f"Files with errors: {error_count}")
        

In [6]:
# Export the labels found under dataset/label to the default CSV file.
extract_data_to_csv(directory_path=os.path.join("dataset", "label"))

Complete!
Save result in: labels-for-images.csv
