In [2]:
import os
import random
import csv
import re
import json
import pandas as pd

In [None]:
#### delete image_data.csv from category folder

def delete_image_data_csv(root_folder):
    """Delete every file named exactly ``image_data.csv`` below ``root_folder``.

    The tree is traversed with ``os.walk``. A failed deletion is reported on
    stdout and the traversal carries on with the remaining directories.

    Args:
        root_folder: Directory whose whole subtree is scanned.

    Returns:
        None.
    """
    target_name = 'image_data.csv'
    for current_dir, _, filenames in os.walk(root_folder):
        # A directory listing holds each name at most once, so a membership
        # test finds the same file the per-name comparison would.
        if target_name not in filenames:
            continue
        target_path = os.path.join(current_dir, target_name)
        try:
            os.remove(target_path)
        except Exception as err:
            print(f"Failed to delete {target_path}: {err}")

# Run the cleanup over the 'server/images' tree. The path is relative, so it is
# resolved against the notebook's current working directory.
main_folder_path = 'server/images'
delete_image_data_csv(main_folder_path)

In [None]:
import os
import re
import random
import csv

def find_existing_numbers(root_folder, limit=100000):
    """Collect the numeric parts of all ``image_<digits>.jpg`` names in a tree.

    File names are tested with ``re.match``, i.e. the pattern must match at the
    start of the name but the name may continue after ``.jpg``.

    Args:
        root_folder: Directory whose whole subtree is scanned with ``os.walk``.
        limit: Accepted for interface compatibility; not used by the scan.

    Returns:
        A set of ints, one per distinct number found.
    """
    name_pattern = re.compile(r'image_(\d+)\.jpg')
    matches = (
        name_pattern.match(filename)
        for _, _, filenames in os.walk(root_folder)
        for filename in filenames
    )
    return {int(hit.group(1)) for hit in matches if hit}

def generate_unique_numbers(existing_numbers, limit=10000):
    """Draw a random integer in ``[1, limit]`` that is not already taken.

    The chosen number is added to ``existing_numbers`` before it is returned,
    so repeated calls with the same set never hand out a duplicate.

    Args:
        existing_numbers: Mutable set of ints that are already in use. It is
            updated in place with the returned number.
        limit: Inclusive upper bound of the range to draw from.

    Returns:
        The newly reserved integer.

    Raises:
        ValueError: If no number in ``[1, limit]`` is free (including
            ``limit < 1``). Without this check the sampling loop below would
            never terminate once the range is exhausted.
    """
    # Cheap pre-filter: the range can only be exhausted if the set holds at
    # least `limit` entries, so the exact count is needed only in that case.
    if len(existing_numbers) >= limit:
        taken_in_range = sum(1 for number in existing_numbers if 1 <= number <= limit)
        if taken_in_range >= limit:
            raise ValueError(f"No unused number left in the range 1..{limit}")

    # At least one free slot exists, so rejection sampling terminates.
    while True:
        rand_num = random.randint(1, limit)
        if rand_num not in existing_numbers:
            existing_numbers.add(rand_num)
            return rand_num

def rename_images_and_update_csv(root_folder):
    """Rename every ``image_*.jpg`` under ``root_folder`` and fix up the CSVs.

    For each directory in the tree the images are first renamed to
    ``image_<n>.jpg`` with a number not yet used anywhere in the tree, then
    every ``.csv`` file in that same directory is rewritten in place so that
    old image names (without the ``.jpg`` suffix) are replaced by the new ones.

    Args:
        root_folder: Directory whose whole subtree is processed.

    Returns:
        None. Progress and CSV errors are printed; a CSV that cannot be read is
        left untouched.
    """
    existing_numbers = find_existing_numbers(root_folder)

    for root, dirs, files in os.walk(root_folder):
        # `files` was listed before any rename in this directory, so it still
        # holds the old image names and the CSV names.
        rename_map = _rename_images_in_dir(root, files, existing_numbers)

        for file in files:
            if file.endswith('.csv'):
                _rewrite_csv_with_new_names(os.path.join(root, file), rename_map)


def _rename_images_in_dir(directory, files, existing_numbers):
    """Rename the images of one directory; return ``{old_stem: new_stem}``."""
    rename_map = {}
    for file in files:
        if file.startswith('image_') and file.endswith('.jpg'):
            new_image_name = f"image_{generate_unique_numbers(existing_numbers)}.jpg"
            os.rename(os.path.join(directory, file), os.path.join(directory, new_image_name))
            # Keys and values are stored without '.jpg', which is the form the
            # CSV lookup below uses.
            rename_map[file.replace('.jpg', '')] = new_image_name.replace('.jpg', '')
    return rename_map


def _rewrite_csv_with_new_names(csv_file_path, rename_map):
    """Rewrite one CSV in place, replacing old image names with new ones."""
    updated_csv_data = []
    try:
        # newline='' lets the csv module handle line endings and embedded
        # newlines itself, as the csv documentation requires.
        with open(csv_file_path, 'r', newline='') as csv_file:
            csv_reader = csv.reader(csv_file)
            header = next(csv_reader)
            # Fall back to the original cell: only cells that are renamed image
            # names change, ordinary column titles are preserved.
            updated_csv_data.append([rename_map.get(cell, cell) for cell in header])

            for row in csv_reader:
                # Blank lines come back as empty lists; keep them as they are
                # instead of failing on row[0].
                if row:
                    row[0] = rename_map.get(row[0], row[0])  # Only the first column holds image names
                updated_csv_data.append(row)
    except Exception as e:
        print(f"Error reading CSV {csv_file_path}: {e}")
        return

    try:
        with open(csv_file_path, 'w', newline='') as csv_file:
            csv_writer = csv.writer(csv_file)
            csv_writer.writerows(updated_csv_data)
        print(f"Successfully updated CSV: {csv_file_path}")
    except Exception as e:
        print(f"Error writing to CSV {csv_file_path}: {e}")

# Rename the images and rewrite the CSVs under 'server/images' (relative to the
# notebook's working directory). This changes files on disk, and new names are
# drawn at random, so every run produces different names.
main_folder_path = 'server/images'
rename_images_and_update_csv(main_folder_path)

In [3]:
# make id_category (newly populated server/images)

def get_img_csv_map(src_dir):
    """Map each image name to the name of the CSV file in the same folder.

    The directory layout walked is ``src_dir/<website>/<category>/``. Inside a
    category folder the first ``.csv`` entry returned by ``os.listdir`` is
    used; folders without a CSV contribute nothing.

    Args:
        src_dir: Top-level directory containing the website folders.

    Returns:
        dict mapping the image file name with ``.jpg`` removed to the CSV file
        name with ``.csv`` removed. If the same image name occurs in several
        folders, the one visited last wins.
    """
    mapping = {}
    for site_name in os.listdir(src_dir):
        site_dir = os.path.join(src_dir, site_name)
        if not os.path.isdir(site_dir):
            continue

        for category_name in os.listdir(site_dir):
            category_dir = os.path.join(site_dir, category_name)
            if not os.path.isdir(category_dir):
                continue

            entries = os.listdir(category_dir)
            csv_names = [entry for entry in entries if entry.endswith('.csv')]
            if not csv_names:
                continue

            label = csv_names[0].replace('.csv', '')
            for entry in entries:
                if entry.endswith('.jpg'):
                    mapping[entry.replace('.jpg', '')] = label
    return mapping

def save_json(data, out_file):
    """Serialise ``data`` as JSON (4-space indent) into ``out_file``.

    Args:
        data: Any JSON-serialisable object.
        out_file: Path of the file to create or overwrite.

    Returns:
        None.
    """
    with open(out_file, mode='w') as handle:
        json.dump(data, fp=handle, indent=4)

# In a notebook kernel __name__ is "__main__", so this runs whenever the cell is
# executed: build the image -> CSV-name map for 'server/images' and write it to
# 'server/id_category.json'. Both paths are relative to the working directory.
if __name__ == "__main__":
    img_csv_map = get_img_csv_map('server/images')
    save_json(img_csv_map, 'server/id_category.json')


In [None]:
# Quick check: print the name of every CSV file found below 'server/images'.
for root, dirs, files in os.walk('server/images'):
    for file in files:
        if not file.endswith('.csv'):
            continue
        print(file)

In [4]:
# Location of the client cache, relative to the notebook's working directory.
# CACHE_DIR is not referenced by the cells visible here.
CACHE_DIR = 'client/cache'
# JSON file read by load_cache_metadata().
CACHE_METADATA_FILE = 'client/cache/cache_metadata.json'

def load_cache_metadata():
    """Load the cache metadata list from ``CACHE_METADATA_FILE``.

    Returns:
        The parsed JSON content when it is a list. An empty list is returned
        when the file does not exist or when its content is any other JSON
        type. Errors raised while opening or parsing the file propagate.
    """
    if not os.path.exists(CACHE_METADATA_FILE):
        return []

    with open(CACHE_METADATA_FILE, 'r') as metadata_file:
        loaded = json.load(metadata_file)
        return loaded if isinstance(loaded, list) else []

# Load the metadata once for the ad-hoc query below.
cache_metadata = load_cache_metadata()

category = 'Education'
# Cell result: ids of the cached entries whose category equals `category`.
# NOTE(review): assumes every entry is a dict with 'id' and 'category' keys; a
# missing key raises KeyError. Confirm against the code that writes the cache.
[img['id'] for img in cache_metadata if img['category'] == category]

['image_1260', 'image_6350', 'image_904']