#### This notebook was used to find lists and dictionaries within the "features" dictionary where Gemini extracted features. They needed to be flattened because I didn't want to make and parse lists and dictionaries in databases. 

In [8]:
import os
import json


def flatten_json(json_obj, parent_key='', sep='_'):
    items = {}
    for k, v in json_obj.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            if parent_key == "features":
                items.update(flatten_json(v, parent_key=new_key, sep=sep))
            else:
                items[new_key] = v
        elif isinstance(v, list):
            for i, item in enumerate(v):
                if isinstance(item, dict):
                    items.update(flatten_json(item, parent_key=new_key, sep=sep))
                else:
                    items[new_key] = v
        else:
            items[new_key] = v
    return items

def process_files_in_folder(input_folder, output_folder):
    for filename in os.listdir(input_folder):
        if filename.endswith('.json'):
            input_file_path = os.path.join(input_folder, filename)
            with open(input_file_path, 'r', encoding='utf-8') as file:
                json_data = json.load(file)
                flattened_data = flatten_json(json_data)
                output_file_path = os.path.join(output_folder, f"{filename}")
                with open(output_file_path, 'w', encoding='utf-8') as output_file:
                    json.dump(flattened_data, output_file, ensure_ascii=False, indent=4)

                    
# Replace 'input_folder_path' and 'output_folder_path' with your input and output directory paths
input_folder_path = r'C:\Users\tomce\OneDrive - UKIM, FINKI\Desktop\Fakultet 3ta Godina\2 Sesti Semestar\0 DATA SCIENCE SEMINARSKA\1 Starting Over\products_categories_fixed\all_extracted_with_lists_dicts'
output_folder_path = r'C:\Users\tomce\OneDrive - UKIM, FINKI\Desktop\Fakultet 3ta Godina\2 Sesti Semestar\0 DATA SCIENCE SEMINARSKA\1 Starting Over\products_categories_fixed\all_extracted_dict_flattened'

process_files_in_folder(input_folder_path, output_folder_path)


In [27]:
import os
import json

def has_lists_or_dicts(data):
    if isinstance(data, dict):
        for value in data.values():
            if isinstance(value, (list, dict)):
                return True
    elif isinstance(data, list):
        for item in data:
            if isinstance(item, (list, dict)):
                return True
    return False

def check_features_for_lists_dicts(folder_path):
    count= 0
    for filename in os.listdir(folder_path):
        if filename.endswith('.json'):
            file_path = os.path.join(folder_path, filename)
            with open(file_path, 'r', encoding='utf-8') as file:
                try:
                    json_data = json.load(file)
                    if 'features' in json_data and has_lists_or_dicts(json_data['features']):
                        print(f"{file_path}: 'features' dictionary contains lists or dictionaries.")
                        count+=1
                except Exception as e:
                    print(f"Error while processing {file_path}: {e}")
    print(count)

# Replace 'folder_path' with the path to your directory containing JSON files
folder_path = r'C:\Users\tomce\OneDrive - UKIM, FINKI\Desktop\Fakultet 3ta Godina\2 Sesti Semestar\0 DATA SCIENCE SEMINARSKA\1 Starting Over\products_categories_fixed\all_extracted_universal_template'

check_features_for_lists_dicts(folder_path)



0


In [24]:
import os
import json
import shutil

def has_lists_or_dicts(data):
    if isinstance(data, dict):
        for value in data.values():
            if isinstance(value, (list, dict)):
                return True
    elif isinstance(data, list):
        for item in data:
            if isinstance(item, (list, dict)):
                return True
    return False

def check_and_copy_features_for_lists_dicts(source_folder, destination_folder):
    count = 0
    if not os.path.exists(destination_folder):
        os.makedirs(destination_folder)
    
    for filename in os.listdir(source_folder):
        if filename.endswith('.json'):
            file_path = os.path.join(source_folder, filename)
            with open(file_path, 'r', encoding='utf-8') as file:
                try:
                    json_data = json.load(file)
                    if 'features' not in json_data or json_data['features'] == None or json_data['features'] == {}:
                        print(f"{file_path}: 'features' dictionary contains lists or dictionaries.")
                        shutil.copy(file_path, destination_folder)
                        count += 1
                except Exception as e:
                    print(f"Error while processing {file_path}: {e}")
    
    print(f"Total files copied: {count}")

# Replace 'source_folder' with the path to your directory containing JSON files
source_folder = r'C:\Users\tomce\OneDrive - UKIM, FINKI\Desktop\Fakultet 3ta Godina\2 Sesti Semestar\0 DATA SCIENCE SEMINARSKA\1 Starting Over\products_categories_fixed\all_extracted_universal_template'
# Replace 'destination_folder' with the path to your target directory where you want to copy the files
destination_folder = r'C:\Users\tomce\OneDrive - UKIM, FINKI\Desktop\Fakultet 3ta Godina\2 Sesti Semestar\0 DATA SCIENCE SEMINARSKA\1 Starting Over\products_categories_fixed\all_missing_features'

check_and_copy_features_for_lists_dicts(source_folder, destination_folder)


C:\Users\tomce\OneDrive - UKIM, FINKI\Desktop\Fakultet 3ta Godina\2 Sesti Semestar\0 DATA SCIENCE SEMINARSKA\1 Starting Over\products_categories_fixed\all_extracted_universal_template\processed_BANZ-0-3-god--Kaleidoscope.json: 'features' dictionary contains lists or dictionaries.
C:\Users\tomce\OneDrive - UKIM, FINKI\Desktop\Fakultet 3ta Godina\2 Sesti Semestar\0 DATA SCIENCE SEMINARSKA\1 Starting Over\products_categories_fixed\all_extracted_universal_template\processed_BARNER-Astoria-Light-Jade.json: 'features' dictionary contains lists or dictionaries.
C:\Users\tomce\OneDrive - UKIM, FINKI\Desktop\Fakultet 3ta Godina\2 Sesti Semestar\0 DATA SCIENCE SEMINARSKA\1 Starting Over\products_categories_fixed\all_extracted_universal_template\processed_BARNER-Chamberi---Dark-Green.json: 'features' dictionary contains lists or dictionaries.
C:\Users\tomce\OneDrive - UKIM, FINKI\Desktop\Fakultet 3ta Godina\2 Sesti Semestar\0 DATA SCIENCE SEMINARSKA\1 Starting Over\products_categories_fixed\all_e

In [26]:
import os
import json

def has_lists_or_dicts(data):
    if isinstance(data, dict):
        for value in data.values():
            if isinstance(value, (list, dict)):
                return True
    elif isinstance(data, list):
        for item in data:
            if isinstance(item, (list, dict)):
                return True
    return False

def check_features_for_lists_dicts(folder_path):
    count= 0
    for filename in os.listdir(folder_path):
        if filename.endswith('.json'):
            file_path = os.path.join(folder_path, filename)
            with open(file_path, 'r', encoding='utf-8') as file:
                try:
                    json_data = json.load(file)
                    if 'features' not in json_data or json_data['features'] == None or json_data['features'] == {}:
                        print(f"{file_path}: 'features' dictionary contains lists or dictionaries.")
                        count+=1
                except Exception as e:
                    print(f"Error while processing {file_path}: {e}")
    print(count)

# Replace 'folder_path' with the path to your directory containing JSON files
folder_path = r'C:\Users\tomce\OneDrive - UKIM, FINKI\Desktop\Fakultet 3ta Godina\2 Sesti Semestar\0 DATA SCIENCE SEMINARSKA\1 Starting Over\products_categories_fixed\all_extracted_universal_template'

check_features_for_lists_dicts(folder_path)



0
