In [14]:
import pandas as pd
import json
import os

def safe_get(data_list, default=''):
    """Return the first element of ``data_list``, or ``default`` when the list is empty (or otherwise falsy, e.g. ``None``)."""
    if not data_list:
        return default
    return data_list[0]

def get_label(data_list, default=''):
    """Return the ``'label'`` value of the first element of ``data_list``.

    Args:
        data_list: A sequence whose first element is expected to be a dict.
            May be empty or ``None``.
        default: Value returned when no label can be read.

    Returns:
        The value stored under ``'label'`` in ``data_list[0]`` when
        ``data_list`` is non-empty and its first element is a dict that
        contains that key; otherwise ``default``.
    """
    if not data_list:
        return default
    first = data_list[0]
    # The first entry may be None or a scalar rather than a dict; only a dict
    # can carry a label, so anything else yields the default instead of raising.
    if isinstance(first, dict) and 'label' in first:
        return first['label']
    return default

# Directory containing the JSON files; can be overridden with the
# JSON_DIRECTORY_PATH environment variable.
directory_path = os.getenv('JSON_DIRECTORY_PATH', r'D:\1800spirits-salsify-data-load\input_folders\liquorfiles')

# Directory that receives the generated CSV (created if it does not exist).
output_directory = r'D:\1800spirits-salsify-data-load\Output_files'
os.makedirs(output_directory, exist_ok=True)

# Column order of the output CSV; these are also the keys of every row dict.
output_columns = [
    'GTIN',
    'Manufacturer Name',
    'Brand',
    'Volume',
    'Volume Code',
    'namePublicLong',
    'namePublicShort',
    'functionalName',
    'typePackaging',
    'useableUnits',
    'ratioAlcohol',
    'packagingInformationList',
    'countryOfOriginList',
    'kind',
]


def _as_dict(value):
    """Return ``value`` if it is a dict, otherwise an empty dict.

    ``dict.get(key, {})`` only falls back to ``{}`` when the key is absent; a
    key that is present with a JSON ``null`` yields ``None``. Routing nested
    lookups through this helper keeps chained ``.get`` calls from raising.
    """
    return value if isinstance(value, dict) else {}


def _first_entry(item, key):
    """Return the first element of the list stored at ``item[key]`` as a dict.

    Returns an empty dict when the key is missing, the list is empty or null,
    or the first element is not a dict.
    """
    return _as_dict(safe_get(item.get(key, [{}]), {}))


# Rows collected from every item of every JSON file
rows = []

# Iterate over all JSON files in the directory
for filename in os.listdir(directory_path):
    if not filename.endswith('.json'):
        continue
    file_path = os.path.join(directory_path, filename)

    # Load each JSON file with utf-8 encoding; the items live under the
    # top-level 'data' key (a missing key raises KeyError, as before).
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)['data']

    # Extract one row per item in the JSON file
    for item in data:
        # Looked up once and reused for both 'Volume' and 'Volume Code'.
        net_content = _first_entry(item, 'netContent')
        rows.append({
            'GTIN': item.get('gtin', ''),
            'Manufacturer Name': item.get('manufacturerNameText', ''),
            'Brand': item.get('brandText', ''),
            'Volume': net_content.get('data', ''),
            'Volume Code': _as_dict(net_content.get('expressedIn')).get('code', ''),
            'namePublicLong': _first_entry(item, 'namePublicLong').get('data', ''),
            'namePublicShort': _first_entry(item, 'namePublicShort').get('data', ''),
            'functionalName': _first_entry(item, 'functionalName').get('data', ''),
            'typePackaging': _as_dict(item.get('typePackaging')).get('code', ''),
            'useableUnits': item.get('useableUnits', ''),
            'ratioAlcohol': item.get('ratioAlcohol', ''),
            'packagingInformationList': get_label(item.get('packagingInformationList', [{}]), ''),
            # Items without a country-of-origin label are recorded as 'USA'.
            'countryOfOriginList': get_label(item.get('countryOfOriginList', [{}]), 'USA'),
            'kind': _as_dict(item.get('kind')).get('label', ''),
        })

# Create a DataFrame from the list of rows, fixing the column order
df = pd.DataFrame(rows, columns=output_columns)

# Save the DataFrame to a CSV file (without the index column)
output_csv_path = os.path.join(output_directory, 'liquorfiles-08-july.csv')
df.to_csv(output_csv_path, index=False)

print(f"Data extraction complete. CSV file created at '{output_csv_path}'.")


Data extraction complete. CSV file created at 'D:\1800spirits-salsify-data-load\Output_files\liquorfiles-08-july.csv'.
