## Converting LLM generated results from .json to .csv
- Unifies feature names that appear with inconsistent casing (e.g. `height` vs. `Height`)
- Combines all demographic information into a single column
- Source file: `llm_generated_profile.json`
- Prepares the data for the final database that will track all profile-image generation by AI models

In [1]:
import json
import pandas as pd
import os

In [2]:
def format_interests(interests):
    """Render an interests/hobbies value as a single display string.

    Args:
        interests: Either a list of interests or any other value (typically
            an already-formatted string).

    Returns:
        The list items joined with ", " when ``interests`` is a list,
        otherwise ``str(interests)``.
    """
    if isinstance(interests, list):
        # Coerce each item: JSON lists may contain numbers or nulls, and
        # str.join raises TypeError on anything that is not a string.
        return ', '.join(str(item) for item in interests)
    return str(interests)

def _lookup_ci(profile, names, default=''):
    """Return the value of the first key in ``profile`` whose lowercase form is in ``names``."""
    for key in profile:
        if key.lower() in names:
            return profile[key]
    return default


def standardize_fields(profile):
    """Collapse one raw profile dict into the flat row written to the CSV.

    Keys are matched case-insensitively, so ``height`` and ``Height`` are
    treated as the same field. All demographic values are merged into one
    ``' | '``-separated string of ``"Label: value"`` parts; a missing field
    still contributes its label with an empty value, so every row has the
    same sequence of labels.

    Args:
        profile: Dict holding one profile as loaded from the JSON file.

    Returns:
        Dict with the keys ``profile_id``, ``demographic_info`` and
        ``description``.
    """
    demo_info_parts = [
        f"Height: {_lookup_ci(profile, ('height',))}",
        f"Weight: {_lookup_ci(profile, ('weight',))}",
        f"Eye Color: {_lookup_ci(profile, ('eye_color', 'eye color'))}",
    ]

    # Hair is stored either as one combined field or as separate
    # color/texture fields. Prefer the color/texture pair when both exist;
    # otherwise fall back to whichever piece is available so that a lone
    # color or texture is not silently dropped.
    hair_color = _lookup_ci(profile, ('hair_color', 'hair color'))
    hair_texture = _lookup_ci(profile, ('hair_texture', 'hair texture'))
    if hair_color and hair_texture:
        hair = f"{hair_color}/{hair_texture}"
    else:
        hair = _lookup_ci(profile, ('hair',)) or hair_color or hair_texture or ''
    demo_info_parts.append(f"Hair: {hair}")

    # Education is stored either as one field or as a level/field pair.
    # Only build "<level> in <field>" when both halves are present, to avoid
    # dangling output such as " in Biology".
    education = _lookup_ci(profile, ('education',))
    education_level = _lookup_ci(profile, ('education_level', 'education level'))
    education_field = _lookup_ci(profile, ('education_field', 'education field'))
    if education_level and education_field:
        education = f"{education_level} in {education_field}"
    elif not education:
        education = education_level or education_field or ''

    demo_info_parts.extend([
        f"Skin_Tone: {_lookup_ci(profile, ('skin_tone',))}",
        f"Education: {education}",
        f"Ethnic_Background: {_lookup_ci(profile, ('ethnic_background',))}",
        f"Occupation: {_lookup_ci(profile, ('occupation',))}",
    ])

    interests = _lookup_ci(
        profile, ('interests_hobbies', 'interests and hobbies'), default=[]
    )
    demo_info_parts.append(f"Interests/Hobbies: {format_interests(interests)}")

    return {
        'profile_id': profile.get('profile_id', ''),
        'demographic_info': ' | '.join(demo_info_parts),
        'description': _lookup_ci(profile, ('description', 'donor_description')),
    }



In [None]:
def convert_json_to_csv(input_path, output_dir):
    """Convert the JSON file of profiles into ``llm_generated_profiles.csv``.

    Each profile in the JSON file is passed through ``standardize_fields``
    and the resulting rows are written, without an index column, to
    ``<output_dir>/llm_generated_profiles.csv``. The output path is printed
    once the file has been saved.

    Args:
        input_path: Path of the JSON file; it is iterated profile by profile.
        output_dir: Directory that receives the CSV. It is created if it
            does not exist yet.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(input_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    processed_data = [standardize_fields(profile) for profile in data]

    # Naming the columns keeps the header row even when there are no profiles.
    df = pd.DataFrame(
        processed_data,
        columns=['profile_id', 'demographic_info', 'description'],
    )

    # os.makedirs('') raises, so only create the directory when one is given.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, 'llm_generated_profiles.csv')

    df.to_csv(output_path, index=False)
    print(f"CSV file saved to: {output_path}")



In [4]:
# Where the raw LLM output (JSON) lives and which folder receives the CSV
input_path = "/Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/outputs/llm_generated_profile.json"
output_dir = "/Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/outputs"

# Convert the JSON profiles into the CSV file
convert_json_to_csv(input_path=input_path, output_dir=output_dir)

CSV file saved to: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/outputs/llm_generated_profiles.csv
