In [3]:
import pandas as pd
import numpy as np
import random
import uuid
from faker import Faker

fake = Faker()
random.seed(42)
np.random.seed(42)

NUM_RECORDS = 1200

# --- Define Options & Weights ---

# Occupation distribution
occupations = ["Collector", "Casual Buyer", "Investor", "Interior Designer", "Art Dealer", "Corporate Buyer"]
occupation_weights = [0.40, 0.30, 0.10, 0.10, 0.05, 0.05]

# Preferred Art Styles with weights (reflecting report percentages for key genres)
art_styles = ["Abstract", "Expressive Figurative", "Realist Figurative", "Contemporary Surrealism",
              "Landscape", "Conceptual", "Minimalist", "Street Art", "Neo-Abstract Expressionist", "Other"]
art_style_weights = np.array([50, 34, 22, 17, 10, 10, 10, 5, 5, 3], dtype=float)
art_style_probs = art_style_weights / art_style_weights.sum()

# Favorite Mediums with weights
mediums = ["Oil", "Acrylic", "Watercolor", "Digital", "Sculpture", "Mixed Media", "Ceramics", "Photography", "Prints & Multiples"]
medium_weights = np.array([0.30, 0.20, 0.10, 0.10, 0.10, 0.10, 0.05, 0.03, 0.02])
medium_probs = medium_weights / medium_weights.sum()

# Buying Motivation probabilities (each can be independently chosen)
motivations = ["Build a Collection", "Decorate Home/Space", "Support Artists", "Inspire Me", "Investment"]
motivation_probs = {
    "Build a Collection": 0.64,
    "Decorate Home/Space": 0.62,
    "Support Artists": 0.52,
    "Inspire Me": 0.50,
    "Investment": 0.38
}

# Budget Range options with probabilities
budget_ranges = ["$500-$5,000", "$5,000-$20,000", "$20,000-$100,000", "$100,000-$250,000", "$250,000+"]
budget_weights = [0.44, 0.30, 0.15, 0.07, 0.04]

# Buying Frequency options with probabilities
frequency_options = ["1-2 artworks", "3-5 artworks", "6-9 artworks", "10+ artworks"]
frequency_weights = [0.50, 0.30, 0.15, 0.05]

# Engagement Level options
engagement_levels = ["Rare", "Occasional", "Frequent"]
engagement_weights = [0.30, 0.50, 0.20]

# Preferred Buying Channel options with weights (for multi-select)
buying_channels = ["Art Galleries", "Art Fairs", "Online Marketplaces", "Direct from Artists", "In-Person Auctions", "Social Media"]
buying_channel_weights = np.array([0.30, 0.10, 0.40, 0.15, 0.05, 0.20], dtype=float)
buying_channel_probs = buying_channel_weights / buying_channel_weights.sum()

# Online Art Buying Behavior options
online_behavior = ["Never bought online", "Bought 1-2 times", "Bought 3+ times", "Majority of purchases are online"]
online_behavior_weights_default = [0.20, 0.30, 0.30, 0.20]
# For younger collectors (<36), slightly adjusted probabilities:
online_behavior_weights_young = [0.10, 0.20, 0.40, 0.30]

# --- Helper Functions ---

def sample_multiple(options, probs, min_count=1, max_count=3):
    """Sample a random number (between min_count and max_count) of unique items from options using given probabilities."""
    k = random.randint(min_count, max_count)
    # Normalize probabilities for sampling without replacement
    probs = np.array(probs)
    probs = probs / probs.sum()
    return list(np.random.choice(options, size=k, replace=False, p=probs))

def generate_purchase_price(budget_range):
    """Generate a purchase price based on the buyer's budget range."""
    if budget_range == "$500-$5,000":
        return round(random.uniform(500, 5000), 2)
    elif budget_range == "$5,000-$20,000":
        return round(random.uniform(5000, 20000), 2)
    elif budget_range == "$20,000-$100,000":
        return round(random.uniform(20000, 100000), 2)
    elif budget_range == "$100,000-$250,000":
        return round(random.uniform(100000, 250000), 2)
    elif budget_range == "$250,000+":
        return round(random.uniform(250000, 500000), 2)
    else:
        return round(random.uniform(500, 5000), 2)

def generate_purchase_records(freq_category, preferred_styles, preferred_mediums, budget_range):
    """Generate a list of past purchase records based on frequency and buyer preferences."""
    if freq_category == "1-2 artworks":
        num_purchases = random.randint(1, 2)
    elif freq_category == "3-5 artworks":
        num_purchases = random.randint(3, 5)
    elif freq_category == "6-9 artworks":
        num_purchases = random.randint(6, 9)
    else:  # "10+ artworks"
        num_purchases = random.randint(10, 15)
    
    purchases = []
    for _ in range(num_purchases):
        # Use buyer's preferred art styles/mediums if available, else sample from overall distribution.
        style = random.choice(preferred_styles) if preferred_styles else np.random.choice(art_styles, p=art_style_probs)
        medium = random.choice(preferred_mediums) if preferred_mediums else np.random.choice(mediums, p=medium_probs)
        price = generate_purchase_price(budget_range)
        artist = "Artist " + fake.last_name()
        purchases.append({
            "Style": style,
            "Medium": medium,
            "Price": price,
            "Artist": artist
        })
    return purchases

def select_online_behavior(age):
    """Select online buying behavior with adjustments for younger collectors."""
    if age < 36:
        return np.random.choice(online_behavior, p=np.array(online_behavior_weights_young) / sum(online_behavior_weights_young))
    else:
        return np.random.choice(online_behavior, p=np.array(online_behavior_weights_default) / sum(online_behavior_weights_default))

# --- Generate Synthetic Data ---
records = []

for _ in range(NUM_RECORDS):
    buyer_id = str(uuid.uuid4())
    name = fake.name()
    age = random.randint(18, 80)
    location = f"{fake.city()}, {fake.country()}"
    occupation = random.choices(occupations, weights=occupation_weights, k=1)[0]
    
    # Sample Preferred Art Styles and Favorite Mediums (as lists)
    preferred_styles = sample_multiple(art_styles, art_style_probs, 1, 3)
    favorite_mediums = sample_multiple(mediums, medium_probs, 1, 3)
    
    # Buying Motivation: choose each motivation based on its probability.
    buyer_motivations = [motivation for motivation in motivations if random.random() < motivation_probs[motivation]]
    # Ensure at least one motivation is selected
    if not buyer_motivations:
        buyer_motivations = ["Build a Collection"]
    
    # Budget Range
    budget_range = random.choices(budget_ranges, weights=budget_weights, k=1)[0]
    
    # Buying Frequency
    buying_frequency = random.choices(frequency_options, weights=frequency_weights, k=1)[0]
    
    # Engagement Level
    engagement_level = random.choices(engagement_levels, weights=engagement_weights, k=1)[0]
    
    # Preferred Buying Channels (sample up to 2 channels)
    preferred_channels = sample_multiple(buying_channels, buying_channel_probs, 1, 2)
    
    # Online Art Buying Behavior (adjust for age)
    online_behavior_choice = select_online_behavior(age)
    
    # Past Purchases records
    past_purchases = generate_purchase_records(buying_frequency, preferred_styles, favorite_mediums, budget_range)
    
    record = {
        "Buyer ID": buyer_id,
        "Name": name,
        "Age": age,
        "Location": location,
        "Occupation": occupation,
        "Preferred Art Styles": preferred_styles,
        "Favorite Mediums": favorite_mediums,
        "Buying Motivation": buyer_motivations,
        "Budget Range": budget_range,
        "Buying Frequency": buying_frequency,
        "Engagement Level": engagement_level,
        "Preferred Buying Channels": preferred_channels,
        "Online Art Buying Behavior": online_behavior_choice,
        "Past Purchases": past_purchases
    }
    
    records.append(record)

# Convert to DataFrame. Note that nested fields (lists/dicts) will be stored as strings.
df = pd.DataFrame(records)

# Optionally, save the dataset to a CSV file (nested columns will be stringified)
df.to_csv("synthetic_art_buyers.csv", index=False)

print("Synthetic dataset generated successfully!")
print(df.head())


Synthetic dataset generated successfully!
                               Buyer ID              Name  Age  \
0  87e267ad-2f5f-4180-8e12-6af953d9be15  Alexander Carter   58   
1  eb7fcd17-e393-49e6-829d-2b9f6685b4bf     Daniel Weaver   62   
2  85236087-f011-4705-bcf6-323a4a5fec3b    Angela Donovan   72   
3  fd349333-6e8f-4213-bccc-376613efe3c9   Elizabeth Brown   61   
4  5c3e4b6a-1919-4357-870e-82d6a4fd3e1a   Jill Morrow DDS   59   

                                           Location    Occupation  \
0                             Cathyland, Micronesia     Collector   
1                     Adamshire, Russian Federation  Casual Buyer   
2                             South Autumn, Denmark     Collector   
3                               West Sarah, Finland  Casual Buyer   
4  New Alyssaport, Saint Vincent and the Grenadines  Casual Buyer   

                                Preferred Art Styles  \
0  [Expressive Figurative, Street Art, Contempora...   
1                                 

In [2]:
!pip install faker

Collecting faker
  Downloading Faker-36.1.1-py3-none-any.whl.metadata (15 kB)
Downloading Faker-36.1.1-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: faker
Successfully installed faker-36.1.1


In [3]:
import pandas as pd
import numpy as np
import random

# Load the dataset
df = pd.read_csv("toy_dataset_label.csv", delimiter='\t')

# Drop unwanted columns
drop_columns = ["Born-Died", "FILE", "DATE", "FORM", "TYPE", "SCHOOL", "TIMELINE", "TECHNIQUE"]
df = df.drop(columns=drop_columns, errors='ignore')

# Rename columns
rename_columns = {
    "ID": "Artwork ID",
    "TITLE": "Title",
    "AUTHOR": "Artist",
    "LOCATION": "Location",
    "URL": "Url"
}
df = df.rename(columns=rename_columns)

# Add new columns with logical random values
def generate_year():
    return random.randint(1950, 2025)  # Ensures realistic historical to modern artwork

def generate_price_range():
    price_brackets = ["$500-$5,000", "$5,000-$20,000", "$20,000-$100,000", "$100,000-$250,000", "$250,000+"]
    return random.choice(price_brackets)

df["Year Created"] = df.apply(lambda x: generate_year(), axis=1)
df["Description"] = ""
df["Art Style(s)"] = ""
df["Medium"] = ""
df["Keywords/Tags"] = ""
df["Dominant Colors"] = ""
df["Mood/Tone"] = ""
df["Price/Value Range"] = df.apply(lambda x: generate_price_range(), axis=1)

# Save the processed dataset
df.to_csv("processed_art500k.csv", index=False)

print("Dataset processed and saved as 'processed_art500k.csv'")


Dataset processed and saved as 'processed_art500k.csv'


In [5]:
df.head(50)

Unnamed: 0,Artwork ID,Artist,BORN-DIED,Title,Location,Url,Year Created,Description,Art Style(s),Medium,Keywords/Tags,Dominant Colors,Mood/Tone,Price/Value Range
0,1,"AACHEN, Hans von","(b. 1552, Köln, d. 1615, Praha)",Allegory,"Alte Pinakothek, Munich",http://www.wga.hu/html/a/aachen/allegory.html,2018,,,,,,,"$20,000-$100,000"
1,2,"AACHEN, Hans von","(b. 1552, Köln, d. 1615, Praha)","Bacchus, Ceres and Cupid","Kunsthistorisches Museum, Vienna",http://www.wga.hu/html/a/aachen/bacchus.html,1977,,,,,,,"$250,000+"
2,3,"AACHEN, Hans von","(b. 1552, Köln, d. 1615, Praha)",Joking Couple,"Kunsthistorisches Museum, Vienna",http://www.wga.hu/html/a/aachen/j_couple.html,2006,,,,,,,"$5,000-$20,000"
3,4,"AACHEN, Hans von","(b. 1552, Köln, d. 1615, Praha)",Portrait of Emperor Rudolf II,"Kunsthistorisches Museum, Vienna",http://www.wga.hu/html/a/aachen/rudolf2.html,1968,,,,,,,"$5,000-$20,000"
4,5,"AACHEN, Hans von","(b. 1552, Köln, d. 1615, Praha)",Self-Portrait with a Glass of Wine,Private collection,http://www.wga.hu/html/a/aachen/selfport.html,1985,,,,,,,"$250,000+"
5,6,"AAGAARD, Carl Frederik","(b. 1833, Odense, d. 1895, København)",Deer beside a Lake,Private collection,http://www.wga.hu/html/a/aagaard/deerlake.html,1997,,,,,,,"$500-$5,000"
6,7,"AAGAARD, Carl Frederik","(b. 1833, Odense, d. 1895, København)",The Rose Garden,Private collection,http://www.wga.hu/html/a/aagaard/rosegard.html,1955,,,,,,,"$250,000+"
7,8,"ABADIA, Juan de la",(active 1470-1490 in Huesca),The Archangel Michael,"Museu Nacional d'Art de Catalunya, Barcelona",http://www.wga.hu/html/a/abadia/michael.html,1975,,,,,,,"$20,000-$100,000"
8,9,"ABAQUESNE, Masséot","(b. ca 1500, Cherbourg, d. 1564, Sotteville-lè...",Albarello,"Musée des Beaux-Arts, Rouen",http://www.wga.hu/html/a/abaquesn/albarell.html,2007,,,,,,,"$100,000-$250,000"
9,10,"ABAQUESNE, Masséot","(b. ca 1500, Cherbourg, d. 1564, Sotteville-lè...",Ceramic Floor,"Musée National de la Renaissance, Écouen",http://www.wga.hu/html/a/abaquesn/floor1.html,1963,,,,,,,"$250,000+"


In [3]:
import requests
from bs4 import BeautifulSoup
import os

# Step 1: Define the main webpage URL
main_url = "https://www.wga.hu/html/a/aagaard/rosegard.html"
headers = {"User-Agent": "Mozilla/5.0"}

# Step 2: Fetch the main webpage
response = requests.get(main_url, headers=headers)
soup = BeautifulSoup(response.text, "html.parser")

# Step 3: Extract the image link from <a> tag
img_tag = soup.find("a", href=True)  # Find first <a> tag with href attribute

if img_tag and img_tag["href"].endswith(".jpg"):
    img_url = "https://www.wga.hu" + img_tag["href"]  # Construct full image URL
    print(f"Image found: {img_url}")

    # Step 4: Download the image
    img_data = requests.get(img_url, headers=headers).content
    img_filename = "downloaded_artwork.jpg"

    with open(img_filename, "wb") as img_file:
        img_file.write(img_data)

    print(f"Image saved successfully as {img_filename}")

else:
    print("No image found on the page.")


No image found on the page.


In [5]:
import pandas as pd
import numpy as np
import random
df = pd.read_csv("synthetic_art_buyers.csv")

In [7]:
drop_columns = ["Age", "Location", "Occupation","Budget Range","Buying Frequency","Engagement Level","Preferred Buying Channels","Online Art Buying Behavior","Past Purchases"]
df = df.drop(columns=drop_columns, errors='ignore')

In [8]:
df.head()

Unnamed: 0,Buyer ID,Name,Preferred Art Styles,Favorite Mediums,Buying Motivation
0,87e267ad-2f5f-4180-8e12-6af953d9be15,Alexander Carter,"['Expressive Figurative', 'Street Art', 'Conte...","['Watercolor', 'Oil']","['Build a Collection', 'Decorate Home/Space', ..."
1,eb7fcd17-e393-49e6-829d-2b9f6685b4bf,Daniel Weaver,['Minimalist'],"['Digital', 'Sculpture']","['Build a Collection', 'Support Artists']"
2,85236087-f011-4705-bcf6-323a4a5fec3b,Angela Donovan,"['Abstract', 'Expressive Figurative']",['Acrylic'],"['Decorate Home/Space', 'Inspire Me']"
3,fd349333-6e8f-4213-bccc-376613efe3c9,Elizabeth Brown,"['Abstract', 'Realist Figurative', 'Expressive...","['Acrylic', 'Sculpture', 'Oil']","['Build a Collection', 'Support Artists', 'Ins..."
4,5c3e4b6a-1919-4357-870e-82d6a4fd3e1a,Jill Morrow DDS,"['Abstract', 'Realist Figurative', 'Expressive...","['Ceramics', 'Photography']","['Build a Collection', 'Decorate Home/Space']"


In [9]:
df.to_csv("buyer_data.csv", index=False)

In [4]:
import os
import random
import shutil

# Define the source folder containing images and the destination folder
source_folder = "toy_dataset"
destination_folder = "random_50_data"

# Ensure the destination folder exists
os.makedirs(destination_folder, exist_ok=True)

# Get a list of all image files in the source folder
image_files = [f for f in os.listdir(source_folder) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp'))]

# Randomly select 50 images
selected_images = random.sample(image_files, 50)

# Copy selected images to the destination folder
for img in selected_images:
    shutil.copy(os.path.join(source_folder, img), os.path.join(destination_folder, img))

print(f"Successfully copied {len(selected_images)} images to {destination_folder}")


Successfully copied 50 images to random_50_data


In [10]:
import pandas as pd

# Load the dataset
file_path = "buyer_data.csv"  # Update with actual path
df = pd.read_csv(file_path)

# Randomly select 250 rows
df_sample = df.sample(n=250, random_state=42)  # Setting random_state for reproducibility

# Save the sampled data to a new CSV file
output_path = "sample_250_buyer_data.csv"  # Update with desired output path
df_sample.to_csv(output_path, index=False)

print(f"Successfully saved 250 random entries to {output_path}")


Successfully saved 250 random entries to sample_250_buyer_data.csv


In [11]:
df = pd.read_csv("gallerygpt_descriptions.csv")

In [12]:
df.head()

Unnamed: 0,Artwork ID,Description,Art Style(s),Medium,Keywords/Tags,Dominant Colors,Mood/Tone
0,1229,The image presents an expansive view of a cath...,,,,,
1,2346,The portrait depicts a man seated in a three-q...,,,,,
2,2535,This exquisite still life composition presents...,,,,,
3,2989,The sculpture presents a woman draped in a flo...,,,,,
4,3527,This exquisite piece of Renaissance artistry i...,,,,,


In [13]:
drop_columns = ["Art Style(s)", "Medium", "Keywords/Tags", "Dominant Colors", "Mood/Tone"]
df = df.drop(columns=drop_columns, errors='ignore')

In [14]:
df.head()

Unnamed: 0,Artwork ID,Description
0,1229,The image presents an expansive view of a cath...
1,2346,The portrait depicts a man seated in a three-q...
2,2535,This exquisite still life composition presents...
3,2989,The sculpture presents a woman draped in a flo...
4,3527,This exquisite piece of Renaissance artistry i...


In [15]:
df.to_csv("art_data.csv", index=False)

In [16]:
#!/usr/bin/env python3
"""
update_csv.py
-------------
Reads 'sample_250_buyer_data.csv' and converts bracketed list-like strings 
(e.g. ['Minimalist']) into comma-separated values (e.g. Minimalist).

Example input row:
eb7fcd17-e393-49e6-829d-2b9f6685b4bf    Daniel Weaver    ['Minimalist']    ['Digital', 'Sculpture']    ['Build a Collection', 'Support Artists']

Example output row:
eb7fcd17-e393-49e6-829d-2b9f6685b4bf    Daniel Weaver    Minimalist    Digital, Sculpture    Build a Collection, Support Artists
"""

import csv
import ast

# CSV file paths (adjust as needed)
INPUT_CSV = "sample_250_buyer_data.csv"
OUTPUT_CSV = "sample_250_buyer_data_updated.csv"

# Columns that might contain bracketed list strings
LIST_LIKE_COLUMNS = ["Preferred Art Styles", "Favorite Mediums", "Buying Motivation"]


def parse_listlike(s: str) -> str:
    """
    Attempts to parse a string like "['Digital', 'Sculpture']" into a Python list.
    If successful, returns a comma-separated string (e.g. "Digital, Sculpture").
    If parsing fails, returns the original string.
    """
    if not s:
        return s
    try:
        # Try interpreting the string as a Python literal (list, tuple, etc.)
        val = ast.literal_eval(s)
        if isinstance(val, list):
            # Convert list to comma-separated string
            return ", ".join(str(x) for x in val)
    except (SyntaxError, ValueError):
        pass
    # Fallback: return as-is if not parseable
    return s


def main():
    # Read the original CSV
    with open(INPUT_CSV, "r", encoding="utf-8") as infile:
        reader = csv.DictReader(infile)
        fieldnames = reader.fieldnames

        # Write to a new CSV
        with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as outfile:
            writer = csv.DictWriter(outfile, fieldnames=fieldnames)
            writer.writeheader()

            for row in reader:
                # For each column that might be bracketed, parse & convert
                for col in LIST_LIKE_COLUMNS:
                    if col in row:
                        row[col] = parse_listlike(row[col])
                writer.writerow(row)

    print(f"✅ Updated CSV saved as '{OUTPUT_CSV}'")


if __name__ == "__main__":
    main()


✅ Updated CSV saved as 'sample_250_buyer_data_updated.csv'


In [19]:
import pandas as pd

# Load the CSV file
df = pd.read_csv("sample_250_buyer_data_updated.csv")

# Keep only the first 5 records
df_limited = df.head(1)

# Save the new file
df_limited.to_csv("sample_1_buyer_data.csv", index=False)

print("File saved as sample_5_buyer_data.csv with first 5 records.")


File saved as sample_5_buyer_data.csv with first 5 records.


In [18]:
df_limited.head()


Unnamed: 0,Buyer ID,Name,Preferred Art Styles,Favorite Mediums,Buying Motivation
0,0d14e417-be91-41fe-ad0d-7507adfbbb9c,Amanda Tucker,"Expressive Figurative, Abstract, Contemporary ...","Oil, Acrylic","Build a Collection, Support Artists, Inspire Me"
1,4f194c1a-6708-4af7-8990-27a4c33579fe,Eduardo Buck,Expressive Figurative,"Sculpture, Watercolor","Build a Collection, Decorate Home/Space, Inspi..."
2,0d5a157c-9770-4b5c-a6ea-90d19072abb5,Tyler Chandler,Abstract,"Oil, Mixed Media, Digital","Build a Collection, Decorate Home/Space, Suppo..."
3,439af199-15e1-4694-aed1-30bddab21e16,Melanie Douglas,Expressive Figurative,"Ceramics, Watercolor, Oil","Build a Collection, Decorate Home/Space, Suppo..."
4,a797007c-9bda-44c7-8ff8-94fdf6d8e572,Douglas Farrell,"Expressive Figurative, Contemporary Surrealism",Sculpture,Build a Collection


In [3]:
import pandas as pd

In [4]:
df = pd.read_csv("synthetic_art_buyers.csv")

In [5]:
df.columns

Index(['Buyer ID', 'Name', 'Age', 'Location', 'Occupation',
       'Preferred Art Styles', 'Favorite Mediums', 'Buying Motivation',
       'Budget Range', 'Buying Frequency', 'Engagement Level',
       'Preferred Buying Channels', 'Online Art Buying Behavior',
       'Past Purchases'],
      dtype='object')

In [6]:
import csv
import random
import json

# Define the possible values for each new column
ART_STYLES = [
    "Abstract", "Expressive Figurative", "Realist Figurative",
    "Contemporary Surrealism", "Landscape", "Conceptual",
    "Minimalist", "Street Art", "Neo-Abstract Expressionist", "Other"
]

FAVORITE_MEDIUMS = [
    "Oil", "Acrylic", "Watercolor", "Digital", "Sculpture",
    "Mixed Media", "Ceramics", "Photography", "Prints & Multiples"
]

BUYING_MOTIVATIONS = [
    "Build a Collection", "Decorate Home/Space",
    "Support Artists", "Inspire Me", "Investment"
]

# Input and output file names
INPUT_FILE = 'art_data.csv'
OUTPUT_FILE = 'art_data_enriched.csv'

# Read the existing CSV file
with open(INPUT_FILE, newline='', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile)
    # Append the new column names to the existing fieldnames
    fieldnames = reader.fieldnames + [
        "Preferred Art Styles", "Favorite Mediums", "Buying Motivation"
    ]
    
    enriched_rows = []
    for row in reader:
        # Select up to 3 art styles (at least 1)
        num_art_styles = random.randint(1, 3)
        selected_art_styles = random.sample(ART_STYLES, num_art_styles)
        row["Preferred Art Styles"] = json.dumps(selected_art_styles)

        # Select up to 3 favorite mediums (at least 1)
        num_mediums = random.randint(1, 3)
        selected_mediums = random.sample(FAVORITE_MEDIUMS, num_mediums)
        row["Favorite Mediums"] = json.dumps(selected_mediums)

        # Select a random subset of buying motivations (at least 1, up to all 5)
        num_motivations = random.randint(1, len(BUYING_MOTIVATIONS))
        selected_motivations = random.sample(BUYING_MOTIVATIONS, num_motivations)
        row["Buying Motivation"] = json.dumps(selected_motivations)

        enriched_rows.append(row)

# Write the enriched data to a new CSV file
with open(OUTPUT_FILE, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(enriched_rows)

print(f"Enriched CSV file saved as {OUTPUT_FILE}")


Enriched CSV file saved as art_data_enriched.csv
