In [2]:
import os
import itertools
import pandas as pd
from slugify import slugify
from openai import OpenAI
from pydantic import BaseModel, Field
from typing import List
from dotenv import load_dotenv

In [None]:
# Load variables from a local .env file (if any) into the process environment,
# then read the OpenAI key from it. `api_key` is None when the variable is unset.
load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")

In [None]:
def gen_system_prompt():
    """Return the system prompt that frames the model as an expert art historian.

    The text is static: it lists what to cover for movements, artists and
    artworks, and sets the expected tone and structure of the answers.
    """
    return """
    You are an expert art historian with comprehensive knowledge of painting movements and visual arts from around 
    the world, spanning from ancient to contemporary periods. Your expertise covers major art movements, influential 
    artists, and significant artworks throughout history.

    When presented with an art movement, artist, or artwork, provide detailed and accurate information as follows:

    For art movements:
    - Historical context and time period
    - Key characteristics and techniques
    - Key artists
    - Notable works representing the movement
    - Cultural and historical significance

    For artists:
    - Biographical details and artistic development
    - Signature style and techniques
    - Major works and achievements
    - Influences and artistic relationships
    - Position within broader art movements

    For artworks:
    - Visual analysis and composition
    - Techniques and materials used
    - Symbolic elements and interpretation
    - Historical and cultural context
    - Impact and reception
    - Current location

    Structure your responses clearly with appropriate headings and concise paragraphs. Balance academic insight with 
    accessibility, using precise art terminology while remaining understandable to non-specialists.

    When relevant, draw connections between related movements, artists, or historical developments to provide 
    comprehensive context. Include thoughtful recommendations for further exploration based on the user's initial 
    query.

    Respond confidently with authoritative knowledge while acknowledging the complexity and subjective nature of art 
    interpretation where appropriate.
    """

In [13]:
def gen_istructions_schools(school):
    """Build the user prompt requesting structured data about an art movement.

    `school` is interpolated into the first line of the prompt. The prompt asks
    for context, time period, characteristics, key artists and notable
    paintings, and spells out the JSON object the answer must follow.
    """
    # Doubled braces are f-string escapes; they render as a single { and }.
    return f"""
    I need comprehensive information about the art movement: {school}

    Please provide the following details:

    1. Historical context (max 300 words): Explain the social, political, and cultural factors that influenced 
    this movement's emergence and development.

    2. Time period: Specify the years in Common Era format (e.g., "1700 to 1845").

    3. Key characteristics and techniques (max 300 words): Describe the distinctive visual elements, philosophical 
    approaches, subject matter preferences, and technical innovations that define this movement.

    4. Key artists (5-10 painters only): Provide as a Python list format [e.g., ['Claude Monet', 'Vincent Van Gogh']], 
    listing the most influential painters in order of significance to the movement.

    5. Notable paintings (5-10 works): Provide as a Python list [e.g., ['Girl with a Pearl Earring', 'The Starry Night']], 
    including only the most representative or revolutionary works from this movement.

    ANSWER FORMAT

    Your answer must be structured as a valid JSON object following this exact schema:
    {{
        "context": "A string containing the historical context (max 300 words)",
        "time_period": "A string containing the time period in format 'YYYY to YYYY'",
        "characteristics": "A string containing the key characteristics (max 300 words)",
        "artists": ["Artist1", "Artist2", "Artist3", "Artist4", "Artist5"],
        "paintings": ["Painting1", "Painting2", "Painting3", "Painting4", "Painting5"]
    }}

    ADDITIONAL CONSIDERATIONS

    - Do not include any text, explanations, or comments outside the JSON object
    - Ensure the JSON is properly formatted and valid
    - Use double quotes for all strings within the JSON
    - Do not use any formatting (bold, italics, etc.) within the JSON values

    Thank you
    """

In [77]:
def gen_istructions_artists(artist):
    """Build the user prompt requesting structured data about a single artist.

    Parameters
    ----------
    artist : str
        Artist name, interpolated into the first line of the prompt.

    Returns
    -------
    str
        Prompt asking for biography, country, time period, characteristics and
        notable paintings, followed by the JSON object the answer must follow.
    """
    # Doubled braces are f-string escapes; they render as a single { and }.
    # The schema example must itself be valid JSON, since the prompt tells the
    # model to follow it exactly.
    return f"""
    I need comprehensive information about the following artist: {artist}

    Please provide the following details:

    1. Biography information (max 400 words): Provide biographical information as well as a brief artistic development 
    for this artist.

    2. Country: Country of procedence.

    3. Time period: Specify the years in Common Era format (e.g., "1700 to 1845").

    4. Key characteristics and techniques (max 300 words): Describe the distinctive visual elements, philosophical 
    approaches, subject matter preferences, and technical innovations that define this artist.

    5. Notable paintings (3-6 works): Provide as a Python list [e.g., ['Girl with a Pearl Earring', 'The Starry Night']], 
    including only the most representative or revolutionary works from this artist.

    ANSWER FORMAT

    Your answer must be structured as a valid JSON object following this exact schema:
    {{
        "artist_name": "A string containing the artist name",
        "biography": "A string containing the biographical information (max 400 words)",
        "country" : "A string containing the country of procedence",
        "time_period": "A string containing the time period in format 'YYYY to YYYY'",
        "characteristics": "A string containing the key characteristics (max 300 words)",
        "paintings": ["Painting1", "Painting2", "Painting3", "Painting4"]
    }}

    ADDITIONAL CONSIDERATIONS

    - Do not include any text, explanations, or comments outside the JSON object
    - Ensure the JSON is properly formatted and valid
    - Use double quotes for all strings within the JSON
    - Do not use any formatting (bold, italics, etc.) within the JSON values

    Thank you
    """

In [129]:
def gen_istructions_paintings(painting, artist):
    """Build the user prompt requesting structured data about one painting.

    `painting` and `artist` are interpolated into the first line of the prompt.
    The prompt asks for the year, the historical/artistic context and the
    current location, and spells out the JSON object the answer must follow.
    """
    # Doubled braces are f-string escapes; they render as a single { and }.
    return f"""
    I need comprehensive information about the following painting: {painting}, from {artist}

    Please provide the following details:

    1. Year: When was this painting created? If the exact year is unknown, provide the best available estimate or range (e.g., "circa 1650" or "1503-1506").

    2. Context (max 300 words): Describe the historical and artistic context of this painting, including if possible:
    - What inspired the artist to create it
    - Its significance in art history
    - Any notable technical innovations or stylistic elements
    - The cultural or historical circumstances surrounding its creation

    3. Current location: Where is this painting currently housed? Provide the name of the museum, gallery, or collection and its location (city, country).

    ANSWER FORMAT

    Your answer must be structured as a valid JSON object following this exact schema:
    {{
        "year": "A string containing the approximate year of painting format 'YYYY'",
        "context": "A string containing the historical and artistic context (max 300 words)",
        "location": "A string containing the current location of the painting"
    }}

    ADDITIONAL CONSIDERATIONS

    - Do not include any text, explanations, or comments outside the JSON object
    - Ensure the JSON is properly formatted and valid
    - Use double quotes for all strings within the JSON
    - Do not use any formatting (bold, italics, etc.) within the JSON values

    Thank you
    """

In [146]:
# Structured-output schema for one art movement. get_data passes this class as
# `response_format` when level is "school"; field names mirror the JSON keys
# requested in gen_istructions_schools.
# NOTE(review): plain comments are used instead of a class docstring because
# Pydantic is expected to copy a docstring into the generated JSON schema.
class School(BaseModel):
    context: str = Field(..., description="Historical context of the art movement (max 300 words)")
    time_period: str = Field(..., description="Time period in format 'YYYY to YYYY'")
    characteristics: str = Field(..., description="Key characteristics of the art movement (max 300 words)")
    # Both lists are reused downstream: `artists` drives the per-artist queries
    # and the movement lookup in get_movement.
    artists: List[str] = Field(..., description="List of influential painters in order of significance")
    paintings: List[str] = Field(..., description="List of representative or revolutionary paintings")

# Structured-output schema for one artist. get_data passes this class as
# `response_format` when level is "artist"; field names mirror the JSON keys
# requested in gen_istructions_artists.
class Artist(BaseModel):
    # Name as returned by the model; it may differ in spelling from the name
    # that was queried (the notebook later patches a few names by hand).
    artist_name: str = Field(..., description="Artist name")
    biography: str = Field(..., description="Biographical information of the artist (max 400 words)")
    country: str = Field(..., description="Country of procedence")
    time_period: str = Field(..., description="Time period in format 'YYYY to YYYY'")
    characteristics: str = Field(..., description="Key characteristics of the artist (max 300 words)")
    # Titles listed here become the per-painting queries further down.
    paintings: List[str] = Field(..., description="List of notable paintings")

# Structured-output schema for one painting. get_data passes this class as
# `response_format` when level is "painting".
class Painting(BaseModel):
    # These two fields are not requested in gen_istructions_paintings; get_data
    # overwrites them with the caller-supplied artist and title after parsing.
    # NOTE(review): annotated `str` but defaulting to None. Optional[str] would
    # describe that more accurately, but it changes the schema sent to the API,
    # so confirm how the SDK's strict-schema conversion treats it first.
    artist_name: str = Field(None, description="Name of the artist")
    painting_name: str = Field(None, description="Name of the painting")
    # NOTE(review): the prompt allows values such as "circa 1650" or
    # "1503-1506", so this is not always a plain four-digit year.
    year: str = Field(..., description="Year in format 'YYYY'")
    context: str = Field(..., description="Historical and artistic context (max 300 words)")
    location: str = Field(..., description="Current location of the painting")

In [11]:
# Single shared API client; get_data sends every request through it.
client = OpenAI(api_key = api_key)

In [144]:
def get_data(target, level, artist = None, model = "gpt-4o-2024-08-06"):
    """Query the OpenAI API for structured data about a movement, artist or painting.

    Parameters
    ----------
    target : str
        Name of the art movement, artist or painting to look up.
    level : str
        One of "school", "artist" or "painting". Selects both the user prompt
        and the Pydantic model passed as ``response_format``.
    artist : str, optional
        Painter of ``target``. Required when ``level`` is "painting".
    model : str, optional
        Chat model identifier sent to the API.

    Returns
    -------
    School, Artist or Painting
        The parsed response. For paintings, ``artist_name`` and
        ``painting_name`` are set to the ``artist`` and ``target`` arguments.

    Raises
    ------
    ValueError
        If ``level`` is not a supported value, or if ``level`` is "painting"
        and no ``artist`` was given.
    RuntimeError
        If the API returns no parsed object (for example, a refusal).
    """
    # Validate before touching the network so a typo fails fast with a clear
    # message instead of an UnboundLocalError at the final return.
    if level not in ("school", "artist", "painting"):
        raise ValueError(
            f"Unknown level {level!r}; expected 'school', 'artist' or 'painting'"
        )
    if level == "painting" and artist is None:
        raise ValueError("An artist must be provided when level is 'painting'")

    # Pick the prompt/schema pair for the requested level. Artists use their
    # own prompt rather than the movement prompt.
    if level == "school":
        user_prompt, schema = gen_istructions_schools(target), School
    elif level == "artist":
        user_prompt, schema = gen_istructions_artists(target), Artist
    else:
        user_prompt, schema = gen_istructions_paintings(target, artist), Painting

    history = [
        {"role": "system", "content": gen_system_prompt()},
        {"role": "user",   "content": user_prompt}
    ]
    chat_completion = client.beta.chat.completions.parse(
        messages = history,
        model    = model,
        response_format = schema
    )
    message = chat_completion.choices[0].message
    result = message.parsed

    # `parsed` is None when the model refuses; surface that here rather than
    # failing later on an attribute access or model_dump() call.
    if result is None:
        refusal = getattr(message, "refusal", None)
        raise RuntimeError(
            f"No structured data returned for {level} {target!r}: {refusal}"
        )

    if level == "painting":
        # Keep the caller's spelling so rows can be joined back to the artist table.
        result.artist_name = artist
        result.painting_name = target

    return result

In [None]:
# Art movements to query. The order matters: it is reused positionally when the
# "movement" column is attached to the resulting frame.
schools = [
    "Renaissance",
    "Baroque",
    "Rococo",
    "Neoclassicism",
    "Romanticism",
    "Realism",
    "Impressionism",
    "Post-Impressionism",
    "Expressionism",
    "Cubism",
    "Surrealism",
    "Abstract Expressionism",
    "Pop Art",
    "Contemporary Art",
    "Fauvism",
]

# One API request per movement.
data_schools = [get_data(movement_name, level = "school") for movement_name in schools]

In [113]:
# Flatten the parsed School objects into rows and label each row with the
# movement it was requested for (same order as `schools`).
school_dicts = [record.model_dump() for record in data_schools]
df_schools = pd.DataFrame(school_dicts).assign(movement = schools)

# Full dump (includes the artist/painting lists and the index column).
df_schools.to_csv("schools_dta.csv")

# Display table: text columns only, no index.
df_schools.loc[:, ['movement', 'time_period', 'context', 'characteristics']].to_csv(
    "schools_table.csv", index=False
)

In [80]:
# Unique artist names across all movements (an artist may be listed under
# several movements).
artists = set(
    itertools.chain.from_iterable(
        df_schools.artists.to_list()
    )
)

# Iterate in sorted order: the iteration order of a set of strings changes
# between kernel sessions, which would make the row order of df_artists (and
# the CSVs written from it) differ from run to run.
data_artists = [get_data(a, level = "artist") for a in sorted(artists)]

In [105]:
# Flatten the parsed Artist objects into one row per artist.
artist_dicts = [record.model_dump() for record in data_artists]
df_artists = pd.DataFrame(data = artist_dicts)

In [106]:
# Manual clean-up of the model's answers.
# Rename two artists to a shorter form of their name
# (presumably to match the spelling used in the movements' artist lists - TODO confirm).
df_artists.loc[
    df_artists['artist_name'] == 'Raphael Sanzio', 'artist_name'
] = 'Raphael'
df_artists.loc[
    df_artists['artist_name'] == 'Élisabeth Louise Vigée Le Brun', 'artist_name'
] = 'Élisabeth Vigée Le Brun'

# Drop two names from the table
# (presumably because they are known as sculptors rather than painters - TODO confirm).
df_artists = df_artists[
    ~df_artists['artist_name'].isin(['Donatello', 'Gian Lorenzo Bernini'])
]

In [115]:
# Movement name -> list of artists returned for that movement.
movement2artist = dict(zip(df_schools["movement"], df_schools["artists"]))

def get_movement(artist, map = movement2artist):
    """Return the movements listing `artist`, joined with ', '.

    `map` maps a movement name to its collection of artist names. The result
    is an empty string when no movement lists the artist.
    """
    matches = []
    for movement, members in map.items():
        if artist in members:
            matches.append(movement)
    return ', '.join(matches)

# Attach the movement(s) each artist was listed under.
df_artists['movement'] = df_artists['artist_name'].apply(get_movement)

# Full dump, index included.
# NOTE(review): this file is written before the manual movement overrides
# below, so it keeps the automatically derived values - confirm that is intended.
df_artists.to_csv("artists_dta.csv")

# Manual overrides: force a single movement for these three artists.
df_artists.loc[
    df_artists['artist_name'] == 'Jean-Auguste-Dominique Ingres', 'movement'
] = 'Neoclassicism'
df_artists.loc[
    df_artists['artist_name'] == 'Édouard Manet', 'movement'
] = 'Impressionism'
df_artists.loc[
    df_artists['artist_name'] == 'Georges Braque', 'movement'
] = 'Cubism'

# Display table: scalar text columns only, no index.
df_artists.loc[
    :, ['artist_name', 'country', 'time_period', 'movement', 'biography', 'characteristics']
].to_csv("artists_table.csv", index=False)

In [None]:
# Artist name -> list of that artist's notable paintings.
# (Despite the variable name, the keys are artists and the values are paintings.)
paintings2artists = dict(zip(df_artists["artist_name"], df_artists["paintings"]))

# One API request per (artist, painting) pair.
data_paintings = [
    get_data(title, level = "painting", artist = painter)
    for painter, titles in paintings2artists.items()
    for title in titles
]

In [153]:
# Flatten the parsed Painting objects into one row per painting and save them.
paintings_dicts = [record.model_dump() for record in data_paintings]
df_paintings = pd.DataFrame(data = paintings_dicts)
df_paintings.to_csv("paintings_table.csv", index = False)

In [5]:
# Reload the three display tables from disk so the slug step below can run
# without repeating the API calls.
# Note that this rebinds df_schools / df_artists to the reduced table versions
# (the list-valued columns are not in these files).
df_schools = pd.read_csv(filepath_or_buffer = "schools_table.csv")
df_artists = pd.read_csv(filepath_or_buffer = "artists_table.csv")
df_paintings = pd.read_csv(filepath_or_buffer = "paintings_table.csv")

In [8]:
# Add a slug column to each table, derived from its name column via slugify.
df_schools['slug'] = df_schools['movement'].apply(slugify)
df_artists['slug'] = df_artists['artist_name'].apply(slugify)
df_paintings['slug'] = df_paintings['painting_name'].apply(slugify)

In [9]:
# Write the tables back over the files they were loaded from, now including
# the slug column; the index is left out.
df_schools.to_csv("schools_table.csv", index = False)
df_artists.to_csv("artists_table.csv", index = False)
df_paintings.to_csv("paintings_table.csv", index = False)