### Older Dataset with IconClass Notation
These first code blocks were written as an exploration: they resolve each Iconclass notation into its natural-language description, so that the descriptions can be used as discrete qualitative data. Iconclass was chosen as the descriptive method for listing all the iconographic elements of an artwork.

In [None]:
import pandas as pd

# Load the older export (the one that has an 'iconclass' column) and preview it.
works_csv_path = './dataset_wikidata_works.csv'
df = pd.read_csv(works_csv_path)
df.head()

In [None]:
#api test
import requests as rq
import json

# Smoke-test the Iconclass JSON endpoint with one known notation.
# The decoded body is bound to `payload`, not `json`, so the `json` module
# imported above stays usable for the rest of the session.
sample_notation = '95A(SISYPHUS)6'
request = rq.get(f'https://iconclass.org/{sample_notation}.json', timeout=30)
request.raise_for_status()  # surface HTTP errors instead of parsing an error body
payload = request.json()
payload['txt']['en']

In [None]:
# to resolve the iconclass id to natural language

# Missing notations become '' so they can be recognised with a plain
# string comparison later on.
iconclass_column = df['iconclass']
iconclass = iconclass_column.fillna('')
print(iconclass)

def iconclass_resolution(iconclass_series):
    """Resolve Iconclass notations to their English descriptions.

    Each distinct notation is requested once from
    ``https://iconclass.org/<notation>.json``; repeated notations reuse the
    text that was already fetched.

    Args:
        iconclass_series: Iterable of Iconclass notation strings, where
            ``''`` marks an item without a notation.

    Returns:
        A ``pd.Series`` holding exactly one entry per input item, in input
        order: the English text, ``'empty'`` for ``''``, or ``None`` when
        the notation could not be resolved.
    """
    resolved = {}
    natural_series = []
    for notation in iconclass_series:
        if notation == '':
            natural_series.append('empty')
            continue
        if notation not in resolved:
            try:
                response = rq.get(f'https://iconclass.org/{notation}.json', timeout=30)
                text = response.json()['txt']['en']
            except Exception:
                # Best-effort batch lookup: a network error, a non-JSON body or
                # a missing key must not abort the run. A placeholder is still
                # appended so the output stays aligned with the input.
                print(f'{notation} this code is not working')
                natural_series.append(None)
                continue
            print(text)
            resolved[notation] = text
        natural_series.append(resolved[notation])
    return pd.Series(natural_series)

# Resolve every notation of the prepared column; the returned Series is the cell's output.
iconclass_resolution(iconclass_series=iconclass)

# Newer Dataset, guess what?...without IconClass notation 
These blocks perform some exploratory and manipulative operations on the renewed dataset; this is necessary to produce good visualizations and to truly understand the quantitative nature of the dataset itself.

The dataset is extracted from Wikidata, mainly through the Wikidata query endpoint (https://query.wikidata.org), which has a built-in query system for rapidly browsing properties and classes.

The dataset itself presents some limitations that will be further discussed.

```
select ?movement ?movementLabel ?movstart ?movend ?artist ?artistLabel (group_concat (?viafart) as ?viafartist)
?works ?worksLabel  (group_concat (?viafwork) as ?viafworks) ?workinception
?symbols ?symbolsLabel ?genre ?genreLabel

where {

    {?artist wdt:P106 wd:Q1028181 .} UNION {?artist wdt:P106 wd:Q1925963 .}
    {?artist wdt:P135 ?movement .}
    optional {?movement wdt:P580 ?movstart .}
    optional {?movement wdt:P582 ?movend .}
    {?artist wdt:P800 ?works .}
    optional {?artist wdt:P214 ?viafart .}
    {?2dart wdt:P279 wd:Q110304307 .}
    {?works wdt:P31 ?2dart .}
    optional {?works wdt:P136 ?genre .}
    optional {?works wdt:P214 ?viafwork .}
    optional {?works wdt:P571 ?workinception .}
    {?works wdt:P180 ?symbols .} # union {?works wdt:P1257 ?iconclass .}
    SERVICE wikibase:label {bd:serviceParam wikibase:language "en".}

}
group by ?movement ?movementLabel ?movstart ?movend ?artist ?artistLabel ?works ?worksLabel ?workinception ?symbols ?symbolsLabel ?iconclass ?genre ?genreLabel
```




In [None]:
#dataset import and description

import pandas as pd

# Load the Wikidata export, then print its structure and its summary statistics.
df = pd.read_csv('./dataset_wikidata.csv')

for summarize in (df.info, df.describe):
    print(summarize())

In [None]:
# Regroup naturalism and expressionism: rows carrying the lower-case label
# variant are rewritten to the capitalised label and its Wikidata page URL.
# Whole-column boolean masks replace the former row-by-row loop.
canonical_movements = {
    'expressionism': ('Expressionism', 'https://www.wikidata.org/wiki/Q80113'),
    'naturalism': ('Naturalism', 'https://www.wikidata.org/wiki/Q55995'),
}
for variant_label, (canonical_label, canonical_url) in canonical_movements.items():
    # The mask is computed before either column is modified.
    is_variant = df['movementLabel'] == variant_label
    df.loc[is_variant, 'movementLabel'] = canonical_label
    df.loc[is_variant, 'movement'] = canonical_url

print(df.to_string())

In [None]:
#grouping per movement and labels + counts

# Group handles per work title and per depicted symbol.
work_group = df.groupby(by='worksLabel')
symbols_group = df.groupby(by='symbolsLabel')

# For every symbol, the number of non-null cells in each remaining column.
df_symbols_freq = symbols_group.count()
symbol_frequency_table = df_symbols_freq.to_string()
print(symbol_frequency_table)

In [None]:
# Converting to the centuries all inception and movstart values

# Work on a filled, renamed copy: 'workinception' becomes 'century' because
# the column will hold the century of each inception date.
df_centuries = df.fillna('empty').rename(columns={'workinception': 'century'})

centuries = []
for raw_value in df_centuries['century']:
    century = raw_value
    if raw_value != 'empty':
        # The text before the first '-' is taken as the year.
        year_text = str(raw_value).split('-')[0]
        try:
            century = int(year_text) // 100 * 100
        except ValueError:
            # Values whose leading segment is not an integer (for example a
            # string that starts with '-') are reported and left unchanged.
            print(f"{raw_value} - ERROR")
    centuries.append(century)
df_centuries['century'] = centuries

print(df_centuries.groupby(['symbolsLabel','century']).count().to_string())

In [None]:
#peek into time values for movements, unifying inception and movstart

# Fill a copy of the frame, then let 'inception' stand in wherever
# 'movstart' is missing.
df_time = df.fillna('empty')

for row_label in df_time.index[df_time['movstart'] == 'empty']:
    df_time.at[row_label, 'movstart'] = df_time.at[row_label, 'inception']

# Keep only the movement label and its time columns, one row per distinct combination.
unneeded_columns = [
    'artist', 'artistLabel', 'viafartist',
    'works', 'worksLabel', 'viafworks',
    'symbols', 'symbolsLabel',
    'genre', 'genreLabel',
    'movement', 'inception', 'workinception',
]
df_time = df_time.drop(columns=unneeded_columns)
df_time = df_time.drop_duplicates()
df_time = df_time.reset_index()

# The pre-deduplication row labels saved by reset_index() are not printed.
print(df_time.drop(columns=['index']).to_string())

In [None]:
#top movements per works

# Distinct values per column for every movement, kept sorted by distinct works.
per_movement = df.groupby('movementLabel')
movement_group = per_movement.nunique().sort_values(by='works')

# Display only the distinct-works column, largest first.
works_per_movement = movement_group[['works']]
works_per_movement.sort_values(by='works', ascending=False)

In [None]:
#top artists per works

# Distinct values per column for every artist.
per_artist = df.groupby('artistLabel')
df_artist = per_artist.nunique()

# Display only the distinct-works column, largest first.
works_per_artist = df_artist[['works']]
works_per_artist.sort_values(by='works', ascending=False)

# Timeline graph (self contained)
Data prepping for the timeline graph

In [None]:
import pandas as pd
file_path = 'dataset_wikidata.csv'
df = pd.read_csv(file_path)

# Regroup naturalism and expressionism: rows carrying the lower-case label
# variant are rewritten to the capitalised label and its Wikidata page URL.
# Whole-column boolean masks replace the former row-by-row loop.
canonical_movements = {
    'expressionism': ('Expressionism', 'https://www.wikidata.org/wiki/Q80113'),
    'naturalism': ('Naturalism', 'https://www.wikidata.org/wiki/Q55995'),
}
for variant_label, (canonical_label, canonical_url) in canonical_movements.items():
    # The mask is computed before either column is modified.
    is_variant = df['movementLabel'] == variant_label
    df.loc[is_variant, 'movementLabel'] = canonical_label
    df.loc[is_variant, 'movement'] = canonical_url


# Select the columns needed for the timeline.
df_subset = df[['movementLabel', 'movstart', 'movend', 'inception', 'works']]

# Group by 'movementLabel' and count the works of each group.
# 'works' is aggregated with 'nunique' rather than 'count': 'count' tallies
# rows, and a work can occupy several rows of the export (one per depicted
# symbol / genre), which would inflate the number of works.
df_grouped = df_subset.groupby('movementLabel').agg({
    'movstart': 'first',   # assume the same 'movstart' for the whole group
    'movend': 'first',     # assume the same 'movend' for the whole group
    'inception': 'first',  # assume the same 'inception' for the whole group
    'works': 'nunique',    # number of distinct works in the group
}).reset_index()

# Rename the count column.
df_grouped = df_grouped.rename(columns={'works': 'number_of_works'})

# Sort by number of works, largest first.
df_sorted = df_grouped.sort_values(by='number_of_works', ascending=False)

# Display the resulting dataframe.
df_sorted

In [None]:
# Take the first 20 rows of the sorted table; 'empty' marks missing dates.
df_temp = df_sorted.head(20).fillna('empty')

# First pass: borrow 'inception' where 'movstart' is missing.
for row_label in df_temp.index:
    if df_temp.at[row_label, 'movstart'] == 'empty':
        df_temp.at[row_label, 'movstart'] = df_temp.at[row_label, 'inception']

# Second pass: reduce every known start/end value to the integer found
# before its first '-'.
for row_label in df_temp.index:
    for date_column in ('movstart', 'movend'):
        current_value = df_temp.at[row_label, date_column]
        if current_value != 'empty':
            year_text = str(current_value).split('-')[0]
            df_temp.at[row_label, date_column] = int(year_text)

without_inception = df_temp.drop(columns=['inception'])
df_time = without_inception.drop_duplicates().reset_index()


print(df_time.to_string())

# Symbols in time and percentages (self contained)

Data prepping for the evolution in time of main symbols and their percentages in centuries 

In [None]:
#dataset import and description

import pandas as pd

# Load the Wikidata export, then print its structure and its summary statistics.
df = pd.read_csv('./dataset_wikidata.csv')

for summarize in (df.info, df.describe):
    print(summarize())

# Regroup naturalism and expressionism: rows carrying the lower-case label
# variant are rewritten to the capitalised label and its Wikidata page URL.
# Whole-column boolean masks replace the former row-by-row loop.
canonical_movements = {
    'expressionism': ('Expressionism', 'https://www.wikidata.org/wiki/Q80113'),
    'naturalism': ('Naturalism', 'https://www.wikidata.org/wiki/Q55995'),
}
for variant_label, (canonical_label, canonical_url) in canonical_movements.items():
    # The mask is computed before either column is modified.
    is_variant = df['movementLabel'] == variant_label
    df.loc[is_variant, 'movementLabel'] = canonical_label
    df.loc[is_variant, 'movement'] = canonical_url

df

In [None]:
# Work on a filled copy so the loaded frame keeps its raw values.
df_centuries = df.fillna('empty')

centuries = []
for index, raw_value in df_centuries['workinception'].items():
    century = raw_value
    if raw_value != 'empty':
        # The text before the first '-' is taken as the year.
        year_text = str(raw_value).split('-')[0]
        try:
            century = int(year_text) // 100 * 100
        except ValueError:
            # Values whose leading segment is not an integer are reported
            # together with their row label and left unchanged.
            print(raw_value)
            print(index)
    centuries.append(century)
df_centuries['workinception'] = centuries

df_centuries = df_centuries.rename(columns={'workinception': 'century'})
df_centuries

In [None]:
# Narrow the frame to the two columns used by the symbol-in-time counts.
pair_columns = ['symbolsLabel', 'century']
df_test = df_centuries[pair_columns]
df_test.head()

In [None]:
# Occurrences of every symbol, most frequent first.
occurrences = df_test.value_counts('symbolsLabel')
df_count = occurrences.sort_values(ascending=False)
print(df_count)

# The ten most frequent symbols.
df_topk = df_count.head(10)
print(df_topk)

In [None]:
# Plain list of the top symbol labels, in ranking order.
top_labels_index = df_topk.index
df_topk_list = top_labels_index.tolist()
print(df_topk_list)

In [None]:
# TODO: still to be tidied up.
# Keep only the rows whose symbol is one of the top labels.
is_top_symbol = df_test['symbolsLabel'].isin(df_topk_list)
df_filtered = df_test[is_top_symbol]

# Per century, how often each of those symbols occurs.
symbols_by_century = df_filtered.groupby('century')['symbolsLabel']
sym_in_time = symbols_by_century.value_counts()
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(sym_in_time)

In [None]:
# Same per-century breakdown, this time over every symbol (no top-k filter).
symbols_by_century = df_test.groupby('century')['symbolsLabel']
sym_in_time = symbols_by_century.value_counts()
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(sym_in_time)

In [None]:
# Number of non-null symbol entries recorded for each century.
symbols_by_century = df_test.groupby('century')['symbolsLabel']
sym_total_in_time = symbols_by_century.count()
sym_total_in_time

In [None]:
import pandas as pd

# Century buckets shared by both tables; the last two entries are the
# non-numeric buckets ('empty' and a Wikidata genid value).
century_buckets = [
    1200, 1300, 1400, 1500, 1600, 1700, 1800, 1900, 2000,
    'empty',
    'http://www.wikidata.org/.well-known/genid/c2e5bc33ccf184eb506b4c4166309cf9',
]

# Occurrences of each symbol per century.
data_symbols = {
    'century': list(century_buckets),
    'Virgin Mary': [6, 16, 102, 89, 20, 1, 19, 5, 0, 1, 0],
    'woman': [6, 8, 98, 129, 115, 62, 477, 228, 6, 30, 0],
    'waist-length hair': [1, 0, 8, 42, 31, 12, 127, 23, 4, 0, 2],
    'Jesus': [0, 9, 56, 50, 28, 4, 39, 5, 0, 0, 0],
    'man': [0, 4, 61, 86, 117, 42, 248, 73, 3, 3, 0],
    'sky': [0, 0, 10, 32, 56, 17, 184, 36, 4, 6, 0],
    'sitting': [0, 0, 8, 18, 8, 23, 179, 37, 0, 12, 0],
    'tree': [0, 0, 8, 27, 51, 18, 141, 37, 0, 14, 0],
    'cloud': [0, 0, 3, 19, 44, 10, 92, 14, 4, 4, 0],
    'nudity': [0, 0, 2, 45, 38, 19, 151, 77, 4, 13, 0],
}

# Total number of symbols per century.
total_symbols = {
    'century': list(century_buckets),
    'total': [52, 109, 1571, 2810, 3611, 1564, 11236, 3020, 294, 436, 50],
}

# Turn both dictionaries into dataframes and join them on 'century'.
df_symbols = pd.DataFrame(data_symbols)
df_total = pd.DataFrame(total_symbols)
df_merged = pd.merge(df_symbols, df_total, on='century')

# Share of each symbol in its century's total, as a percentage, computed
# for all symbol columns at once and appended after the existing columns.
symbol_columns = df_symbols.columns[1:]
percent_columns = [f'{symbol}_percent' for symbol in symbol_columns]
shares = df_merged[symbol_columns].div(df_merged['total'], axis=0).mul(100)
shares.columns = percent_columns
df_merged = pd.concat([df_merged, shares], axis=1)

# Show the result.
df_merged[['century'] + percent_columns]

# Genre in Time (self contained)
Data prepping for the genre in time timeline

In [None]:
import pandas as pd

df = pd.read_csv('dataset_wikidata.csv')

# Only the movement and genre labels are needed here.
genre_group = df[['movementLabel', 'genreLabel']]

# Row counts per genre and per movement, most frequent first.
genre_occurrences = genre_group.value_counts('genreLabel')
genre_count = genre_occurrences.sort_values(ascending=False)
movement_occurrences = genre_group.value_counts('movementLabel')
movement_count = movement_occurrences.sort_values(ascending=False)

# Labels of the ten most frequent genres and movements.
genre_top = genre_count.head(10).index.tolist()
movement_top = movement_count.head(10).index.tolist()
movement_top

In [None]:
#top 10 genre and movements

# Row masks over the full frame: top-10 genre and top-10 movement.
in_top_genres = genre_group['genreLabel'].isin(genre_top)
in_top_movements = genre_group['movementLabel'].isin(movement_top)

genre_filtered = genre_group[in_top_genres]
# Both conditions are combined in one mask. Chaining two selections would
# apply a mask built on the full frame to an already filtered one, which
# relies on implicit reindexing and triggers a pandas warning.
movement_filtered = genre_group[in_top_movements & in_top_genres]
combined_df = pd.concat([genre_filtered, movement_filtered])
print(movement_filtered.groupby('movementLabel')['genreLabel'].value_counts().to_string())


In [None]:
# Restrict the top-movement rows to the four genres being compared.
opposed_genres = ['landscape painting', 'portrait', 'group portrait', 'nude']
is_opposed_genre = movement_filtered['genreLabel'].isin(opposed_genres)
opposed_filtered = movement_filtered[is_opposed_genre]

# Per movement, how often each of those genres occurs.
opposed_counts = opposed_filtered.groupby('movementLabel').value_counts()
print(opposed_counts.to_string())

In [None]:
# Order the rows by work identifier, then list each distinct
# (work title, genre) pair once.
sorted_by_work = df.sort_values('works')
work_group = sorted_by_work.reset_index()
title_genre_pairs = work_group[['worksLabel', 'genreLabel']].drop_duplicates()
print(title_genre_pairs.to_string())