In [1]:
import sqlite3
import pandas as pd
import pandasql as ps
from IPython.display import display, HTML

In [2]:
# Query joining every work (diela) with its auction (aukcie).
# - autor: upper-cased text before the first '('. INSTR returns 0 when the
#   string has no '(', and SUBSTR with length -1 would then yield an empty
#   string, so such rows keep the whole author string instead.
# - prices: REPLACE strips spaces from the price text before the INTEGER cast.
query = """
SELECT
    UPPER(
        CASE
            WHEN INSTR(autor, '(') > 0
                THEN SUBSTR(autor, 1, INSTR(autor, '(') - 1)
            ELSE autor
        END
    ) AS autor,
    a.nazov,
    vydrazene,
    CAST(REPLACE(konecna_cena, ' ', '') AS INTEGER) AS konecna_cena,
    CAST(REPLACE(vyvolavacia_cena, ' ', '') AS INTEGER) AS vyvolavacia_cena,
    rok,
    technika,
    datum,
    url_soga
FROM diela a
LEFT JOIN aukcie b ON a.id_aukcie = b.id
WHERE TRUE
-- AND autor like '%FILKO%'
ORDER BY CAST(REPLACE(konecna_cena, ' ', '') AS INTEGER) DESC;
"""

# Open the SQLite database only for the duration of the read; the finally
# clause guarantees the connection is closed even if the query fails.
conn = sqlite3.connect('soga.db')
try:
    df = pd.read_sql_query(query, conn)
finally:
    conn.close()

# Preview the loaded rows (most expensive works first)
df.head()

Unnamed: 0,autor,nazov,vydrazene,konecna_cena,vyvolavacia_cena,rok,technika,datum,url_soga
0,DOMENICHINO,Chlapec s dalmatínom,1,531103.0,663878.0,okolo 1615-25,olej na plátne,04. Október 2005,https://www.soga.sk/aukcie-obrazy-diela-umenie...
1,BENKA MARTIN,Po žatve,1,180000.0,,1922-24,Olej na plátne,"13. November 2012, 19:00",https://www.soga.sk/aukcie-obrazy-diela-umenie...
2,BENKA MARTIN,Nad hĺbkou doliny,1,180000.0,,1948,Olej na plátne,"22. Máj 2018, 19:00",https://www.soga.sk/aukcie-obrazy-diela-umenie...
3,BENKA MARTIN,Slovenská rodina,1,176000.0,,1938 - 40,"olej na plátne, adjustované v pôvodnom ráme, v...","15. November 2011, 19:00",https://www.soga.sk/aukcie-obrazy-diela-umenie...
4,WARHOL ANDY,"Margaréta II., dánska kráľovná, z cyklu Vládnu...",1,170000.0,,1985,Farebná sieťotlač na papieri Lenox Museum. AP ...,"13. November 2012, 19:00",https://www.soga.sk/aukcie-obrazy-diela-umenie...


# Data preparation

In [3]:
from unidecode import unidecode  # Import the unidecode function
import re


# Custom function to standardize and reorder names
def standardize_and_order_name(name):
    """Return an accent-free, upper-case, order-independent form of a name.

    The name is transliterated to ASCII with ``unidecode``, upper-cased,
    split on whitespace, and its parts are joined back with single spaces in
    alphabetical order. As a result "Martin Benka" and "BENKA MARTIN" both
    become "BENKA MARTIN".

    Args:
        name: The name to normalise. Values that are not strings (for example
            ``None`` or NaN coming from a NULL database field) are returned
            unchanged.

    Returns:
        The normalised name, or ``name`` itself when it is not a string.
    """
    # Missing values are not text; pass them through instead of failing
    # inside the transliteration step.
    if not isinstance(name, str):
        return name

    name_parts = unidecode(name).upper().split()

    # Sorting the parts makes the key independent of first-name/surname order.
    return ' '.join(sorted(name_parts))


def extract_first_year(date_string):
    """Return the first standalone four-digit number found in a string.

    "Standalone" means the four digits are delimited by word boundaries, so
    "okolo 1615-25" yields 1615 and the "19:00" in "13. November 2012, 19:00"
    is ignored.

    Args:
        date_string: Free-form text that may contain a year. Non-string
            values (``None``, NaN) are treated as containing no year.

    Returns:
        The matched year as an ``int``, or ``None`` when there is no match
        or the input is not a string.
    """
    # NULL database fields arrive as None/NaN; re.search would raise on them.
    if not isinstance(date_string, str):
        return None

    match = re.search(r'\b\d{4}\b', date_string)
    return int(match.group()) if match else None


In [4]:
# Replace each author with its normalised form so spelling variants share one key
df['autor'] = df['autor'].map(standardize_and_order_name)

In [5]:
# Flag works whose technique text contains "olej", ignoring case.
# The vectorised .str accessor maps a missing technique to False instead of
# raising AttributeError on None, which the per-row lambda did.
df['olej'] = df['technika'].str.lower().str.contains('olej', regex=False, na=False)

In [6]:
# Year columns: first four-digit number found in `rok` and in `datum`
df['rok_vytvorenia'] = df['rok'].apply(extract_first_year)
df['rok_drazby'] = df['datum'].apply(extract_first_year)

In [7]:
# Preview the first five rows of the frame
df.head(5)

Unnamed: 0,autor,nazov,vydrazene,konecna_cena,vyvolavacia_cena,rok,technika,datum,url_soga,olej,rok_vytvorenia,rok_drazby
0,DOMENICHINO,Chlapec s dalmatínom,1,531103.0,663878.0,okolo 1615-25,olej na plátne,04. Október 2005,https://www.soga.sk/aukcie-obrazy-diela-umenie...,True,1615.0,2005
1,BENKA MARTIN,Po žatve,1,180000.0,,1922-24,Olej na plátne,"13. November 2012, 19:00",https://www.soga.sk/aukcie-obrazy-diela-umenie...,True,1922.0,2012
2,BENKA MARTIN,Nad hĺbkou doliny,1,180000.0,,1948,Olej na plátne,"22. Máj 2018, 19:00",https://www.soga.sk/aukcie-obrazy-diela-umenie...,True,1948.0,2018
3,BENKA MARTIN,Slovenská rodina,1,176000.0,,1938 - 40,"olej na plátne, adjustované v pôvodnom ráme, v...","15. November 2011, 19:00",https://www.soga.sk/aukcie-obrazy-diela-umenie...,True,1938.0,2011
4,ANDY WARHOL,"Margaréta II., dánska kráľovná, z cyklu Vládnu...",1,170000.0,,1985,Farebná sieťotlač na papieri Lenox Museum. AP ...,"13. November 2012, 19:00",https://www.soga.sk/aukcie-obrazy-diela-umenie...,False,1985.0,2012


# Analýza

## Top 50 najdrahších autorov podľa priemernej ceny

In [8]:
# Per-author statistics over rows that have a final price: number of works,
# rounded mean, minimum and maximum price. Only authors with more than
# 5 such works are kept; rows are ranked by the rounded mean, highest first.
query = """
select 
autor, 
count(*) pocet_vydrazenych_od_2004, 
ROUND(avg(konecna_cena)) as priemerna_vydrazena_cena_EUR,
min(konecna_cena)as minimalna_vydrazena_cena_EUR,
max(konecna_cena)as maximalna_vydrazena_cena_EUR
--sum(konecna_cena)as celkovo_vydrazene_EUR
from df 
where True 
and konecna_cena is not null 
--and olej
group by 1 
having count(*)>5
order by 3
DESC 
"""


# pandasql resolves the `df` table name from the namespace passed in
result_df = ps.sqldf(query, locals())

# Show at most the first 50 ranked authors
result_df.head(50)

Unnamed: 0,autor,pocet_vydrazenych_od_2004,priemerna_vydrazena_cena_EUR,minimalna_vydrazena_cena_EUR,maximalna_vydrazena_cena_EUR
0,ANDY WARHOL,11,47953.0,465.0,170000.0
1,SKUTETZKY,10,46090.0,700.0,120000.0
2,FOLTYN FRANTISEK,14,35791.0,320.0,162000.0
3,CYPRIAN MAJERNIK,14,32980.0,200.0,170000.0
4,JAKOBY JULIUS,40,28312.0,266.0,120000.0
5,EMIL FILLA,6,24655.0,199.0,140000.0
6,BENKA MARTIN,183,20559.0,200.0,180000.0
7,JAKABCIC MICHAL,6,15297.0,1228.0,38500.0
8,HALA JAN,165,13705.0,66.0,112000.0
9,VIERA ZILINCANOVA,14,11259.0,3600.0,18600.0


## Top 50 najdrahších autorov – oleje

In [9]:
# Same per-author statistics as the overall ranking, restricted to rows whose
# `olej` flag is true: number of works, rounded mean, minimum and maximum
# final price for authors with more than 5 such works, highest mean first.
query = """
select 
autor, 
count(*) pocet_vydrazenych_od_2004, 
ROUND(avg(konecna_cena)) as priemerna_vydrazena_cena_EUR,
min(konecna_cena)as minimalna_vydrazena_cena_EUR,
max(konecna_cena)as maximalna_vydrazena_cena_EUR
--sum(konecna_cena)as celkovo_vydrazene_EUR
from df 
where True 
and konecna_cena is not null 
and olej
group by 1 
having count(*)>5
order by 3
DESC 
"""


# pandasql resolves the `df` table name from the namespace passed in
result_df = ps.sqldf(query, locals())

# Show at most the first 50 ranked authors
result_df.head(50)

Unnamed: 0,autor,pocet_vydrazenych_od_2004,priemerna_vydrazena_cena_EUR,minimalna_vydrazena_cena_EUR,maximalna_vydrazena_cena_EUR
0,FULLA LUDOVIT,14,66586.0,14539.0,160000.0
1,SKUTETZKY,8,57438.0,9500.0,120000.0
2,FOLTYN FRANTISEK,11,45411.0,2357.0,162000.0
3,GALANDA MIKULAS,10,39854.0,5145.0,120000.0
4,BENKA MARTIN,116,31176.0,1162.0,180000.0
5,JAKOBY JULIUS,36,30361.0,3153.0,120000.0
6,LADISLAV MEDNYANSZKY,142,21060.0,1992.0,130000.0
7,HALA JAN,102,21021.0,500.0,112000.0
8,ENDRE NEMES,9,16233.0,3500.0,34000.0
9,ALEXANDER BAZOVSKY MILOS,79,15976.0,800.0,90000.0


## Top 50 najdrahších diel

In [13]:
# Individual works that have a final price, most expensive first, together
# with the auction year and the link to the work's page.
query = """
select 
autor,
nazov,
konecna_cena,
--cast(rok_vytvorenia as int) rok_vytvorenia,
rok_drazby,
url_soga
from df 
where True 
and konecna_cena is not null 
--and olej
order by konecna_cena
DESC 
"""

# pandasql resolves the `df` table name from the namespace passed in
result_df = ps.sqldf(query, locals())


# Render the first 50 rows as HTML so the URLs become clickable links.
# Cell text is left HTML-escaped (the to_html default): titles and author
# names come from the database and must not be injected as raw markup;
# render_links=True builds the <a> tags on its own and does not need
# escape=False.
HTML(result_df.head(50).to_html(render_links=True))

Unnamed: 0,autor,nazov,konecna_cena,rok_drazby,url_soga
0,DOMENICHINO,Chlapec s dalmatínom,531103.0,2005,https://www.soga.sk/aukcie-obrazy-diela-umenie-starozitnosti/aukcie/59-jesenna-aukcia-vytvarnych-diel/domenichino-domenico-zampieri-chlapec-s-dalmatinom-9260
1,BENKA MARTIN,Po žatve,180000.0,2012,https://www.soga.sk/aukcie-obrazy-diela-umenie-starozitnosti/aukcie/107-vecerna-aukcia/benka-martin-po-zatve-32046
2,BENKA MARTIN,Nad hĺbkou doliny,180000.0,2018,https://www.soga.sk/aukcie-obrazy-diela-umenie-starozitnosti/aukcie/140-vecerna-aukcia/benka-martin-nad-hlbkou-doliny-67594
3,BENKA MARTIN,Slovenská rodina,176000.0,2011,https://www.soga.sk/aukcie-obrazy-diela-umenie-starozitnosti/aukcie/100-jubilejna-vecerna-aukcia/benka-martin-slovenska-rodina-29395
4,ANDY WARHOL,"Margaréta II., dánska kráľovná, z cyklu Vládnuce kráľovné (portfólio 4 sieťotlačí)",170000.0,2012,https://www.soga.sk/aukcie-obrazy-diela-umenie-starozitnosti/aukcie/107-vecerna-aukcia/warhol-andy-margareta-ii-danska-kralovna-z-cyklu-vladnuce-kralovne-portfolio-4-sietotlaci-32127
5,CYPRIAN MAJERNIK,Utečenci,170000.0,2011,https://www.soga.sk/aukcie-obrazy-diela-umenie-starozitnosti/aukcie/100-jubilejna-vecerna-aukcia/majernik-cyprian-utecenci-29404
6,FOLTYN FRANTISEK,Krajina od Mukačeva,162000.0,2011,https://www.soga.sk/aukcie-obrazy-diela-umenie-starozitnosti/aukcie/100-jubilejna-vecerna-aukcia/foltyn-frantisek-krajina-od-mukaceva-29412
7,FULLA LUDOVIT,Blumentál v zime,160000.0,2011,https://www.soga.sk/aukcie-obrazy-diela-umenie-starozitnosti/aukcie/100-jubilejna-vecerna-aukcia/fulla-ludovit-blumental-v-zime-29378
8,BENKA MARTIN,Z Detvy,146000.0,2015,https://www.soga.sk/aukcie-obrazy-diela-umenie-starozitnosti/aukcie/125-vecerna-aukcia/benka-martin-z-detvy-62558
9,EMIL FILLA,Zátišie s azalkou,140000.0,2011,https://www.soga.sk/aukcie-obrazy-diela-umenie-starozitnosti/aukcie/100-jubilejna-vecerna-aukcia/filla-emil-zatisie-s-azalkou-29380
