In [1]:
import numpy as np
import pandas as pd

# Render floats with a single decimal place wherever pandas displays numbers.
pd.set_option('display.float_format', '{:.1f}'.format)

In [2]:
# Load the auction records from the CSV export.
saffronart_df = pd.read_csv('saffronart_dataset.csv')

# Convert every date column to pandas datetime objects.
for date_column in ('auction_date', 'birth_date', 'death_date'):
    saffronart_df[date_column] = pd.to_datetime(saffronart_df[date_column])

# Preview the first five rows.
saffronart_df.head()

Unnamed: 0,artist,birth_date,birth_place,death_date,artist_age,title,winning_bid,low_est,high_est,auction_date,category,style,size,medium
0,akhilesh,1956-01-01,Indore,NaT,63.0,Divine Conversation,9000.0,5000.0,7000.0,2019-06-01,painting,abstract,71.5 x 71.5 in,acrylic on canvas
1,akhilesh,1956-01-01,Indore,NaT,63.0,In search of Untitled forms -II,1120.0,5000.0,6670.0,2013-11-01,painting,abstract,32.5 x 44 in,acrylic on canvas
2,akhilesh,1956-01-01,Indore,NaT,63.0,In Search of Untitled Lines,6186.0,6900.0,8625.0,2013-08-01,painting,,47 x 47 in,acrylic on canvas
3,akhilesh,1956-01-01,Indore,NaT,63.0,Magadhi,2400.0,5770.0,7695.0,2013-02-01,painting,abstract,33 x 44.5 in,acrylic on canvas
4,akhilesh,1956-01-01,Indore,NaT,63.0,Untitled,2942.0,6735.0,8655.0,2013-02-01,painting,abstract,40 x 40 in,acrylic on canvas


In [3]:
# Preview the last rows of the frame (tail() shows five rows by default).
saffronart_df.tail()

Unnamed: 0,artist,birth_date,birth_place,death_date,artist_age,title,winning_bid,low_est,high_est,auction_date,category,style,size,medium
12416,atul dodiya,1959-01-20,Mumbai,NaT,60.0,Walking Man,30475.0,20000.0,26670.0,2010-09-01,painting,figurative,21 x 21 in,oil on paper
12417,m f husain,1913-09-17,"Pandharpur, Maharashtra",2011-06-09,98.0,Untitled,340000.0,166670.0,250000.0,2014-09-04,painting,figurative,118 x 46 in,oil on canvas
12418,nasreen mohamedi,1937-01-01,Karachi India (now in Pakistan),1990-01-01,53.0,Untitled,11352.0,8700.0,10870.0,2011-09-01,drawing,abstract,10.5 x 13.5 in,ink and pencil on card paper
12419,baiju parthan,1956-01-01,"Kottayam, Kerala",NaT,63.0,Process - (Fruit),4426.0,2460.0,4100.0,2015-04-07,print making,still life,36.5 x 34.5 in,archival ink on hahnemuhle archival paper
12420,thota vaikuntam,1942-01-01,Boorugupali Karimnagar Andhra Pradesh,NaT,77.0,Untitled,26554.0,11115.0,13335.0,2010-09-01,painting,figurative,36 x 24 in,acrylic on canvas board


In [4]:
# Check outliers: print the rows holding the extreme artist ages and winning bids.
#
# idxmax()/idxmin() return index *labels*, not positions, so the matching rows
# are fetched with the label-based .loc. Using .iloc here would only be correct
# while the frame keeps its default 0..n-1 index and would silently return the
# wrong row after any filtering, sorting or re-indexing.

# For the artist checks, show only the columns from 'artist' through 'artist_age'.
oldest_artist = saffronart_df.loc[saffronart_df['artist_age'].idxmax(), 'artist':'artist_age']
print("OLDEST ARTIST:", oldest_artist, sep='\n')

youngest_artist = saffronart_df.loc[saffronart_df['artist_age'].idxmin(), 'artist':'artist_age']
print("\nYOUNGEST ARTIST:", youngest_artist, sep='\n')

# For the lot checks, show the complete record.
most_exp_lot = saffronart_df.loc[saffronart_df['winning_bid'].idxmax()]
print("\nMOST EXPENSIVE LOT:", most_exp_lot, sep='\n')

cheapest_lot = saffronart_df.loc[saffronart_df['winning_bid'].idxmin()]
print("\nCHEAPEST LOT:", cheapest_lot, sep='\n')

OLDEST ARTIST:
artist            devayani krishna
birth_date     1918-07-08 00:00:00
birth_place                    NaN
death_date                     NaT
artist_age                   101.0
Name: 5555, dtype: object

YOUNGEST ARTIST:
artist             girish dahiwale
birth_date     1974-01-01 00:00:00
birth_place                    NaN
death_date     1998-01-01 00:00:00
artist_age                    24.0
Name: 2083, dtype: object

MOST EXPENSIVE LOT:
artist                   tyeb mehta
birth_date      1925-01-01 00:00:00
birth_place                     NaN
death_date      2009-01-01 00:00:00
artist_age                     84.0
title                          Kali
winning_bid               3998000.0
low_est                   3000000.0
high_est                  4000000.0
auction_date    2018-06-01 00:00:00
category                   painting
style                    figurative
size                     67 x 54 in
medium               oil on canvas 
Name: 6427, dtype: object

CHEAPEST LOT:

###### I checked these records on saffronart.com and they're all correct. 

In [5]:
# Number of lots by auction date, most frequent date first.
#
# The column names are set explicitly instead of renaming whatever
# value_counts() produces: pandas < 2.0 yields 'index' plus the original
# column name after reset_index(), while pandas >= 2.0 yields the original
# column name plus 'count'. rename_axis() + reset_index(name=...) gives the
# same 'auction_date' / 'lots_number' columns on both, and building the frame
# in one expression keeps the cell safe to re-run.
lots_number = (
    saffronart_df['auction_date']
    .value_counts()
    .rename_axis('auction_date')
    .reset_index(name='lots_number')
)

# Day of the week of each auction date: 0 is Monday ... 6 is Sunday.
lots_number['day_of_week'] = lots_number['auction_date'].dt.dayofweek

lots_number

Unnamed: 0,auction_date,lots_number,day_of_week
0,2002-05-01,205,2
1,2001-12-01,194,5
2,2012-07-01,194,6
3,2018-12-01,187,5
4,2005-12-01,177,3
5,2014-12-01,176,0
6,2015-08-01,173,5
7,2015-03-01,173,6
8,2015-07-01,172,2
9,2002-12-01,169,6


We notice that the auction dates on which only one or a few lots were for sale are all Tuesdays (in `day_of_week`, 0 is Monday and 6 is Sunday, so Tuesday is 1). These dates belong to a series of events that StoryLTD calls "Absolute Tuesdays". The number of lots for sale at these events varies, and sometimes only one art piece is open for sale. For auction dates with more lots, the counts are hard to check because Saffronart.com only lists the lots that actually got sold when grouping them by auction. Also, when cleaning the data, we removed lots that are not paintings.