# Paintings price prediction

This notebook was inspired by idea from ODS pet projects. I collected over 12 000 paintings from popular site, that is selling paintings online via scrapy. This is a preporation notebook for future price predictions.

In [12]:
import pandas as pd
import numpy as np 
import requests

In [120]:
data = pd.read_csv('items.csv')

In [121]:
data.shape

(12719, 7)

In [122]:
data.dtypes

currency         object
image_url        object
material         object
price           float64
product_type     object
sizes            object
styles           object
dtype: object

In [123]:
data.isnull().sum()

currency        258
image_url       258
material         97
price           258
product_type    258
sizes             1
styles            5
dtype: int64

In [124]:
data.dropna(inplace = True)

In [125]:
data.shape

(12369, 7)

In [126]:
data.head()

Unnamed: 0,currency,image_url,material,price,product_type,sizes,styles
0,USD,https://images.saatchiart.com/saatchi/540866/a...,Glass,2870.0,product,32.3 W x 44.1 H x 0.2 in,"Abstract,Figurative,Street Art,Expressionism,C..."
1,USD,https://images.saatchiart.com/saatchi/93960/ar...,Paper,255.0,product,11.8 W x 9.1 H x 0.1 in,Abstract
2,USD,https://images.saatchiart.com/saatchi/803412/a...,Canvas,2540.0,product,27.2 W x 29.1 H x 1.2 in,"Abstract,Abstract Expressionism,Figurative,Fin..."
3,USD,https://images.saatchiart.com/saatchi/655953/a...,Canvas,4650.0,product,55.1 W x 69.3 H x 1 in,"Expressionism,Surrealism"
4,USD,https://images.saatchiart.com/saatchi/45266/ar...,Canvas,3210.0,product,36 W x 48 H x 1.5 in,"Abstract,Modern,Abstract Expressionism"


In [127]:
data.currency.value_counts()

USD    12369
Name: currency, dtype: int64

In [128]:
data.styles.value_counts()

Abstract                                               1244
Figurative                                              316
Abstract,Abstract Expressionism                         305
Abstract Expressionism                                  259
Expressionism                                           227
                                                       ... 
Expressionism,Portraiture,Realism,Modern,Figurative       1
Impressionism,Realism,Minimalism,Modern                   1
Abstract,Fine Art,Illustration                            1
Abstract,Modern,contemporary,diptych,mid century          1
Abstract,Modern,Surrealism,Art Deco,Fine Art              1
Name: styles, Length: 3973, dtype: int64

In [129]:
def get_h (s):
    h = s.split('x')[1].strip().split(' ') [0]
    return float(h)

def get_w (s):
    w = s.split('x')[0].strip().split(' ')[0]
    return float(w)

data['width'] = data['sizes'].apply(get_w)
data['length'] = data['sizes'].apply(get_h)

In [130]:
data.head()

Unnamed: 0,currency,image_url,material,price,product_type,sizes,styles,width,length
0,USD,https://images.saatchiart.com/saatchi/540866/a...,Glass,2870.0,product,32.3 W x 44.1 H x 0.2 in,"Abstract,Figurative,Street Art,Expressionism,C...",32.3,44.1
1,USD,https://images.saatchiart.com/saatchi/93960/ar...,Paper,255.0,product,11.8 W x 9.1 H x 0.1 in,Abstract,11.8,9.1
2,USD,https://images.saatchiart.com/saatchi/803412/a...,Canvas,2540.0,product,27.2 W x 29.1 H x 1.2 in,"Abstract,Abstract Expressionism,Figurative,Fin...",27.2,29.1
3,USD,https://images.saatchiart.com/saatchi/655953/a...,Canvas,4650.0,product,55.1 W x 69.3 H x 1 in,"Expressionism,Surrealism",55.1,69.3
4,USD,https://images.saatchiart.com/saatchi/45266/ar...,Canvas,3210.0,product,36 W x 48 H x 1.5 in,"Abstract,Modern,Abstract Expressionism",36.0,48.0


In [131]:
data.product_type.value_counts()

product    12369
Name: product_type, dtype: int64

In [132]:
cols_to_drop = ['product_type', 'currency', 'sizes']
data.drop(cols_to_drop, inplace = True, axis = 1)
data.reset_index(drop = True, inplace = True)

In [133]:
data.head()

Unnamed: 0,image_url,material,price,styles,width,length
0,https://images.saatchiart.com/saatchi/540866/a...,Glass,2870.0,"Abstract,Figurative,Street Art,Expressionism,C...",32.3,44.1
1,https://images.saatchiart.com/saatchi/93960/ar...,Paper,255.0,Abstract,11.8,9.1
2,https://images.saatchiart.com/saatchi/803412/a...,Canvas,2540.0,"Abstract,Abstract Expressionism,Figurative,Fin...",27.2,29.1
3,https://images.saatchiart.com/saatchi/655953/a...,Canvas,4650.0,"Expressionism,Surrealism",55.1,69.3
4,https://images.saatchiart.com/saatchi/45266/ar...,Canvas,3210.0,"Abstract,Modern,Abstract Expressionism",36.0,48.0


In [134]:
un_styles = []
for i in data.styles.unique():
    styles = i.split(',')
    styles = [style.strip().lower() for style in styles ]
    un_styles += styles
un_styles = set (un_styles)
len(un_styles)

243

In [135]:
un_styles

{"80's",
 'abstract',
 'abstract art',
 'abstract expressionism',
 'abstract landscape',
 'abstract modern',
 'abstract painting',
 'abstracto',
 'abstrait',
 'acrylic pour',
 'acrylic pouring',
 'animalism',
 'animalist',
 'architecture',
 'art',
 'art brut',
 'art deco',
 'art nouveau',
 'asian feel',
 'astract expressionism',
 'augmented reality',
 'avangard',
 'avangardizm',
 'avant-garde',
 'baroque',
 'black and white',
 'botanic',
 'botanical',
 'bright',
 'calligraffiti',
 'calligraphy',
 'canvas staining',
 'cartoon',
 'cats',
 'chiaroscuro',
 'chinese art',
 'cinetic',
 'circle',
 'cityscape',
 'classic',
 'classical',
 'classicism',
 'collage',
 'color',
 'color field',
 'colorfield',
 'colorist',
 'colors',
 'colourfield painting',
 'colourful',
 'conceptual',
 'constructivism',
 'contemporary',
 'contemporary art',
 'contemporary rinpa',
 'contemporarypainting',
 'contemporaty',
 'contemporay',
 'contempory',
 'cubism',
 'dada',
 'decorative',
 'diptych',
 'documentary',
 

In [136]:
for i in un_styles:
    data[i] = 0

In [137]:
data.head()

Unnamed: 0,image_url,material,price,styles,width,length,rogier van der weyden,renaissance,naiv,avangardizm,...,outsider,folk,monumental art,collage,spontaneous realism,urban expressionism,hard edge abstract,constructivism,optical art,modernism
0,https://images.saatchiart.com/saatchi/540866/a...,Glass,2870.0,"Abstract,Figurative,Street Art,Expressionism,C...",32.3,44.1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,https://images.saatchiart.com/saatchi/93960/ar...,Paper,255.0,Abstract,11.8,9.1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,https://images.saatchiart.com/saatchi/803412/a...,Canvas,2540.0,"Abstract,Abstract Expressionism,Figurative,Fin...",27.2,29.1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,https://images.saatchiart.com/saatchi/655953/a...,Canvas,4650.0,"Expressionism,Surrealism",55.1,69.3,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,https://images.saatchiart.com/saatchi/45266/ar...,Canvas,3210.0,"Abstract,Modern,Abstract Expressionism",36.0,48.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [139]:
def set_styles(dataframe):
    for i in range (len(dataframe)):
        styles = dataframe.loc[i, 'styles'].split(',')
        styles = [style.strip().lower() for style in styles]
        dataframe.loc[i, styles] = 1
    return dataframe

In [159]:
data = set_styles(data)

In [161]:
data.head()

Unnamed: 0,image_url,material,price,styles,width,length,rogier van der weyden,renaissance,naiv,avangardizm,...,outsider,folk,monumental art,collage,spontaneous realism,urban expressionism,hard edge abstract,constructivism,optical art,modernism
0,https://images.saatchiart.com/saatchi/540866/a...,Glass,2870.0,"Abstract,Figurative,Street Art,Expressionism,C...",32.3,44.1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,https://images.saatchiart.com/saatchi/93960/ar...,Paper,255.0,Abstract,11.8,9.1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,https://images.saatchiart.com/saatchi/803412/a...,Canvas,2540.0,"Abstract,Abstract Expressionism,Figurative,Fin...",27.2,29.1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,https://images.saatchiart.com/saatchi/655953/a...,Canvas,4650.0,"Expressionism,Surrealism",55.1,69.3,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,https://images.saatchiart.com/saatchi/45266/ar...,Canvas,3210.0,"Abstract,Modern,Abstract Expressionism",36.0,48.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [162]:
data["surrealism"].sum()

850

In [171]:
number = 1
for img in data['image_url']:
    p = requests.get(img)
    out = open("images\img{}.jpg".format(number), "wb")
    out.write(p.content)
    out.close()
    number += 1

In [172]:
data.to_csv('paintings_data.csv')