In [2]:
from bs4 import BeautifulSoup
import os
import requests
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from skimage import io
import skimage
from skimage import data
from skimage.color import rgb2hsv
from skimage.filters.rank import entropy
from skimage.morphology import disk
from skimage.color import rgb2gray
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import sklearn
import numpy as np

In [3]:
# URL templates for wikiart.org: the text list of an artist's works, and an
# individual painting page addressed by its site-relative path.
PAINTING_URL = 'https://www.wikiart.org{painting_path}'
ARTIST_URL = 'https://www.wikiart.org/en/{artist}/all-works/text-list'

# Root folder for everything this notebook writes (relative to the working
# directory of the kernel); created on first run.
DATA_DIR = 'Desktop/art_scraping/data/'
if os.path.exists(DATA_DIR) is False:
    os.makedirs(DATA_DIR)

In [4]:
# Artist slug as it appears in wikiart.org URLs.
artist_name = 'mark-rothko'
url_query = ARTIST_URL.format(artist=artist_name)
# requests applies no timeout by default, so an unresponsive server would
# block this cell forever; fail after 30 seconds instead.
artist_page = requests.get(url_query, timeout=30)

In [5]:
# Stop the notebook early if the artist page came back with an HTTP error
# status, reporting which URL failed before re-raising.
try:
    artist_page.raise_for_status()
except requests.exceptions.HTTPError as http_err:
    failed_url = artist_page.url
    print("Error trying to retrieve {}".format(failed_url))
    raise http_err

In [6]:
# Per-artist download folder under DATA_DIR, created if it is not there yet.
IMAGE_DIR = os.path.join(DATA_DIR, artist_name)
if os.path.exists(IMAGE_DIR) is False:
    os.makedirs(IMAGE_DIR)

# Parse the artist's text-list page (uses the lxml parser backend).
soup = BeautifulSoup(artist_page.text, 'lxml')

In [7]:
# Collect the href of every <a> inside each 'painting-list-text-row' list item.
# The result is a flat list of hrefs in document order (anchors without an
# href contribute None, exactly as link.get('href') returns it).
painting_paths = [
    anchor.get('href')
    for row in soup.find_all('li', {'class': 'painting-list-text-row'})
    for anchor in row.find_all('a')
]

print(len(painting_paths))

169


In [8]:
def download_and_save(painting_url, timeout=30):
    """Download every <img> found on a painting page into IMAGE_DIR.

    Parameters
    ----------
    painting_url : str
        Full URL of the painting page to scan.
    timeout : float, optional
        Seconds to wait on each HTTP request before giving up (default 30).

    Notes
    -----
    * Each image URL is cut at the first '!' and the last path component is
      used as the local file name.
    * Files that already exist in IMAGE_DIR are not downloaded again.
    * Pages or images that answer with an HTTP error status are reported and
      skipped, so an error body is never written to disk as an image.
    """
    r_painting_page = requests.get(painting_url, timeout=timeout)
    if not r_painting_page.ok:
        print("Error trying to retrieve {}".format(painting_url))
        return

    soup = BeautifulSoup(r_painting_page.text, "html.parser")
    for img in soup.find_all('img'):
        # <img> tags without a src attribute have nothing to download.
        img_url = img.get('src')
        if not img_url:
            continue

        img_url = img_url.split('!')[0]
        filename = img_url.split('/')[-1]
        if not filename:
            # URL ends with '/', so there is no usable file name.
            continue

        outfile = os.path.join(IMAGE_DIR, filename)
        if os.path.exists(outfile):
            continue

        print("downloading {}: {}".format(filename, img_url))
        # The second positional argument of requests.get is `params`; the
        # local output path must not be passed there (it would be sent to the
        # server as a query string).
        r = requests.get(img_url, timeout=timeout)
        if not r.ok:
            print("Error trying to retrieve {}".format(img_url))
            continue
        with open(outfile, 'wb') as f:
            f.write(r.content)

In [None]:
# Visit every collected painting page and pull its images into IMAGE_DIR.
for relative_path in painting_paths:
    download_and_save(PAINTING_URL.format(painting_path=relative_path))

In [None]:
def calc_stats(filename):
    """Compute size and mean HSV statistics for one image in IMAGE_DIR.

    Parameters
    ----------
    filename : str
        File name (not a full path) of an image inside IMAGE_DIR.

    Returns
    -------
    list
        [width, height, mean_hue, mean_saturation, mean_value], where width
        and height are in pixels and the three means are taken over all
        pixels of the HSV-converted image.
    """
    from skimage.color import gray2rgb

    image = io.imread(os.path.join(IMAGE_DIR, filename))
    if image.ndim == 2:
        # Single-channel image: rgb2hsv needs a trailing colour axis, so
        # replicate the grey channel into RGB first.
        image = gray2rgb(image)

    # Image arrays are indexed (row, column, channel): axis 0 is the height
    # and axis 1 is the width.
    height = image.shape[0]
    width = image.shape[1]

    hsv_img = rgb2hsv(image)
    mean_hue = np.mean(hsv_img[:, :, 0])
    mean_sat = np.mean(hsv_img[:, :, 1])
    mean_value = np.mean(hsv_img[:, :, 2])
    return [width, height, mean_hue, mean_sat, mean_value]

In [None]:
# Build the per-image statistics table in one go. Rows are collected in a
# list first because DataFrame.append was removed in pandas 2.0 and growing a
# frame row by row copies it on every iteration.
# Rows follow the os.listdir(IMAGE_DIR) order of the '.jpg' files.
stat_columns = ['width', 'height', 'mean_hue', 'mean_saturation', 'mean_value']
stat_rows = [
    calc_stats(image_name)  # returns values in the same order as stat_columns
    for image_name in os.listdir(IMAGE_DIR)
    if image_name.endswith('.jpg')
]
df = pd.DataFrame(stat_rows, columns=stat_columns)

In [None]:
def make_thumbnail(filename, imagepath, thumbnailpath, size=(256, 256)):
    """Write a thumbnail of one image under the same file name.

    Parameters
    ----------
    filename : str
        Name of the image file inside `imagepath`; also used for the output.
    imagepath : str
        Directory containing the source image.
    thumbnailpath : str
        Directory the thumbnail is saved into.
    size : tuple of int, optional
        Maximum (width, height) passed to `Image.thumbnail`; defaults to
        (256, 256).
    """
    path = os.path.join(imagepath, filename)
    # Open via a context manager so the source file handle is released even
    # if resizing or saving fails.
    with Image.open(path) as image:
        image.thumbnail(size)
        image.save(os.path.join(thumbnailpath, filename))

In [None]:
# Folder that receives the thumbnails for the current artist; created on
# first use.
thumb_folder_name = 'thumbnails-{artist}'.format(artist=artist_name)
THUMB_DIR = os.path.join(DATA_DIR, thumb_folder_name)
if os.path.exists(THUMB_DIR) is False:
    os.makedirs(THUMB_DIR)

In [None]:
# Produce a thumbnail in THUMB_DIR for every '.jpg' currently in IMAGE_DIR.
jpg_names = [entry for entry in os.listdir(IMAGE_DIR) if entry.endswith('.jpg')]
for jpg_name in jpg_names:
    make_thumbnail(jpg_name, IMAGE_DIR, THUMB_DIR)

In [None]:
# Distribution of the per-image mean HSV value (brightness).
num_bins = 10
fig, ax = plt.subplots()
n, bins, patches = ax.hist(df['mean_value'], bins=num_bins, facecolor='blue', alpha=0.5)
ax.set_xlabel('Average Values')
plt.show()

In [None]:
# Distribution of the per-image mean hue.
num_bins = 10
fig, ax = plt.subplots()
n, bins, patches = ax.hist(df['mean_hue'], bins=num_bins, facecolor='blue', alpha=0.5)
ax.set_xlabel('Average Hues')
plt.show()

In [None]:
# Distribution of the per-image mean saturation.
num_bins = 10
fig, ax = plt.subplots()
n, bins, patches = ax.hist(df['mean_saturation'], bins=num_bins, facecolor='blue', alpha=0.5)
ax.set_xlabel('Average Saturation')
plt.show()

In [None]:
# Distribution of image heights. Each value is one image's height in pixels,
# not an average, so the axis label says so.
num_bins = 10
fig, ax = plt.subplots()
n, bins, patches = ax.hist(df['height'], bins=num_bins, facecolor='blue', alpha=0.5)
ax.set_xlabel('Image Height (pixels)')
plt.show()

In [None]:
# Distribution of image widths. Each value is one image's width in pixels,
# not an average, so the axis label says so.
num_bins = 10
fig, ax = plt.subplots()
n, bins, patches = ax.hist(df['width'], bins=num_bins, facecolor='blue', alpha=0.5)
ax.set_xlabel('Image Width (pixels)')
plt.show()

In [None]:
# Mean brightness (x) against mean hue (y), one point per image.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df['mean_value'], df['mean_hue'])
ax.set_xlabel("Average Value")
ax.set_ylabel("Average Hue")
plt.show()

In [None]:
# Disabled canvas setup: the code below sits inside a string literal, so none
# of it runs and the cell only evaluates to that string. A GLOBAL_WIDTH of
# 5000 is assigned for real in a later cell.
'''# create background image
GLOBAL_WIDTH = 7500
bg_color = (192, 192, 192) # gray, you can choose your own
figure = Image.new('RGB', (GLOBAL_WIDTH, GLOBAL_WIDTH), bg_color)
num_points = 100'''

In [None]:
# Folder that receives the composed result images for the current artist;
# created on first use.
result_folder_name = 'results-{artist}'.format(artist=artist_name)
RESULT_DIR = os.path.join(DATA_DIR, result_folder_name)
if os.path.exists(RESULT_DIR) is False:
    os.makedirs(RESULT_DIR)

In [None]:
# Thumbnail file names, in directory-listing order. Only '.jpg' entries are
# kept: thumbnails are only ever written for '.jpg' sources, and any other
# entry in the folder (e.g. a hidden system file) would break Image.open and
# shift the position-by-position pairing with the statistics rows.
# NOTE(review): the paste loops pair thumbs[i] with row i of the statistics
# frames, which assumes both directory listings come back in the same order —
# confirm on the target filesystem.
thumbs = [entry for entry in os.listdir(THUMB_DIR) if entry.endswith('.jpg')]

In [None]:
# Mosaic: each thumbnail is pasted at (mean value, mean hue) scaled to a
# square GLOBAL_WIDTH x GLOBAL_WIDTH grey canvas.
GLOBAL_WIDTH = 5000
bg_color = (192, 192, 192)
figure1 = Image.new('RGB', (GLOBAL_WIDTH, GLOBAL_WIDTH), bg_color)
x_coords = (GLOBAL_WIDTH * df['mean_value']).apply(int)
y_coords = (GLOBAL_WIDTH * df['mean_hue']).apply(int)
for i, thumb_name in enumerate(thumbs):
    # Context manager releases the thumbnail's file handle after pasting.
    with Image.open(os.path.join(THUMB_DIR, thumb_name)) as thumb_img:
        thumb_w, thumb_h = thumb_img.size
        # The paste position is the thumbnail's top-left corner; clamp it so
        # images near the top of the range stay fully on the canvas instead
        # of being clipped or drawn entirely outside it.
        paste_x = int(min(x_coords[i], GLOBAL_WIDTH - thumb_w))
        paste_y = int(min(y_coords[i], GLOBAL_WIDTH - thumb_h))
        figure1.paste(thumb_img, (paste_x, paste_y))

In [None]:
# Write the brightness/hue mosaic into the results folder.
brightness_hue_path = os.path.join(RESULT_DIR, 'brightness_vs_hue.jpg')
figure1.save(brightness_hue_path)

In [None]:
# Mosaic: each thumbnail is pasted at (mean saturation, mean hue) scaled to a
# square GLOBAL_WIDTH x GLOBAL_WIDTH grey canvas.
bg_color = (192, 192, 192)
figure2 = Image.new('RGB', (GLOBAL_WIDTH, GLOBAL_WIDTH), bg_color)
x_coords = (GLOBAL_WIDTH * df['mean_saturation']).apply(int)
y_coords = (GLOBAL_WIDTH * df['mean_hue']).apply(int)
for i, thumb_name in enumerate(thumbs):
    # Context manager releases the thumbnail's file handle after pasting.
    with Image.open(os.path.join(THUMB_DIR, thumb_name)) as thumb_img:
        thumb_w, thumb_h = thumb_img.size
        # The paste position is the thumbnail's top-left corner; clamp it so
        # images near the top of the range stay fully on the canvas instead
        # of being clipped or drawn entirely outside it.
        paste_x = int(min(x_coords[i], GLOBAL_WIDTH - thumb_w))
        paste_y = int(min(y_coords[i], GLOBAL_WIDTH - thumb_h))
        figure2.paste(thumb_img, (paste_x, paste_y))

In [None]:
# Write the saturation/hue mosaic into the results folder.
saturation_hue_path = os.path.join(RESULT_DIR, 'saturation_vs_hue.jpg')
figure2.save(saturation_hue_path)

In [None]:
def calc_adv_stats(filename):
    """Compute mean local entropy and mean gradient energy for one image.

    Parameters
    ----------
    filename : str
        File name (not a full path) of an image inside IMAGE_DIR.

    Returns
    -------
    list
        [mean_entr, mean_ener]: the mean of the rank-filter entropy of the
        greyscale image over a disk of radius 10, and the mean over pixels of
        the summed squared channel differences between each pixel's two
        neighbours along both image axes.
    """
    from skimage.color import gray2rgb
    from skimage.util import img_as_ubyte

    image = io.imread(os.path.join(IMAGE_DIR, filename))
    if image.ndim == 2:
        # Single-channel image: the channel sums below need a colour axis,
        # so replicate the grey channel into RGB first.
        image = gray2rgb(image)

    # rgb2gray yields floats, while rank filters operate on integer images;
    # convert explicitly rather than relying on an implicit conversion.
    gray_img = img_as_ubyte(rgb2gray(image))
    entr_img = entropy(gray_img, disk(10))

    # Signed integers so the neighbour differences below cannot wrap around
    # the way unsigned 8-bit arithmetic would.
    arr = np.array(image, dtype=int)

    # Difference between the two neighbours of each pixel along each axis;
    # np.roll is cyclic, so border pixels use the opposite edge as neighbour.
    deltaX2 = np.square(np.roll(arr, -1, axis=0) - np.roll(arr, 1, axis=0))
    deltaY2 = np.square(np.roll(arr, -1, axis=1) - np.roll(arr, 1, axis=1))

    # Sum the squared differences over the colour channels for both axes.
    dualEnergy = np.sum(deltaX2, axis=2) + np.sum(deltaY2, axis=2)

    mean_entr = np.mean(entr_img)
    mean_ener = np.mean(dualEnergy)
    return [mean_entr, mean_ener]

In [None]:
# Build the entropy/energy table in one go. Rows are collected in a list
# first because DataFrame.append was removed in pandas 2.0 and growing a frame
# row by row copies it on every iteration.
# Rows follow the os.listdir(IMAGE_DIR) order of the '.jpg' files.
adv_columns = ['mean_entr', 'mean_ener']
adv_rows = [
    calc_adv_stats(image_name)  # returns values in the same order as adv_columns
    for image_name in os.listdir(IMAGE_DIR)
    if image_name.endswith('.jpg')
]
df2 = pd.DataFrame(adv_rows, columns=adv_columns)

In [None]:
#normalizing for purpose of graphing
normalized_entr = ((df2['mean_entr'] - df2['mean_entr'].min())/(df2['mean_entr'].max()-df2['mean_entr'].min()))
normalized_ener = ((df2['mean_ener'] - df2['mean_ener'].min())/(df2['mean_ener'].max()-df2['mean_ener'].min()))

In [None]:
# Mosaic: each thumbnail is pasted at (normalised entropy, normalised energy)
# scaled to a square GLOBAL_WIDTH x GLOBAL_WIDTH grey canvas.
bg_color = (192, 192, 192)
figure3 = Image.new('RGB', (GLOBAL_WIDTH, GLOBAL_WIDTH), bg_color)
x_coords = (GLOBAL_WIDTH * normalized_entr).apply(int)
y_coords = (GLOBAL_WIDTH * normalized_ener).apply(int)
for i, thumb_name in enumerate(thumbs):
    # Context manager releases the thumbnail's file handle after pasting.
    with Image.open(os.path.join(THUMB_DIR, thumb_name)) as thumb_img:
        thumb_w, thumb_h = thumb_img.size
        # The paste position is the thumbnail's top-left corner. The
        # normalised maximum maps exactly to GLOBAL_WIDTH, which would put
        # that image completely off the canvas; clamp so it stays visible.
        paste_x = int(min(x_coords[i], GLOBAL_WIDTH - thumb_w))
        paste_y = int(min(y_coords[i], GLOBAL_WIDTH - thumb_h))
        figure3.paste(thumb_img, (paste_x, paste_y))

In [None]:
# Write the entropy/energy mosaic into the results folder.
entropy_energy_path = os.path.join(RESULT_DIR, "mean_entr vs mean_ener.jpg")
figure3.save(entropy_energy_path)

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# NOTE(review): wildcard import — it binds an unspecified set of sklearn names
# into the notebook namespace. The later `sklearn.svm.SVC()` call presumably
# relies on this line having loaded the `svm` submodule; confirm, and prefer
# an explicit `import sklearn.svm` in the import cell.
from sklearn import *

In [None]:
# Standardise every column of the statistics table; the scaler is fitted on
# the same frame it transforms.
scaler = StandardScaler()
features = scaler.fit(df).transform(df)

In [None]:
# Label the first half of the samples 0 and the remainder 1. The second
# count is derived from the first so the label vector always has exactly one
# entry per feature row, including when the number of rows is odd.
n_samples = len(features)
n_zero_labels = n_samples // 2
n_one_labels = n_samples - n_zero_labels
labels = np.concatenate((np.zeros(n_zero_labels), np.ones(n_one_labels)))

In [None]:
# Fit a support vector classifier with default settings on the standardised
# features; fit() returns the estimator itself, which is then displayed.
model = sklearn.svm.SVC().fit(features, labels)
model