# Scrape data from the Database of Modern Exhibitions (DoME)



## Imports

In [19]:
import pandas as pd
import numpy as np
from collections import Counter
from matplotlib import pyplot as plt
import urllib.request
from bs4 import BeautifulSoup
import re
import json

## Load Artists

In [3]:
# Load the artists table from the local Excel workbook.
# NOTE(review): presumably an export from DoME; later cells read its
# "Name", "ID", "Gender", "# Exhibitions" and "Status" columns.
personWorkbook = "Person.xlsx"
person = pd.read_excel(personWorkbook)

## Scrape data
This code takes each artist's ID from the artists table and uses it to build a URL from which it downloads information about all exhibitions that artist took part in. 
It also scrapes the total number of cities and countries in which that artist exhibited.

In [4]:
# Scrape per-artist data from DoME.
#
# Result: `artist` maps each artist's name to a dict with the keys
#   "id"                      - the artist's ID from the `person` table
#   "cities"                  - Counter over the "City" column of the export
#   "gender", "nExhibitions"  - copied from the `person` row
#   "exhibitions per country" - digit string scraped from the artist page
#   "exhibitions per city"    - digit string scraped from the artist page
artist = {}
for index, row in person.iterrows():
    print("{0} - Now at Artist: {1}".format(index, row["Name"]))
    artistId = row["ID"]  # deliberately not `id`, which would shadow the builtin
    url = "https://exhibitions.univie.ac.at/person/" + str(artistId)

    # Download the artist's exhibition export and tally how often each
    # value of its "City" column occurs.
    export = pd.read_excel(url + "/exhibition/csv")
    cities = Counter(export["City"].tolist())

    artistInfo = {
        "id": artistId,
        "cities": cities,
        "gender": row["Gender"],
        "nExhibitions": row["# Exhibitions"],
    }
    artist[row["Name"]] = artistInfo

    # Gets cities and countries
    print("Reading URL")
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url) as response:
        print("URL finished loading")
        content = response.read()
    soup = BeautifulSoup(content, 'lxml')
    supa = soup.find_all("div", {"class": "artist-information-text-stats"})
    # The last two stats blocks on the page hold the per-country and
    # per-city figures, in that order.
    exPerCountryText = supa[-2].get_text()
    exPerCityText = supa[-1].get_text()
    # Strip everything except digits (raw string: '\D' is not a valid
    # escape sequence in a normal string literal).
    artistInfo["exhibitions per country"] = re.sub(r'\D', '', exPerCountryText)
    artistInfo["exhibitions per city"] = re.sub(r'\D', '', exPerCityText)

    # Stop after the row with index 100, i.e. 101 artists in total; the
    # cleanup cell trims `person` to the same 101 rows.
    if index > 99:
        break
    

0 - Now at Artist: Renoir, Pierre-Auguste
Reading URL
URL finished loading
1 - Now at Artist: Cézanne, Paul
Reading URL
URL finished loading
2 - Now at Artist: Gogh, Vincent van
Reading URL
URL finished loading
3 - Now at Artist: Bonnard, Pierre
Reading URL
URL finished loading
4 - Now at Artist: Vuillard, Edouard
Reading URL
URL finished loading
5 - Now at Artist: Monet, Claude
Reading URL
URL finished loading
6 - Now at Artist: Pissarro, Camille
Reading URL
URL finished loading
7 - Now at Artist: Liebermann, Max
Reading URL
URL finished loading
8 - Now at Artist: Matisse, Henri
Reading URL
URL finished loading
9 - Now at Artist: Manet, Edouard
Reading URL
URL finished loading
10 - Now at Artist: Denis, Maurice
Reading URL
URL finished loading
11 - Now at Artist: Hübner, Ulrich
Reading URL
URL finished loading
12 - Now at Artist: Gauguin, Paul
Reading URL
URL finished loading
13 - Now at Artist: Kandinsky, Vassily
Reading URL
URL finished loading
14 - Now at Artist: Corinth, Lovis
Rea

## Cleanup of the table

In [5]:
# Remove the "Status" column. Unlike `del`, drop(..., errors="ignore") does
# not raise KeyError when the column is already gone, so this cell can be
# re-run safely.
person = person.drop(columns=["Status"], errors="ignore")

In [6]:
# Keep only the first 101 rows (positions 0-100): those are the artists the
# scraping loop handled before its `index > 99` break.
rowsNotScraped = person.index[101:]
person = person.drop(index=rowsNotScraped)

## Reformat city and country data

In [7]:
# Copy the scraped totals into `person`, converting the digit strings to int.
# Each row is matched to its scraped record by the "Name" column instead of
# by the iteration order of `artist`, so the values cannot end up on the
# wrong row; a name that was never scraped raises KeyError.
allExPerCountry = [
    int(artist[name]["exhibitions per country"]) for name in person["Name"]
]
allExPerCity = [
    int(artist[name]["exhibitions per city"]) for name in person["Name"]
]
person["Exhibitions per country"] = allExPerCountry
person["Exhibitions per city"] = allExPerCity

## Get a unique list of all cities visited by the artists and fill in the number of visits for each city

In [8]:
# Collect every city key that occurs in any artist's "cities" Counter.
allCities = []
for scraped in artist.values():
    allCities.extend(scraped["cities"].keys())

# One new column per distinct city, holding each artist's count for it.
# A Counter returns 0 for keys it has never seen, so artists without an
# entry for a city get 0.
uniqueCities = np.unique(allCities)
for city in uniqueCities:
    person[city] = [artist[name]["cities"][city] for name in person["Name"]]



## Save data to XLSX and HDF5

In [10]:
# Write the enriched artist table to disk in two formats:
# an Excel workbook and an HDF5 file (stored under the key "df").
person.to_excel(excel_writer="mareisDaten.xlsx")
person.to_hdf(path_or_buf="mareisDaten.h5", key="df")

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block2_values] [items->Index(['Name', 'Date of Birth', 'Place of Birth', 'Date of Death',
       'Place of Death', 'Gender', '(Primary) Nationality', 'GND', 'Wikidata'],
      dtype='object')]

  encoding=encoding,


In [None]:
# Example figure: bar chart of the per-city counts scraped for one artist.
exampleArtist = "Renoir, Pierre-Auguste"
exampleCities = artist[exampleArtist]["cities"]

fig, axes = plt.subplots(figsize=(30,2))
# Pass real lists instead of dict views, and coerce the labels to str so a
# non-string key (e.g. a missing city read as NaN) cannot break the
# categorical x axis.
barString = [str(city) for city in exampleCities.keys()]
barHeight = list(exampleCities.values())
axes.bar(barString, barHeight)
# Label the figure so it can be understood on its own.
axes.set_title(exampleArtist)
axes.set_xlabel("City")
axes.set_ylabel("Count")
fig.savefig("beispiel.png", format="png")

In [24]:
# Fetch the Wikidata record for entity Q39931 and parse the JSON into `data`.
# (The former leading `person['Name']` expression was dropped: it was not the
# last expression of the cell, so it displayed nothing.)
wikidataUrl = 'https://www.wikidata.org/wiki/Special:EntityData/Q39931.json'
# The handle is called `response` so it does not overwrite the `url` string
# used by the scraping cell.
with urllib.request.urlopen(wikidataUrl) as response:
    data = json.loads(response.read().decode())

In [28]:
# Display the complete Wikidata record for entity Q39931.
entityRecord = data["entities"]["Q39931"]
entityRecord

{'pageid': 42289,
 'ns': 0,
 'title': 'Q39931',
 'lastrevid': 1192871743,
 'modified': '2020-05-29T00:06:58Z',
 'type': 'item',
 'id': 'Q39931',
 'labels': {'lb': {'language': 'lb', 'value': 'Pierre-Auguste Renoir'},
  'zh': {'language': 'zh', 'value': '皮耶-奧古斯特·雷諾瓦'},
  'pl': {'language': 'pl', 'value': 'Auguste Renoir'},
  'eu': {'language': 'eu', 'value': 'Pierre-Auguste Renoir'},
  'qu': {'language': 'qu', 'value': 'Pierre-Auguste Renoir'},
  'bs': {'language': 'bs', 'value': 'Pierre-Auguste Renoir'},
  'uz': {'language': 'uz', 'value': 'Pierre-Auguste Renoir'},
  'es': {'language': 'es', 'value': 'Pierre-Auguste Renoir'},
  'oc': {'language': 'oc', 'value': 'Pierre-Auguste Renoir'},
  'hu': {'language': 'hu', 'value': 'Pierre-Auguste Renoir'},
  'cbk-zam': {'language': 'cbk-zam', 'value': 'Pierre-Auguste Renoir'},
  'et': {'language': 'et', 'value': 'Pierre-Auguste Renoir'},
  'bn': {'language': 'bn', 'value': 'পিয়ের-অগ্যুস্ত রেনোয়া'},
  'sq': {'language': 'sq', 'value': 'Pierre-