In [23]:
import networkx as nx
import json
import matplotlib.pyplot as plt
import operator
from collections import Counter
from networkx.algorithms import bipartite

In [2]:
# Load the entity dump into `data` (entity id -> list of attribute groups).
# NOTE(review): the file is decoded as cp1252; JSON dumps are usually UTF-8 —
# confirm this matches how the dump was written.
with open("data/entity_dump_cleaned_and_restructured.json", encoding='cp1252') as dump_file:
    data = json.load(dump_file)

### Auxiliary functions

In [3]:
def exchange_id_with_value(id):
    """Return the label stored for the entity ``id`` in the global ``data``.

    ``data[id]`` is walked group by group. In every group the first ``dict``
    entry is taken and its ``"Label"`` value is remembered. When several
    groups carry a dict, the label found in the last such group is returned
    (this matches the behaviour the rest of the notebook was built on).

    Parameters
    ----------
    id : str
        Key of the entity in ``data``. The name shadows the builtin ``id``;
        it is kept so that existing keyword calls continue to work.

    Returns
    -------
    The ``"Label"`` value, or ``None`` when no group of the entity contains
    a dict. Previously that case failed with an ``UnboundLocalError``.

    Raises
    ------
    KeyError
        If ``id`` is not a key of ``data``, or if the first dict of a group
        has no ``"Label"`` key.
    """
    label = None
    for group in data[id]:
        for entry in group:
            if isinstance(entry, dict):
                label = entry["Label"]
                # Only the first dict of a group is considered; later groups
                # may still overwrite the label.
                break
    return label

### Dictionaries of classes

In [4]:
# Every entity whose key contains the substring "work", in dump order.
work_dict = {
    entity_id: groups
    for entity_id, groups in data.items()
    if "work" in entity_id
}

In [5]:
# Every entity whose key contains the substring "photo", in dump order.
photo_dict = {
    entity_id: groups
    for entity_id, groups in data.items()
    if "photo" in entity_id
}

### Finding relations

#### First, artworks and artists

In [6]:
# Map every work id (the part of the key before the first "(") to a tuple
# (artist_id, profession_label). Works for which no "carried out by"
# statement is found are kept with the placeholder ("No Artist", None).
works_and_artists = {}
for work, attribute_groups in work_dict.items():

    artist_id = None
    profession_id = None

    for attribute in attribute_groups:
        for pair in attribute:
            # Only list entries with at least two elements can hold a
            # predicate/value statement; dict entries (labels) and shorter
            # lists are skipped instead of raising IndexError.
            if not isinstance(pair, list) or len(pair) < 2:
                continue

            # The predicate is accepted in either position of the pair.
            if pair[0] == "PC14_carried_out_by(cidoc-crm)":
                artist_id = pair[1].split("(")[0]
            elif pair[1] == "PC14_carried_out_by(cidoc-crm)":
                artist_id = pair[0].split("(")[0]

            # Fix: the second branch used to repeat the `[0] == ...` test of
            # the first branch and was therefore unreachable; it now checks
            # position 1, mirroring the artist lookup above.
            if pair[0] == "P14.1_in_the_role_of(cidoc-crm)":
                profession_id = exchange_id_with_value(pair[1])
            elif pair[1] == "P14.1_in_the_role_of(cidoc-crm)":
                profession_id = exchange_id_with_value(pair[0])

    work_as_key = work.split("(")[0]
    if artist_id is None:
        # Stored as a tuple (it used to be a list) so that every value of
        # the dictionary has the same type. A profession found without an
        # artist is still dropped, as before.
        works_and_artists[work_as_key] = ("No Artist", None)
    else:
        works_and_artists[work_as_key] = (artist_id, profession_id)

In [7]:
# Show the first five entries that have an artist, to illustrate the
# structure of `works_and_artists`.
shown = 0

for work_id, artist_info in works_and_artists.items():
    if artist_info[0] != 'No Artist':
        print((work_id, artist_info))
        shown += 1

    if shown == 5:
        break

('07602583%2CT%2C001', ('07600210', 'Bildhauer'))
('07602583%2CT%2C005', ('00086500', 'Bildhauer'))
('07602583%2CT%2C004', ('00086500', 'Bildhauer'))
('07602583%2CT%2C002', ('00086500', 'Bildhauer'))
('07602583%2CT%2C003', ('00086500', 'Bildhauer'))


In [8]:
# Artwork–artist graph: one 'artwork' node per work that has an artist,
# one 'artist' node per artist id, and an edge of weight 1 between them.
G = nx.Graph()

for work_id, (artist_id, profession) in works_and_artists.items():
    if artist_id == 'No Artist':
        continue

    G.add_node(work_id, ent='artwork')
    # Artists without a known role get the placeholder profession 'NA'.
    G.add_node(artist_id, ent='artist', profession='NA' if profession is None else profession)
    G.add_edge(work_id, artist_id, weight=1)

In [9]:
# Split the nodes of G by their 'ent' attribute: artworks, and everything else.
artworks = set()
for node, attrs in G.nodes(data=True):
    if attrs["ent"] == 'artwork':
        artworks.add(node)
artists = {node for node in G if node not in artworks}

In [10]:
# Number of artwork nodes, artist nodes and edges, one per line.
for size in (len(artworks), len(artists), len(G.edges())):
    print(size)

52381
5485
53636


#### Second, artworks and photos

In [11]:
# All photos that have a relation with an artwork, i.e. whose attribute
# groups contain a list entry holding "P128_carries(cidoc-crm)".
photos_with_works = {
    photo_id: groups
    for photo_id, groups in photo_dict.items()
    if any(
        isinstance(entry, list) and "P128_carries(cidoc-crm)" in entry
        for group in groups
        for entry in group
    )
}

In [12]:
# Print the first five (photo id, attribute groups) pairs to illustrate the
# structure of `photos_with_works`.
for position, item in enumerate(photos_with_works.items()):
    if position == 5:
        break
    print(item)

('fln0419237z_p(photo)', [[['61(746)', 'E22_Man-Made_Object(cidoc-crm)'], {'Label': '746/61'}, ['P2_has_type(cidoc-crm)', '300127173(aat)']], [['C005A1DD-D117-3CA1-96E0-86DD3C5F6718(acquisition)', 'E8_Acquisition(cidoc-crm)'], ['P4_has_time-span(cidoc-crm)', '921EF3FA-2031-329D-91F1-3CE104C2B18D(timespan)']], [['fln0419237z_p(photo)', 'Namedgraph(custom)'], ['has_provider(custom)', 'KHI(source)']], [['fln0419237z_p(photo)', 'E22_Man-Made_Object(cidoc-crm)'], {'Label': 'fln0419237z_p'}, ['P108i_was_produced_by(cidoc-crm)', 'production(fln0419237z_p)'], ['P128_carries(cidoc-crm)', 'visual_item(07650134)'], ['P129i_is_subject_of(cidoc-crm)', 'default.jpg(0)'], ['P1_is_identified_by(cidoc-crm)', 'E4EBEBAD-9A32-30C2-8185-7B091C31F076(identifier)'], ['P24i_changed_ownership_through(cidoc-crm)', 'C005A1DD-D117-3CA1-96E0-86DD3C5F6718(acquisition)'], ['P2_has_type(cidoc-crm)', '300127104(aat)'], ['P43_has_dimension(cidoc-crm)', 'ACAD540B-9BCA-31FF-89DD-578CCC8BB592(-)'], ['P50_has_current_keepe

In [13]:
# Number of photos that carry at least one artwork.
print(len(photos_with_works))

33028


In [14]:
# Map each photographer label to the list of work ids shown on their photos.
# A work id appears once per photo, so repeats encode how often a
# photographer shot the same work.
photographer_dict = {}

for photo_id in photos_with_works:

    photographer = None

    for group in photo_dict[photo_id]:
        for entry in group:
            if type(entry) != list:
                continue
            if "P128_carries(cidoc-crm)" in entry:
                # Take the text between the parentheses of the first
                # element containing "visual_item".
                work_on_photo = [part.split("(")[1].replace(")", "") for part in entry if "visual_item" in part][0]
            if "P14_carried_out_by(cidoc-crm)" in entry:
                # Resolve the first element containing "actor" to its label.
                photographer = exchange_id_with_value([part for part in entry if "actor" in part][0])

    # Photos without a photographer are left out.
    if photographer is not None:
        photographer_dict.setdefault(photographer, []).append(work_on_photo)

In [15]:
# Number of distinct photographer labels collected above.
print(len(photographer_dict))

68


In [16]:
# Example of the dictionary structure: the list of work ids recorded for one
# photographer (ids repeat once per photo). Prints the whole list.
print(photographer_dict['Artini,Luigi'])

['07602583%2CT%2C009', '07602583%2CT%2C009', '07520168', '07580639', '07580639', '07520176', '07932658', '07580638', '07580638', '07602583', '07520167', '07580639', '07520169%2CT%2C007', '07580639', '07932656', '07520175', '07520169%2CT%2C002', '07580638', '07520178', '07580639', '07580639', '07580638', '07520169%2CT%2C004', '07602583%2CT%2C009', '07520169%2CT%2C007', '07602583%2CT%2C009', '07602583%2CT%2C007', '07580638', '07932656', '07580639', '07520169%2CT%2C003', '07520178', '07580639', '07580638', '07580638', '07520169%2CT%2C005', '07520178', '07580639', '07580638', '07602583%2CT%2C007', '07932656', '07580639', '07520169%2CT%2C001', '07580638', '07580638', '07580639', '07580638', '07602583%2CT%2C007', '07932656', '07602583%2CT%2C007', '07932656', '07580638', '07580639', '07602583', '07580638', '07580638', '07580639', '07602583%2CT%2C007', '07602583%2CT%2C007', '07602583', '07602583%2CT%2C007', '07580638', '07932658', '07602583%2CT%2C007', '07520169%2CT%2C007', '07580639', '075201

In [17]:
# Now adding photographers to the network and connecting them with artworks.
# The edge weight is the number of times the work occurs in the
# photographer's list.
for photographer, photographed_works in photographer_dict.items():

    G.add_node(photographer, ent='photographer')

    for work_id, n_photos in Counter(photographed_works).items():
        # add_node also (re)sets ent='artwork' on ids already present in G.
        G.add_node(work_id, ent='artwork')
        G.add_edge(photographer, work_id, weight=n_photos)

In [18]:
# updating node sets
artworks = {n for n, d in G.nodes(data=True) if d["ent"] == 'artwork'}
artists = {n for n, d in G.nodes(data=True) if d["ent"] == 'artist'}
photographers = {n for n, d in G.nodes(data=True) if d["ent"] == 'photographer'}

In [19]:
# Sanity check: the three node sets together should account for every node
# of G, so both printed numbers are expected to match.
total_nodes = len(G.nodes())
classified_nodes = sum(len(group) for group in (artworks, artists, photographers))
print(total_nodes)
print(classified_nodes)

66742
66742


In [20]:
### saving the whole network

In [28]:
# Export the combined artist / artwork / photographer graph as GEXF
# (path is relative to the notebook's working directory).
nx.write_gexf(G,'networks/artist_artwork_photographer_whole.gexf')

### Separate networks

#### Artwork – Photographers

In [25]:
# Graph of photographers and artworks only: an edge links a photographer to
# each work in their list, weighted by how often that work occurs there.
B1 = nx.Graph()

for photographer, photographed_works in photographer_dict.items():

    B1.add_node(photographer, ent='photographer')

    for work_id, n_photos in Counter(photographed_works).items():
        B1.add_node(work_id, ent='artwork')
        B1.add_edge(photographer, work_id, weight=n_photos)

In [26]:
# Projecting the one-mode network: B1 is projected onto the `photographers`
# node set (that set was derived from G above, not from B1 itself).
# Per the networkx docs the result should contain only those nodes, with edge
# weights counting shared neighbours — confirm against the installed version.
G1 = bipartite.weighted_projected_graph(B1, photographers)

In [29]:
# Export the projected graph G1 as GEXF (path relative to the working directory).
nx.write_gexf(G1,'networks/cophotograph_artwork_whole.gexf')

In [17]:
### filtering nodes: decide type of filtering
# Collects every artist whose degree in G1 is at most 10, then prints how
# many artists would be removed and how many would remain.
# NOTE(review): at this point G1 is the projection built from B1 and
# `photographers`, so artist ids are presumably not nodes of G1 and this
# degree lookup will not behave as intended on a fresh top-to-bottom run.
# The cell's execution count is out of order, so its output likely stems
# from an earlier G1 — confirm which graph should be filtered here.
filter_artists = []
for a in artists:
    if G1.degree(a) <= 10:
        filter_artists.append(a)
print(len(filter_artists))
print(len(artists)-len(filter_artists))

4589
896


In [18]:
# Drop the low-degree artists collected above from G1 (mutates G1 in place,
# so re-running earlier cells is needed to get the unfiltered graph back).
G1.remove_nodes_from(filter_artists)

In [19]:
# now, remove artworks without artists
# Collects every artwork whose degree in G1 is 0, then prints how many
# artworks would be removed and how many would remain.
# NOTE(review): as with the artist filter, this assumes G1 contains the
# artwork nodes; with G1 being the photographer projection that is
# presumably not the case — confirm which graph is meant.
filter_artworks = []
for w in artworks:
    if G1.degree(w) == 0:
        filter_artworks.append(w)
print(len(filter_artworks))
print(len(artworks)-len(filter_artworks))

10548
41833


In [20]:
# Drop the artworks collected above from G1 (mutates G1 in place).
G1.remove_nodes_from(filter_artworks)

In [None]:
# now we can save any type of (filtered) network
# TODO: replace the placeholder below with a real output path before running.
nx.write_gexf(G1,'<filtered-network-filename>')

## Testing area!

In [21]:
# Flat list of all photographer–artwork edge weights, i.e. how often each
# work occurs in each photographer's list.
weights_test = [
    n_photos
    for photographed_works in photographer_dict.values()
    for n_photos in Counter(photographed_works).values()
]


In [22]:
# Largest and smallest edge weight (raises ValueError if weights_test is empty).
print(max(weights_test), min(weights_test))

67 1
