## Building a simple graph from the Nobel Prize JSON data
#### We use only simple edges as connections.

In [1]:
import json
import pandas as pd

# Locations of the three input datasets, relative to the notebook's working
# directory. `file1` and `file2` are loaded in the cells that follow.
file1, file2, file3 = (
    '../data/Nobel_prize.json',
    '../data/Nobel_laureate.json',
    '../data/Nobel_country.json',  # this file has simple json structure
)

In [2]:
# Load the prize-centric dataset into `json_data1`.
# The encoding is pinned to UTF-8 (the standard JSON encoding) so that
# non-ASCII laureate names decode the same way on every platform instead of
# depending on the locale's default encoding.
with open(file1, encoding='utf-8') as json_file:
    json_data1 = json.load(json_file)

In [3]:
# Peek at the laureates listed under the first prize record to see the
# per-laureate fields (id, names, motivation, share).
json_data1['prizes'][0]['laureates']

[{'id': '960',
  'firstname': 'Arthur',
  'surname': 'Ashkin',
  'motivation': '"for the optical tweezers and their application to biological systems"',
  'share': '2'},
 {'id': '961',
  'firstname': 'Gérard',
  'surname': 'Mourou',
  'motivation': '"for their method of generating high-intensity, ultra-short optical pulses"',
  'share': '4'},
 {'id': '962',
  'firstname': 'Donna',
  'surname': 'Strickland',
  'motivation': '"for their method of generating high-intensity, ultra-short optical pulses"',
  'share': '4'}]

In [4]:
# Load the laureate-centric dataset into `json_data2`.
# The encoding is pinned to UTF-8 (the standard JSON encoding) so that
# non-ASCII names decode the same way on every platform instead of depending
# on the locale's default encoding.
with open(file2, encoding='utf-8') as json_file:
    json_data2 = json.load(json_file)

In [5]:
# Inspect a single laureate record (index 100) to see the nested
# prizes -> affiliations layout that the graph mappers read from.
json_data2['laureates'][100]

{'id': '103',
 'firstname': 'Ben Roy',
 'surname': 'Mottelson',
 'born': '1926-07-09',
 'died': '0000-00-00',
 'bornCountry': 'USA',
 'bornCountryCode': 'US',
 'bornCity': 'Chicago, IL',
 'gender': 'male',
 'prizes': [{'year': '1975',
   'category': 'physics',
   'share': '3',
   'motivation': '"for the discovery of the connection between collective motion and particle motion in atomic nuclei and the development of the theory of the structure of the atomic nucleus based on this connection"',
   'affiliations': [{'name': 'Nordita',
     'city': 'Copenhagen',
     'country': 'Denmark'}]}]}

In [6]:
import networkx as nx
from pprint import pprint
from graphgen import create_graph

In [7]:
# Node-extraction rules passed to graphgen's `create_graph`.
# Each rule gives a node type, the path inside a laureate record to read, the
# field(s) used as the node key, and the attributes to store on the node.
# NOTE(review): judging by the recorded outputs, node ids come out as
# "<type>_<key value>" (e.g. 'Prize_chemistry') -- confirm against graphgen.

# One node per affiliation, keyed by the affiliation's name.
affiliation_node_rule = {
    'type': 'Affiliations',
    'path': '/prizes/affiliations',
    'key': [
        {'name': 'name', 'raw': '/prizes/affiliations/name'},
    ],
    'attributes': [
        {'name': 'name', 'raw': '/prizes/affiliations/name'},
        {'name': 'city', 'raw': '/prizes/affiliations/city'},
        {'name': 'country', 'raw': '/prizes/affiliations/country'},
    ],
}

# One node per prize, keyed by category only.
prize_node_rule = {
    'type': 'Prize',
    'path': '/prizes',
    'key': [
        {'name': 'category', 'raw': '/prizes/category'},
    ],
    'attributes': [
        {'name': 'category', 'raw': '/prizes/category'},
    ],
}

nodes_mapper = {'nodes': [affiliation_node_rule, prize_node_rule]}
# Edge-extraction rules passed to graphgen's `create_graph`.
# A single 'Awarded' edge type links an affiliation (looked up by name) to a
# prize (looked up by category).
# No edge attributes are configured: the original draft carried a
# commented-out 'attributes' entry mapping `year` to '/prizes/year'.

# Source endpoint: the affiliation, identified by its name.
awarded_from_affiliation = {
    'type': 'Affiliations',
    'path': '/prizes/affiliations',
    'key': [
        {'name': 'name', 'raw': '/prizes/affiliations/name'},
    ],
}

# Target endpoint: the prize, identified by its category.
awarded_to_prize = {
    'type': 'Prize',
    'path': '/prizes',
    'key': [
        {'name': 'category', 'raw': '/prizes/category'},
    ],
}

edges_mapper = {
    'edges': [
        {
            'type': 'Awarded',
            'from': awarded_from_affiliation,
            'to': awarded_to_prize,
        },
    ]
}

In [8]:
# Build the graph in two passes over the same laureate records: first the
# node rules, then the edge rules. `g` is rebound to whatever `create_graph`
# returns after each pass.
laureate_records = json_data2['laureates']

g = nx.MultiGraph()
for mapper in (nodes_mapper, edges_mapper):
    g = create_graph(g, graph_mapper=mapper, data_provider=laureate_records)

type: /prizes/affiliations/ - 753
type: /prizes/ - 941
type: /prizes/affiliations/ -> /prizes/ - 753


In [9]:
# Check what kind of graph object `create_graph` handed back.
type(g)

networkx.classes.multigraph.MultiGraph

In [10]:
# Count of distinct nodes in the graph.
g.number_of_nodes()

341

In [11]:
# Total edge count.
g.number_of_edges()

753

In [12]:
# Pretty-print one raw laureate record (index 216) as a reference point for
# the node and edge lookups that follow.
pprint(json_data2['laureates'][216])

{'born': '1907-10-02',
 'bornCity': 'Glasgow',
 'bornCountry': 'Scotland',
 'bornCountryCode': 'GB',
 'died': '1997-01-10',
 'diedCity': 'Cambridge',
 'diedCountry': 'United Kingdom',
 'diedCountryCode': 'GB',
 'firstname': 'Lord (Alexander R.)',
 'gender': 'male',
 'id': '221',
 'prizes': [{'affiliations': [{'city': 'Cambridge',
                               'country': 'United Kingdom',
                               'name': 'University of Cambridge'}],
             'category': 'chemistry',
             'motivation': '"for his work on nucleotides and nucleotide '
                           'co-enzymes"',
             'share': '1',
             'year': '1957'}],
 'surname': 'Todd'}


In [13]:
# Show the attributes stored on one affiliation node.
# `g.nodes[...]` is used because the old `Graph.node` alias was removed in
# NetworkX 2.4.
print(g.nodes['Affiliations_University of Cambridge'])

{'_type_': 'Affiliations', 'name': 'University of Cambridge', 'city': 'Cambridge', 'country': 'United Kingdom'}


In [14]:
# Show the attributes stored on two prize nodes.
# `g.nodes[...]` is used because the old `Graph.node` alias was removed in
# NetworkX 2.4.
print(g.nodes['Prize_chemistry'])
print(g.nodes['Prize_physics'])

{'_type_': 'Prize', 'category': 'chemistry'}
{'_type_': 'Prize', 'category': 'physics'}


In [15]:
# List every edge between this affiliation and the chemistry prize node.
# In a MultiGraph the result is a dict of edge-key -> attribute dict, one
# entry per parallel edge.
pprint(g.get_edge_data('Affiliations_University of Cambridge', 'Prize_chemistry'))

{0: {'_type_': 'Awarded'}, 1: {'_type_': 'Awarded'}, 2: {'_type_': 'Awarded'}}


In [16]:
# Same lookup with the endpoints swapped. The graph was created as an
# undirected MultiGraph, so the recorded output matches the previous cell.
pprint(g.get_edge_data('Prize_chemistry', 'Affiliations_University of Cambridge'))

{0: {'_type_': 'Awarded'}, 1: {'_type_': 'Awarded'}, 2: {'_type_': 'Awarded'}}
