# Lab Solutions: Binary Tree Data Structures and Nested JSON

In [22]:
"""Data Structures: Working with Graphs/Networks"""
# EXAMPLE: link nodes into a ring
# Function to link 2 nodes (not in the sense of last script)
def makeLink(G, node1, node2):
  """Connect node1 and node2 with an undirected edge in graph G.

  G is a dict of dicts: G[u][v] is True when u and v are linked.
  Nodes that are not yet present are created on demand. G is modified
  in place and also returned, so callers may write
  ``G = makeLink(G, a, b)``.
  """
  # record the edge in both directions; setdefault creates the
  # neighbour dict the first time a node is seen
  for source, target in ((node1, node2), (node2, node1)):
    G.setdefault(source, {})[target] = True
  return G

In [23]:
# Start from an empty adjacency dict and join "a" and "b" in one step.
graph = makeLink({}, "a", "b")
graph

{'a': {'b': True}, 'b': {'a': True}}

In [24]:
# Build a ring of n nodes: node i is joined to node (i+1) mod n,
# so the final edge wraps around from n-1 back to 0.
n = 5
ring = {}

for i in range(n):
  neighbour = (i + 1) % n
  ring = makeLink(ring, i, neighbour)
  # show the adjacency dict after each new edge
  print(ring)

{0: {1: True}, 1: {0: True}}
{0: {1: True}, 1: {0: True, 2: True}, 2: {1: True}}
{0: {1: True}, 1: {0: True, 2: True}, 2: {1: True, 3: True}, 3: {2: True}}
{0: {1: True}, 1: {0: True, 2: True}, 2: {1: True, 3: True}, 3: {2: True, 4: True}, 4: {3: True}}
{0: {1: True, 4: True}, 1: {0: True, 2: True}, 2: {1: True, 3: True}, 3: {2: True, 4: True}, 4: {3: True, 0: True}}


In [6]:
print(ring)
# The ring closes on itself: 4 - 0 - 1 - 2 - 3 - 4

{0: {1: True, 4: True}, 1: {0: True, 2: True}, 2: {1: True, 3: True}, 3: {2: True, 4: True}, 4: {3: True, 0: True}}


In [7]:
# How many nodes?
# Every node is a top-level key of the adjacency dict.
print(len(ring))

5


In [8]:
# How many edges?
# Each edge is stored under both of its endpoints, so halve the degree sum.
print(sum(len(neighbours) for neighbours in ring.values()) / 2)

5.0


1. Grid Network

* Create a square graph with 9 nodes using the makeLink function (from above)
* Example: https://www.researchgate.net/profile/Mehdi-Zaferanieh/publication/344188550/figure/fig2/AS:940265934184449@1601188276248/The-grid-network-with-9-nodes_Q320.jpg


In [26]:
# YOUR CODE HERE
# You may want to use the module math
import math 

# total number of nodes in the square grid (3 x 3)
n = 9
# adjacency dict for the grid, filled in by the next cell
g = {}

In [27]:
# Build a square grid whose nodes are numbered 1..n row by row.
# The side length does not change inside the loop, so compute it once.
n_width = int(math.sqrt(n))

# The loop stops at n-1: node n is the bottom-right corner and only
# receives links from its left and upper neighbours.
for i in range(1,n):
  ## if we are not on the right-hand boundary
  ## i.e., if node is not a multiple of the grid width
  ## then link to the next node in the same row
  if i%n_width != 0:
    makeLink(g, i, i+1)
  ## if not on the last row
  ## link to the node directly below
  if i <= n-n_width:
    makeLink(g, i, i+n_width)
print(g)

# 1--2--3
# |  |  |
# 4--5--6
# |  |  |
# 7--8--9

{1: {2: True, 4: True}, 2: {1: True, 3: True, 5: True}, 4: {1: True, 5: True, 7: True}, 3: {2: True, 6: True}, 5: {2: True, 4: True, 6: True, 8: True}, 6: {3: True, 5: True, 9: True}, 7: {4: True, 8: True}, 8: {5: True, 7: True, 9: True}, 9: {6: True, 8: True}}


* Define a function countEdges

In [28]:
# YOUR CODE HERE
def count_edges(graph):
  """Return the number of undirected edges in an adjacency-dict graph.

  Every edge u-v is stored twice (once under u and once under v), so the
  sum of all neighbour counts is halved. The result is a float because
  true division is used.
  """
  total_degree = sum(len(neighbours) for neighbours in graph.values())
  return total_degree / 2

# The 3x3 grid has 6 horizontal and 6 vertical edges.
count_edges(g)

12.0

In [31]:
# YOUR CODE HERE
def count_edges(graph):
    """Return the number of undirected edges, using an explicit loop.

    Walks over every node, adds up how many neighbours each one has, and
    halves the total because each edge is recorded under both endpoints.
    The result is a float because true division is used.
    """
    degree_total = 0
    for node in graph:
        degree_total += len(graph[node])
    return degree_total / 2

# Same grid as before; the loop-based version gives the same count.
count_edges(g)

12.0

2. Social Network

In [32]:
# some set-up:
class Actor:
    """A node in the co-starring network, identified by the actor's name."""

    def __init__(self, name):
        # No __eq__/__hash__ is defined, so instances compare and hash by
        # identity and can be used directly as dictionary keys.
        self.name = name

    def __repr__(self):
        """Show the bare name so graphs and paths print readably."""
        return self.name

In [33]:
# One Actor instance per performer; these objects are the nodes of the
# movie graph built in the next cell.
ss = Actor("Susan Sarandon")
jr = Actor("Julia Roberts")
kb = Actor("Kevin Bacon")
ah = Actor("Anne Hathaway")
rd = Actor("Robert DeNiro")
ms = Actor("Meryl Streep")
dh = Actor("Dustin Hoffman")

In [34]:
# Each makeLink call adds one undirected edge between two actors who
# appeared in the same film (title given in the trailing comment).
movies = {}

movies = makeLink(movies, dh, rd) # Wag the Dog
movies = makeLink(movies, rd, ms) # Marvin's Room
movies = makeLink(movies, dh, ss) # Moonlight Mile
movies = makeLink(movies, dh, jr) # Hook
movies = makeLink(movies, dh, kb) # Sleepers
movies = makeLink(movies, ss, jr) # Stepmom
movies = makeLink(movies, kb, jr) # Flatliners
movies = makeLink(movies, kb, ms) # The River Wild
movies = makeLink(movies, ah, ms) # Devil Wears Prada
movies = makeLink(movies, ah, jr) # Valentine's Day

In [35]:
def findPath(graph, start, end, path=[]):
    """Return one simple path from start to end, or None if there is none.

    Performs a depth-first search with backtracking over an adjacency
    dict, where graph[u] is an iterable of u's neighbours. The returned
    list begins with start, ends with end and never repeats a node. It is
    the first path found in neighbour iteration order, which is not
    necessarily the shortest one.

    path holds the nodes already visited on the way to start; callers
    normally leave it at its default.
    """
    ## `path + [start]` builds a new list, so the shared default list
    ## is never mutated
    path = path + [start]
    ## base case, reached end
    if start == end:
        return path
    if start not in graph:
        return None
    ## try each unvisited neighbour in turn
    for node in graph[start]:
        if node in path:
            continue
        found = findPath(graph, node, end, path)
        ## a dead end yields None: backtrack and try the next neighbour
        if found is not None:
            return found
    ## every neighbour was visited already or led to a dead end
    return None

# Prints the first path the search finds, which need not be the shortest.
print(findPath(movies, jr, ms))

[Julia Roberts, Dustin Hoffman, Robert DeNiro, Meryl Streep]


In [36]:
## Manual level-by-level check of the shortest path from Julia Roberts
## to Meryl Streep.
## NOTE: a notebook cell displays only its last expression, so the output
## below is movies[kb].keys(); run the lines one at a time to see the rest.
## start with julia roberts 
## who is she directly connected to?
movies[jr].keys()
## who are they connected to?
movies[ss].keys() 
movies[ah].keys() ## found meryl streep!
movies[dh].keys()
movies[kb].keys() ## found meryl streep!
## so shortest path is either
## jr -- ah -- ms
## jr -- kb -- ms

dict_keys([Dustin Hoffman, Julia Roberts, Meryl Streep])

In [43]:
movies[jr]

{Dustin Hoffman: True,
 Susan Sarandon: True,
 Kevin Bacon: True,
 Anne Hathaway: True}

In [44]:
movies[dh]

{Robert DeNiro: True,
 Susan Sarandon: True,
 Julia Roberts: True,
 Kevin Bacon: True}

In [45]:
movies[rd]

{Dustin Hoffman: True, Meryl Streep: True}

* Implement `findAllPaths()` to find all paths between two nodes

In [49]:
# YOUR CODE HERE
def findAllPaths(graph, start, end, path=[]):
    """Return a list of every simple path from start to end.

    graph is an adjacency dict, where graph[u] is an iterable of u's
    neighbours. Each path is a list of nodes that begins with start, ends
    with end and never repeats a node. Paths appear in depth-first
    discovery order. The result is always a real list (empty when no path
    exists or start is not in the graph), so it can be iterated repeatedly.

    path holds the nodes already visited on the way to start; callers
    normally leave it at its default.
    """
    # `path + [start]` builds a new list, so the shared default list
    # is never mutated
    path = path + [start]

    # base case
    if start == end:
        return [path]
    # a node without an adjacency entry contributes no paths; returning an
    # empty list keeps the caller's extend() working
    if start not in graph:
        return []
    # empty list to fill with paths
    allpaths = []
    for node in graph[start]:
        # only step onto nodes that are not already on the current path
        if node not in path:
            # recurse from the neighbour, carrying the path walked so far
            allpaths.extend(findAllPaths(graph, node, end, path))
    return allpaths

# Materialise the result as a list so it can be iterated more than once
# (a lazy iterator such as a filter object is exhausted after one pass).
allpaths = list(findAllPaths(graph = movies, start = jr, end = ms))

In [50]:
# Print one path per line.
# NOTE: if allpaths is a lazy iterator (e.g. a filter object), this loop
# consumes it, and re-running this cell alone would print nothing.
for path in allpaths:
  print(path)

[Julia Roberts, Dustin Hoffman, Robert DeNiro, Meryl Streep]
[Julia Roberts, Dustin Hoffman, Kevin Bacon, Meryl Streep]
[Julia Roberts, Susan Sarandon, Dustin Hoffman, Robert DeNiro, Meryl Streep]
[Julia Roberts, Susan Sarandon, Dustin Hoffman, Kevin Bacon, Meryl Streep]
[Julia Roberts, Kevin Bacon, Dustin Hoffman, Robert DeNiro, Meryl Streep]
[Julia Roberts, Kevin Bacon, Meryl Streep]
[Julia Roberts, Anne Hathaway, Meryl Streep]


* Implement `findShortestPath()` to print the shortest path between actors

In [51]:
# YOUR CODE HERE
def findShortestPath(graph, start, end):
    """Return the shortest path from start to end, or None if none exists.

    Enumerates every simple path with findAllPaths and keeps the one with
    the fewest nodes. When several paths tie, the first one produced by
    findAllPaths is returned.
    """
    allpaths = findAllPaths(graph, start, end)
    # guard against a None result so min() is never called on a non-iterable
    if allpaths is None:
        return None
    # default=None avoids a ValueError when there are no paths at all
    return min(allpaths, key = len, default = None)

findShortestPath(movies, jr, ms)

[Julia Roberts, Kevin Bacon, Meryl Streep]

In [52]:
print(findShortestPath(movies, ms, ss))

[Meryl Streep, Robert DeNiro, Dustin Hoffman, Susan Sarandon]


3. Flatten & extract the following information from the .json file in the lab folder. Each can be its own dataframe:

* Works
* Concerts
* Soloists

In [None]:
# YOUR CODE HERE

In [2]:
import json 
import pandas as pd 

In [3]:
# JSON exchanged between systems is UTF-8 encoded; pass the encoding
# explicitly so the load does not depend on the platform's locale.
with open('raw_nyc_phil.json', encoding='utf-8') as f:
    d = json.load(f)

In [4]:
d

{'programs': [{'season': '1842-43',
   'orchestra': 'New York Philharmonic',
   'concerts': [{'Date': '1842-12-07T05:00:00Z',
     'eventType': 'Subscription Season',
     'Venue': 'Apollo Rooms',
     'Location': 'Manhattan, NY',
     'Time': '8:00PM'}],
   'programID': '3853',
   'works': [{'workTitle': 'SYMPHONY NO. 5 IN C MINOR, OP.67',
     'conductorName': 'Hill, Ureli Corelli',
     'ID': '52446*',
     'soloists': [],
     'composerName': 'Beethoven,  Ludwig  van'},
    {'workTitle': 'OBERON',
     'composerName': 'Weber,  Carl  Maria Von',
     'conductorName': 'Timm, Henry C.',
     'ID': '8834*4',
     'soloists': [{'soloistName': 'Otto, Antoinette',
       'soloistRoles': 'S',
       'soloistInstrument': 'Soprano'}],
     'movement': '"Ozean, du Ungeheuer" (Ocean, thou mighty monster), Reiza (Scene and Aria), Act II'},
    {'workTitle': 'QUINTET, PIANO, D MINOR, OP. 74',
     'ID': '3642*',
     'soloists': [{'soloistName': 'Scharfenberg, William',
       'soloistRoles': 'A

In [5]:
nycphil = d['programs']

In [8]:
pd.json_normalize(nycphil).head(3)

Unnamed: 0,season,orchestra,concerts,programID,works,id
0,1842-43,New York Philharmonic,"[{'Date': '1842-12-07T05:00:00Z', 'eventType':...",3853,"[{'workTitle': 'SYMPHONY NO. 5 IN C MINOR, OP....",38e072a7-8fc9-4f9a-8eac-3957905c0002
1,1842-43,New York Philharmonic,"[{'Date': '1843-02-18T05:00:00Z', 'eventType':...",5178,[{'workTitle': 'SYMPHONY NO. 3 IN E FLAT MAJOR...,c7b2b95c-5e0b-431c-a340-5b37fc860b34
2,1842-43,Musicians from the New York Philharmonic,"[{'Date': '1843-04-07T05:00:00Z', 'eventType':...",10785,"[{'workTitle': 'EGMONT, OP.84', 'composerName'...",894e1a52-1ae5-4fa7-aec0-b99997555a37


##### Works data

In [13]:
# One row per entry in each program's 'works' list; the program-level
# fields named in works_meta are repeated on every row.
works_meta = ['id', 'orchestra', 'programID', 'season']
works_data = pd.json_normalize(nycphil, record_path='works', meta=works_meta)
works_data.head(3)

Unnamed: 0,workTitle,conductorName,ID,soloists,composerName,movement,interval,movement.em,movement._,workTitle.em,workTitle._,id,orchestra,programID,season
0,"SYMPHONY NO. 5 IN C MINOR, OP.67","Hill, Ureli Corelli",52446*,[],"Beethoven, Ludwig van",,,,,,,38e072a7-8fc9-4f9a-8eac-3957905c0002,New York Philharmonic,3853,1842-43
1,OBERON,"Timm, Henry C.",8834*4,"[{'soloistName': 'Otto, Antoinette', 'soloistR...","Weber, Carl Maria Von","""Ozean, du Ungeheuer"" (Ocean, thou mighty mons...",,,,,,38e072a7-8fc9-4f9a-8eac-3957905c0002,New York Philharmonic,3853,1842-43
2,"QUINTET, PIANO, D MINOR, OP. 74",,3642*,"[{'soloistName': 'Scharfenberg, William', 'sol...","Hummel, Johann",,,,,,,38e072a7-8fc9-4f9a-8eac-3957905c0002,New York Philharmonic,3853,1842-43


In [18]:
nycphil[0]['concerts']

[{'Date': '1842-12-07T05:00:00Z',
  'eventType': 'Subscription Season',
  'Venue': 'Apollo Rooms',
  'Location': 'Manhattan, NY',
  'Time': '8:00PM'}]

In [19]:
nycphil[1]['concerts']

[{'Date': '1843-02-18T05:00:00Z',
  'eventType': 'Subscription Season',
  'Venue': 'Apollo Rooms',
  'Location': 'Manhattan, NY',
  'Time': '8:00PM'}]

In [20]:
len(nycphil)

13954

##### Concerts data

In [21]:
# One row per entry in each program's 'concerts' list.
# NOTE(review): no `meta` is passed, so the rows carry no program
# identifier and cannot be joined back to works or soloists as they are.
concerts_data = pd.json_normalize(nycphil, record_path='concerts')
concerts_data.head(10)

Unnamed: 0,Date,eventType,Venue,Location,Time
0,1842-12-07T05:00:00Z,Subscription Season,Apollo Rooms,"Manhattan, NY",8:00PM
1,1843-02-18T05:00:00Z,Subscription Season,Apollo Rooms,"Manhattan, NY",8:00PM
2,1843-04-07T05:00:00Z,Special,Apollo Rooms,"Manhattan, NY",8:00PM
3,1843-04-22T05:00:00Z,Subscription Season,Apollo Rooms,"Manhattan, NY",8:00PM
4,1843-11-18T05:00:00Z,Subscription Season,Apollo Rooms,"Manhattan, NY",
5,1844-01-13T05:00:00Z,Subscription Season,Apollo Rooms,"Manhattan, NY",8:00PM
6,1844-03-16T05:00:00Z,Subscription Season,Apollo Rooms,"Manhattan, NY",
7,1844-05-18T05:00:00Z,Subscription Season,Apollo Rooms,"Manhattan, NY",
8,1844-11-16T05:00:00Z,Subscription Season,Apollo Rooms,"Manhattan, NY",8:00PM
9,1845-01-11T05:00:00Z,Subscription Season,Apollo Rooms,"Manhattan, NY",8:00PM


##### Soloists data

In [55]:
# One row per soloist, reached through programs -> works -> soloists;
# only the program-level 'id' is carried along on each row.
soloist_path = ['works', 'soloists']
soloist_data = pd.json_normalize(nycphil, record_path=soloist_path, meta=['id'])

soloist_data.head()

Unnamed: 0,soloistName,soloistRoles,soloistInstrument,id
0,"Otto, Antoinette",S,Soprano,38e072a7-8fc9-4f9a-8eac-3957905c0002
1,"Scharfenberg, William",A,Piano,38e072a7-8fc9-4f9a-8eac-3957905c0002
2,"Hill, Ureli Corelli",A,Violin,38e072a7-8fc9-4f9a-8eac-3957905c0002
3,"Derwort, G. H.",A,Viola,38e072a7-8fc9-4f9a-8eac-3957905c0002
4,"Boucher, Alfred",A,Cello,38e072a7-8fc9-4f9a-8eac-3957905c0002


In [1]:
# Copyright of the original version:

# Copyright (c) 2014 Matt Dickenson
# 
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# 
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.