# Getting Data from JSON API

* API stands for Application Programming Interface
* it is a sort of open specification for how programs can interact with a service (here, a web service)
* TODO
* read JSON from some public url
* some ideas here: https://github.com/public-apis/public-apis
* also https://jsonplaceholder.typicode.com/
* https://mockaroo.com/ - requires registration to generate API key
* choose ones that don't require authentication
* many APIs are run by volunteers, so they may be slow, unreliable, or down altogether
* paid APIs are often faster and more reliable

* TODO
* parse JSON using requests
* print out the data
* bonus: save the data into a file of some sort - it could be JSON or some other format

* you can write using notebook .ipynb or script .py

In [1]:
import pandas as pd
import requests
import json

In [4]:
# Build the EmojiHub URL for one emoji category.
category_name = 'food_and_drink'
link = f'https://emojihub.herokuapp.com/api/all/category_{category_name}'
link

'https://emojihub.herokuapp.com/api/all/category_food_and_drink'

In [5]:
# Download the category listing. requests waits indefinitely unless a timeout
# is given, so a stalled server would otherwise hang the notebook.
response = requests.get(link, timeout=10)
if response.status_code != 200:
    print("Bad Response: ", response.status_code)
    emojihub = None  # sentinel: nothing usable was downloaded
else:
    print("Good Response: ", response.status_code)
    emojihub = json.loads(response.text)  # parsed JSON payload

type(emojihub)

Good Response:  200


list

In [6]:
def prettyJSON(myjson, indent=4):
    """Return a pretty-printed JSON string.

    Args:
        myjson: JSON text (str, bytes or bytearray) or an already-parsed
            object such as the list/dict produced by json.loads.
        indent: Number of spaces used per nesting level.

    Returns:
        The JSON document serialized again with the requested indentation.

    Raises:
        json.JSONDecodeError: If myjson is text that is not valid JSON.
        TypeError: If myjson is an object that cannot be serialized to JSON.
    """
    # Only text needs parsing first; parsed objects can be dumped directly.
    if isinstance(myjson, (str, bytes, bytearray)):
        myjson = json.loads(myjson)
    return json.dumps(myjson, indent=indent)

In [7]:
# Save the parsed emoji data as indented JSON in the working directory.
with open('emojihub.json', mode='w') as out_file:
    json.dump(emojihub, out_file, indent=4)

In [9]:
# Emoji are ordinary Unicode characters: chr() turns a code point into its character.
chr(127823), chr(127824)

('🍏', '🍐')

In [11]:
# Alternative songs to try (artist and title are the last two URL path segments):
# song_url = 'https://api.lyrics.ovh/v1/Ed Sheeran/Perfect'  # Ed Sheeran - Perfect
# song_url = 'https://api.lyrics.ovh/v1/Ed Sheeran/Shape of You'  # Ed Sheeran - Shape of You
# song_url = 'https://api.lyrics.ovh/v1/Ed Sheeran/Thinking Out Loud'  # Ed Sheeran - Thinking Out Loud
song_url = 'https://api.lyrics.ovh/v1/Abba/Waterloo'  # Abba - Waterloo
# A timeout keeps the cell from hanging forever if the API does not answer.
response = requests.get(song_url, timeout=10)
if response.status_code != 200:
    print("Bad Response: ", response.status_code)
    song = None  # sentinel: no lyrics were downloaded
else:
    print("Good Response: ", response.status_code)
    song = json.loads(response.text)
type(song)

Bad Response:  404


NoneType

In [12]:
# Songsterr search URL: the artist name is appended as the `pattern` query value.
song_url_base = "https://www.songsterr.com/a/ra/songs.json?pattern="
artist = "Santana"
url = f"{song_url_base}{artist}"
url

'https://www.songsterr.com/a/ra/songs.json?pattern=Santana'

In [13]:
def getJSON(url, timeout=10):
    """Download a URL with HTTP GET and return its body parsed as JSON.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON value, or None when the request fails, the status
        code is not 200, or the body is not valid JSON.
    """
    try:
        # Without a timeout requests would wait indefinitely on a stalled server.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as err:
        # Connection problems and timeouts are reported like any other bad response.
        print("Bad Response: ", err)
        return None
    if response.status_code != 200:
        print("Bad Response: ", response.status_code)
        return None # alternative would be empty list or dict
    print("Good Response: ", response.status_code)
    try:
        return json.loads(response.text)
    except ValueError as err:
        # json.JSONDecodeError is a ValueError: the server sent something that is not JSON.
        print("Invalid JSON: ", err)
        return None

In [14]:
# Query the search URL; getJSON returns None when the status code is not 200.
song_data = getJSON(url)

Good Response:  200


In [15]:
# Look at the first search result to see what one record contains.
first_song = song_data[0]
first_song

{'id': 17608,
 'type': 'Song',
 'title': 'Smooth',
 'artist': {'id': 4978,
  'type': 'Artist',
  'nameWithoutThePrefix': 'Carlos Santana',
  'useThePrefix': False,
  'name': 'Carlos Santana'},
 'chordsPresent': True,
 'tabTypes': ['PLAYER', 'TEXT_GUITAR_TAB', 'CHORDS']}

In [18]:
# Flatten each record: replace the nested artist dict with just the artist's name.
# We loop over the list and change the value of an existing key in each dict;
# the list itself is not resized, so mutating while iterating is safe here.
for song in song_data:
    artist_info = song["artist"]  # right side is read before the key is overwritten
    # Guard so the cell can be re-run: after the first run the value is already
    # a plain string, and indexing it with "name" would raise TypeError.
    if isinstance(artist_info, dict):
        song["artist"] = artist_info["name"]

In [16]:
# Count the tab types listed for every song, then show the largest count.
tab_lengths = list(map(lambda entry: len(entry["tabTypes"]), song_data))
max(tab_lengths)

3

In [None]:
# so we could make 3 new keys, one for each tab type
# if you had, say, 10 tab types, you would need 10 new keys
# not very practical for large data sets

In [19]:
# Preview the first three records.
song_data[:3]

[{'id': 17608,
  'type': 'Song',
  'title': 'Smooth',
  'artist': 'Carlos Santana',
  'chordsPresent': True,
  'tabTypes': ['PLAYER', 'TEXT_GUITAR_TAB', 'CHORDS']},
 {'id': 451,
  'type': 'Song',
  'title': 'Europa',
  'artist': 'Carlos Santana',
  'chordsPresent': False,
  'tabTypes': ['PLAYER', 'TEXT_GUITAR_TAB']},
 {'id': 357,
  'type': 'Song',
  'title': 'Black Magic Woman',
  'artist': 'Carlos Santana',
  'chordsPresent': True,
  'tabTypes': ['PLAYER', 'TEXT_GUITAR_TAB', 'CHORDS']}]

In [20]:
# Build a table from the list of song dicts and show its first rows.
# tabTypes stays a list-valued column.
df = pd.DataFrame(song_data)
df.head()

Unnamed: 0,id,type,title,artist,chordsPresent,tabTypes
0,17608,Song,Smooth,Carlos Santana,True,"[PLAYER, TEXT_GUITAR_TAB, CHORDS]"
1,451,Song,Europa,Carlos Santana,False,"[PLAYER, TEXT_GUITAR_TAB]"
2,357,Song,Black Magic Woman,Carlos Santana,True,"[PLAYER, TEXT_GUITAR_TAB, CHORDS]"
3,461961,Song,Smooth accurate,Santana,False,[PLAYER]
4,408283,Song,Jungle Strut,Carlos Santana,False,[PLAYER]


In [21]:
# Save the table without the row index.
# NOTE(review): tabTypes holds Python lists; to_csv presumably writes their text
# form, so they would not read back as lists - confirm before relying on the CSV.
df.to_csv('santana_songs.csv', index=False)

In [23]:
# Show every record; the output is long, so slice (e.g. song_data[:5]) for a shorter preview.
song_data

[{'id': 17608,
  'type': 'Song',
  'title': 'Smooth',
  'artist': 'Carlos Santana',
  'chordsPresent': True,
  'tabTypes': ['PLAYER', 'TEXT_GUITAR_TAB', 'CHORDS']},
 {'id': 451,
  'type': 'Song',
  'title': 'Europa',
  'artist': 'Carlos Santana',
  'chordsPresent': False,
  'tabTypes': ['PLAYER', 'TEXT_GUITAR_TAB']},
 {'id': 357,
  'type': 'Song',
  'title': 'Black Magic Woman',
  'artist': 'Carlos Santana',
  'chordsPresent': True,
  'tabTypes': ['PLAYER', 'TEXT_GUITAR_TAB', 'CHORDS']},
 {'id': 461961,
  'type': 'Song',
  'title': 'Smooth accurate',
  'artist': 'Santana',
  'chordsPresent': False,
  'tabTypes': ['PLAYER']},
 {'id': 408283,
  'type': 'Song',
  'title': 'Jungle Strut',
  'artist': 'Carlos Santana',
  'chordsPresent': False,
  'tabTypes': ['PLAYER']},
 {'id': 23164,
  'type': 'Song',
  'title': 'Into The Night (Ft. Chad Kroeger)',
  'artist': 'Carlos Santana',
  'chordsPresent': False,
  'tabTypes': ['PLAYER']},
 {'id': 408286,
  'type': 'Song',
  'title': "She's Not The