In [1]:
import requests

## CRUD

| | SQL | RESTful API |
|:-:|:-:|:-:|
| create | `INSERT` | `POST` |
| read | `SELECT` | `GET` |
| update | `UPDATE` | `PUT` |
| delete | `DELETE` | `DELETE` |

![](http://interactive.blockdiag.com/image?compression=deflate&encoding=base64&src=eJxLyslPzk7JTExXqOZSUFAPcnV0UUgrys9VKM_MzixIBcqoK-jaKRSlFpfmlBSDmOqefsGuQSEKJfkKBfnFJelAKXVrrloAaBAXgg)

## Read results from Google

In [2]:
# Always pass a timeout: without one, requests can wait on an unresponsive server indefinitely.
requests.get('https://google.com', timeout=10)

<Response [200]>

In [3]:
# Keep the Response object so later cells can inspect it.
# The timeout stops the notebook from hanging if the server never answers.
response = requests.get('https://google.com', timeout=10)

In [4]:
response

<Response [200]>

In [5]:
type(response)

requests.models.Response

In [6]:
response.status_code

200

In [7]:
response.text

'<!doctype html><html itemscope="" itemtype="http://schema.org/WebPage" lang="en"><head><meta content="Search the world\'s information, including webpages, images, videos and more. Google has many special features to help you find exactly what you\'re looking for." name="description"><meta content="noodp" name="robots"><meta content="text/html; charset=UTF-8" http-equiv="Content-Type"><meta content="/images/branding/googleg/1x/googleg_standard_color_128dp.png" itemprop="image"><title>Google</title><script>(function(){window.google={kEI:\'ooWUWcuSGuqY0wKo0qaIAw\',kEXPI:\'18168,1353382,1354277,1354401,1354514,1354625,1354750,1354815,1354839,1355077,3700243,3700347,3700442,4029815,4031109,4043492,4045841,4048347,4072775,4076607,4076999,4077776,4081038,4081164,4083863,4093313,4094544,4095910,4097153,4097922,4097929,4097951,4098721,4098728,4098752,4102110,4102238,4102827,4103475,4103845,4103861,4104258,4107294,4107555,4109316,4109490,4110426,4110656,4110959,4111016,4111275,4113148,4113217,4

In [8]:
response.url

'https://www.google.com/'

In [9]:
# We don't want HTML here.
# Websites can return other formats (e.g. JSON) in response to a GET request.

In [10]:

# $ URL='https://www.wikidata.org/w/api.php?action=wbgetentities&ids=Q39246&format=json'
# $ curl -s $URL | jq '.entities[].labels.en.value'
# "Richard Feynman"
# 
# $ curl -s $URL | jq '.entities[].claims|length'
# 55

## Query Wikidata and Wikipedia

In [11]:
# Same Wikidata entity lookup as the curl example; the timeout prevents an indefinite hang.
requests.get('https://www.wikidata.org/w/api.php?action=wbgetentities&ids=Q39246&format=json', timeout=10)

<Response [200]>

In [12]:
# Let requests build (and escape) the query string from a dict instead of hand-writing it;
# the resulting URL is the same as the one in the curl example.
# The timeout stops the notebook from hanging if the server never answers.
response_wiki = requests.get(
    'https://www.wikidata.org/w/api.php',
    params={'action': 'wbgetentities', 'ids': 'Q39246', 'format': 'json'},
    timeout=10,
)

In [13]:
type(response_wiki)

requests.models.Response

In [14]:
response_wiki.text

'{"entities":{"Q39246":{"pageid":41693,"ns":0,"title":"Q39246","lastrevid":537785543,"modified":"2017-08-14T16:23:44Z","type":"item","id":"Q39246","labels":{"zh":{"language":"zh","value":"\\u7406\\u67e5\\u5fb7\\u00b7\\u8cbb\\u66fc"},"jv":{"language":"jv","value":"Richard Feynman"},"eu":{"language":"eu","value":"Richard Feynman"},"pl":{"language":"pl","value":"Richard Feynman"},"bs":{"language":"bs","value":"Richard Feynman"},"es":{"language":"es","value":"Richard Feynman"},"ta":{"language":"ta","value":"\\u0bb0\\u0bbf\\u0b9a\\u0bcd\\u0b9a\\u0bb0\\u0bcd\\u0b9f\\u0bc1 \\u0b83\\u0baa\\u0bc6\\u0baf\\u0bbf\\u0ba9\\u0bcd\\u0bae\\u0bbe\\u0ba9\\u0bcd"},"hu":{"language":"hu","value":"Richard Feynman"},"ms":{"language":"ms","value":"Richard Feynman"},"sw":{"language":"sw","value":"Richard Feynman"},"et":{"language":"et","value":"Richard Feynman"},"bn":{"language":"bn","value":"\\u09b0\\u09bf\\u099a\\u09be\\u09b0\\u09cd\\u09a1 \\u09ab\\u09be\\u0987\\u09a8\\u09ae\\u09cd\\u09af\\u09be\\u09a8"},"sq"

In [15]:
response_wiki.status_code

200

In [16]:
response_json = response_wiki.json()
# Decode the JSON body into a Python dict so we can pull out individual fields.

In [17]:
type(response_json)

dict

In [18]:
response_json

{'entities': {'Q39246': {'aliases': {'am': [{'language': 'am',
      'value': 'ሪቻርድ ፌይንማን'}],
    'ar': [{'language': 'ar', 'value': 'ريتشارد فينمان'}],
    'be': [{'language': 'be', 'value': 'Рычард Фейнман'},
     {'language': 'be', 'value': 'Рычард Файнман'}],
    'bg': [{'language': 'bg', 'value': 'Ричард Филипс Фейнман'},
     {'language': 'bg', 'value': 'Ричард Фейнман'},
     {'language': 'bg', 'value': 'Фейнмън'},
     {'language': 'bg', 'value': 'Фейнман'},
     {'language': 'bg', 'value': 'Ричард Филипс Файнмън'},
     {'language': 'bg', 'value': 'Ричард Файнмън'},
     {'language': 'bg', 'value': 'Ричард Фейнмън'},
     {'language': 'bg', 'value': 'Файнмън'},
     {'language': 'bg', 'value': 'Ричард Филипс Фейнмън'}],
    'bn': [{'language': 'bn', 'value': 'আর পি ফাইনম্যান'},
     {'language': 'bn', 'value': 'রিচার্ড ফিলিপ্\u200cস ফাইনম্যান'}],
    'bs': [{'language': 'bs', 'value': 'Richard P. Feynman'}],
    'ca': [{'language': 'ca', 'value': 'Richard Phillips Feynman'},
 

In [19]:
response_json.keys()

dict_keys(['success', 'entities'])

In [20]:
response_json['entities'].keys()

dict_keys(['Q39246'])

In [21]:
# (stray line-continuation backslash removed; on its own it is a SyntaxError)

In [22]:
# explore the json dictionary and get the information you need
# then put that into a database schema

In [23]:
# There might be a Python library for querying the Wikipedia API.

In [24]:
raw_page_url = 'https://en.wikipedia.org/wiki/Richard_Feynman'

In [25]:
# Fetch the rendered article page (raw HTML).
# The timeout stops the notebook from hanging if the server never answers.
response_rich_f = requests.get(raw_page_url, timeout=10)

In [26]:
response_rich_f.status_code

200

In [27]:
response_rich_f.text

'<!DOCTYPE html>\n<html class="client-nojs" lang="en" dir="ltr">\n<head>\n<meta charset="UTF-8"/>\n<title>Richard Feynman - Wikipedia</title>\n<script>document.documentElement.className = document.documentElement.className.replace( /(^|\\s)client-nojs(\\s|$)/, "$1client-js$2" );</script>\n<script>(window.RLQ=window.RLQ||[]).push(function(){mw.config.set({"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"Richard_Feynman","wgTitle":"Richard Feynman","wgCurRevisionId":794555560,"wgRevisionId":794555560,"wgArticleId":25523,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with IBDb links","Featured articles","Use mdy dates from March 2016","Official website different in Wikidata and Wikipedia","AC with 15 elements","Wikipedia articles with VIAF identifiers","Wikipedia articles with LCCN identifiers","Wikipedia articles with ISNI identifiers","Wikipedia articles with GND i

## Using Josh's old Repo

In [28]:
base_url = "https://en.wikipedia.org/w/api.php"
# Note that Josh used the api.php endpoint, which returns a structured response that is much
# easier to parse than the raw HTML of the article page.

In [29]:

# Query-string fragments for the MediaWiki API; they are concatenated onto base_url
# (followed by an article title) to form a request URL.
# NOTE(review): `action=mobileview` comes from the MobileFrontend extension and appears to have
# been retired on Wikimedia sites — confirm it still responds. `action=parse&prop=sections`
# looks like the replacement, but presumably returns a differently keyed response.
action = "?action=mobileview"
parameters = "&format=json&prop=sections&sections=all"  # JSON output; request every section
page = "&page="  # the article title is appended directly after this

In [30]:
# Assemble the full API request URL for one article.
# 'Richard_Feynman' is the article title: the same last path segment that appears in
# raw_page_url ('https://en.wikipedia.org/wiki/Richard_Feynman'). It goes right after "&page=".
richard_feynman_url = ''.join([base_url, action, parameters, page, 'Richard_Feynman'])
richard_feynman_url

'https://en.wikipedia.org/w/api.php?action=mobileview&format=json&prop=sections&sections=all&page=Richard_Feynman'

In [31]:
# Call the API endpoint built above.
# The timeout stops the notebook from hanging if the server never answers.
response_rich_f = requests.get(richard_feynman_url, timeout=10)
response_rich_f

<Response [200]>

In [32]:
richard_f_page_headings = response_rich_f.json()
# Decoded JSON body; the page's section headings are listed under ['mobileview']['sections'].

In [33]:
richard_f_page_headings

{'mobileview': {'sections': [{'id': 0},
   {'id': 1, 'line': 'Early life', 'toclevel': 1},
   {'id': 2, 'line': 'Education', 'toclevel': 1},
   {'id': 3, 'line': 'Manhattan Project', 'toclevel': 1},
   {'id': 4, 'line': 'Cornell', 'toclevel': 1},
   {'id': 5, 'line': 'Caltech years', 'toclevel': 1},
   {'id': 6, 'line': 'Personal and political life', 'toclevel': 2},
   {'id': 7, 'line': 'Physics', 'toclevel': 2},
   {'id': 8, 'line': 'Pedagogy', 'toclevel': 2},
   {'id': 9, 'line': "<i>Surely You're Joking Mr. Feynman</i>", 'toclevel': 2},
   {'id': 10, 'line': 'Challenger disaster', 'toclevel': 2},
   {'id': 11, 'line': 'Recognition and awards', 'toclevel': 2},
   {'id': 12, 'line': 'Death', 'toclevel': 1},
   {'id': 13, 'line': 'Popular legacy', 'toclevel': 1},
   {'id': 14, 'line': 'Bibliography', 'toclevel': 1},
   {'id': 15, 'line': 'Selected scientific works', 'toclevel': 2},
   {'id': 16, 'line': 'Textbooks and lecture notes', 'toclevel': 2},
   {'id': 17, 'line': 'Popular works

In [34]:
richard_f_page_headings['mobileview']['sections']

[{'id': 0},
 {'id': 1, 'line': 'Early life', 'toclevel': 1},
 {'id': 2, 'line': 'Education', 'toclevel': 1},
 {'id': 3, 'line': 'Manhattan Project', 'toclevel': 1},
 {'id': 4, 'line': 'Cornell', 'toclevel': 1},
 {'id': 5, 'line': 'Caltech years', 'toclevel': 1},
 {'id': 6, 'line': 'Personal and political life', 'toclevel': 2},
 {'id': 7, 'line': 'Physics', 'toclevel': 2},
 {'id': 8, 'line': 'Pedagogy', 'toclevel': 2},
 {'id': 9, 'line': "<i>Surely You're Joking Mr. Feynman</i>", 'toclevel': 2},
 {'id': 10, 'line': 'Challenger disaster', 'toclevel': 2},
 {'id': 11, 'line': 'Recognition and awards', 'toclevel': 2},
 {'id': 12, 'line': 'Death', 'toclevel': 1},
 {'id': 13, 'line': 'Popular legacy', 'toclevel': 1},
 {'id': 14, 'line': 'Bibliography', 'toclevel': 1},
 {'id': 15, 'line': 'Selected scientific works', 'toclevel': 2},
 {'id': 16, 'line': 'Textbooks and lecture notes', 'toclevel': 2},
 {'id': 17, 'line': 'Popular works', 'toclevel': 2},
 {'id': 18, 'line': 'Audio and video record