In [1]:
import json
import pandas as pd
import numpy as np

In [2]:
# JSON requires every key in an object to be a string.
# `obj` is a JSON document held in a Python string; it contains a string
# value, an array of strings, a null, and an array of nested objects.
obj = """
{"name": "Wes",
"places_lived": ["United States", "Spain", "Germany"],
"pet": null,
"siblings": [{"name": "Scott", "age": 25, "pet": "Zuko"},
{"name": "Katie", "age": 33, "pet": "Cisco"}]
}
"""

In [3]:
# Parse the JSON text into Python objects. As the displayed value shows,
# JSON objects become dicts, arrays become lists, and null becomes None.
result = json.loads(obj)
result

{'name': 'Wes',
 'pet': None,
 'places_lived': ['United States', 'Spain', 'Germany'],
 'siblings': [{'age': 25, 'name': 'Scott', 'pet': 'Zuko'},
  {'age': 33, 'name': 'Katie', 'pet': 'Cisco'}]}

In [4]:
# Serialize the parsed Python object back into a JSON-formatted string.
asjson = json.dumps(result)

In [5]:
# Turn the list of sibling dicts into a table. `columns` selects which keys
# to keep and in what order, so the "pet" field is left out of the frame.
pd.DataFrame(result["siblings"], columns=["name", "age"])

Unnamed: 0,name,age
0,Scott,25
1,Katie,33


### XML and HTML: Web Scraping 

In [29]:
from bs4 import BeautifulSoup
import urllib3
import certifi

In [26]:
# Build a urllib3 connection pool that requires certificate verification,
# using the CA bundle path reported by certifi.
http = urllib3.PoolManager(cert_reqs = 'CERT_REQUIRED',
                           ca_certs=certifi.where())
# Send a GET request for the AAPL options page.
# NOTE(review): despite its name, `req` holds whatever `http.request` returns
# (the response object, per the urllib3 docs), not a request.
# NOTE(review): no `timeout` is passed, so the call relies on urllib3's default.
req = http.request(method = 'GET',
                   url = 'https://finance.yahoo.com/quote/AAPL/options?ltr=1')

### Interacting with Databases

In [36]:
import sqlite3

In [60]:
# SQL statement that creates the demo table `test`.
query = """
CREATE TABLE test
(a VARCHAR(20), b VARCHAR(20),
 c REAL, d INTEGER 
);"""  # four columns: a and b are VARCHAR(20), c is REAL, d is INTEGER

In [61]:
# Connect to a SQLite database addressed as ':memory:' (an in-memory database
# per the sqlite3 docs), run the CREATE TABLE statement, and commit it.
con = sqlite3.connect(':memory:')
con.execute(query)
con.commit()

### Insert a few rows of data

In [62]:
# Sample rows to load; each tuple supplies one value per column of `test`.
data = [('Atlanta', 'Georgia', 1.25, 6),
        ('Tallahassee', 'Florida', 2.6, 3),
        ('Sacramento', 'California', 1.7, 5)]
# The "?" placeholders let sqlite3 bind each value itself instead of having
# the values formatted into the SQL text.
stmt = 'INSERT INTO test VALUES(?, ?, ?, ?)'

# Using the connection as a context manager commits when the block succeeds
# and rolls back if an insert raises, so a failure part-way through cannot
# leave a partial batch pending on the connection.
# NOTE: re-running this cell inserts the same three rows again.
with con:
    con.executemany(stmt, data)

In [63]:
# Run the SELECT directly on the connection; the returned cursor is used
# below to fetch the rows and to read the column metadata.
cursor = con.execute('select * from test')

In [64]:
# Fetch all rows of the query result (displayed below as a list of tuples).
rows = cursor.fetchall()
rows

[('Atlanta', 'Georgia', 1.25, 6),
 ('Tallahassee', 'Florida', 2.6, 3),
 ('Sacramento', 'California', 1.7, 5)]

In [73]:
# Label the columns using the first item of each `cursor.description` entry,
# which holds the column name.
pd.DataFrame(rows, columns=[col[0] for col in cursor.description])

Unnamed: 0,a,b,c,d
0,Atlanta,Georgia,1.25,6
1,Tallahassee,Florida,2.6,3
2,Sacramento,California,1.7,5


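pandas provides a `read_sql_query` function (defined in `pandas.io.sql` and also available as `pd.read_sql_query`) that simplifies this process: it runs the query and returns the result directly as a DataFrame.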

In [78]:
import pandas.io.sql as sql

# Run the query on the open connection and get the result back as a
# DataFrame in a single call, with no manual cursor handling.
sql.read_sql_query(sql="select * from test", con=con)

Unnamed: 0,a,b,c,d
0,Atlanta,Georgia,1.25,6
1,Tallahassee,Florida,2.6,3
2,Sacramento,California,1.7,5
