## Working with Binary Data, HTML and Web APIs

In [1]:
import pandas as pd

In [2]:
# One of the easiest ways to store data efficiently in binary format is Python’s
# built-in pickle serialization.

# Load the employee CSV into a DataFrame; it is saved as a pickle in the next cell.
# NOTE: despite its name, `fh` holds the DataFrame returned by read_csv, not a file handle.
fh = pd.read_csv("dataset/Emp.csv")
print(len(fh))  # number of rows that were read

34218


In [3]:
# Serialize the DataFrame to disk in pickle (binary) format.
fh.to_pickle("dataset/emp1.pickle")

In [4]:
# Load the pickle file back into a DataFrame and preview its first rows.
# NOTE: only unpickle files you trust -- loading a pickle can execute arbitrary code.
pd.read_pickle("dataset/emp1.pickle").head()

Unnamed: 0,LNAME,FNAME
0,AARON,ELVIA J
1,AARON,JEFFERY M
2,AARON,KIMBERLEI R
3,ABAD JR,VICENTE M
4,ABBATACOLA,ROBERT J


### Reading Microsoft Excel Files

In [5]:
# Open the Excel workbook; its sheets are parsed from this object in the next cell.
xls_file = pd.ExcelFile('dataset/Emp.xls')

In [6]:
# Parse the worksheet named 'Sheet1' from the opened workbook.
table = xls_file.parse('Sheet1')

In [7]:
# Preview the first rows of the parsed sheet.
table.head()

Unnamed: 0,LNAME,FNAME,JOB TITLE,DEPARTMENT,EMPLOYEE ANNUAL SALARY,ESTIMATED ANNUAL SALARY MINUS FURLOUGHS
0,AARON,ELVIA J,WATER RATE TAKER,WATER MGMNT,$81000.00,$73862.00
1,AARON,JEFFERY M,POLICE OFFICER,POLICE,$74628.00,$74628.00
2,AARON,KIMBERLEI R,CHIEF CONTRACT EXPEDITER,FLEET MANAGEMNT,$77280.00,$70174.00
3,ABAD JR,VICENTE M,CIVIL ENGINEER IV,WATER MGMNT,$96276.00,$96276.00
4,ABBATACOLA,ROBERT J,ELECTRICAL MECHANIC,WATER MGMNT,$84032.00,$76627.00


### Reading HTML and Web API

In [4]:
import os

import tweepy

# Read the OAuth credentials from the environment instead of hardcoding them:
# secrets embedded in a notebook leak as soon as the file is shared or committed.
# Any credentials that were previously stored in this cell should be treated as
# compromised and regenerated.
# A missing variable raises KeyError here, which fails fast with a clear message
# rather than sending an empty credential to the API.
access_token = os.environ["TWITTER_ACCESS_TOKEN"]
access_token_secret = os.environ["TWITTER_ACCESS_TOKEN_SECRET"]
consumer_key = os.environ["TWITTER_CONSUMER_KEY"]
consumer_secret = os.environ["TWITTER_CONSUMER_SECRET"]

# Pass OAuth details to tweepy's OAuth handler
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# Client object that signs its requests with the credentials above.
api = tweepy.API(auth)

In [2]:
import requests

url = 'https://api.twitter.com/1.1/search/tweets.json?q=datagenx'

# Always pass a timeout: without one, requests can wait indefinitely on an
# unresponsive server and the cell never returns.
# NOTE: this request carries no OAuth credentials, so the API is expected to
# reject it; the recorded output further down shows the resulting
# "Bad Authentication data" error payload.
resp = requests.get(url, timeout=10)

In [3]:
import json

# Decode the JSON text of the HTTP response into Python objects,
# then list the top-level keys of the decoded payload.
data = json.loads(resp.text)
data.keys()

dict_keys(['errors'])

In [11]:
# Show the complete decoded payload.
print(data)

{'errors': [{'code': 215, 'message': 'Bad Authentication data.'}]}


### Interacting with Databases

In [12]:
import sqlite3

# DDL for a small demo table named `test`: two VARCHAR(20) columns (a, b),
# one REAL column (c) and one INTEGER column (d).
query = """
CREATE TABLE test
(a VARCHAR(20), b VARCHAR(20),
 c REAL,        d INTEGER
);"""

In [13]:
# Connect to an in-memory SQLite database (':memory:' means nothing is written to disk),
# create the demo table from the DDL string, and commit.
con = sqlite3.connect(':memory:')
con.execute(query)
con.commit()

In [14]:
# Rows to load: one tuple per row, values in column order (a, b, c, d).
data = [('Atlanta', 'Georgia', 1.25, 6),
        ('Tallahassee', 'Florida', 2.6, 3),
        ('Sacramento', 'California', 1.7, 5)]
# `?` placeholders let sqlite3 bind the values instead of formatting them into the SQL text.
stmt = "INSERT INTO test VALUES(?, ?, ?, ?)"
# Using the connection as a context manager commits when the block succeeds and
# rolls back if any insert raises, so a failed batch never leaves partial rows
# pending in an open transaction.
with con:
    con.executemany(stmt, data)

In [15]:
# Read every row of the table back as a list of tuples.
# `cursor` is kept in a variable because later cells read its `description` for column names.
cursor = con.execute('select * from test')
rows = cursor.fetchall()
print(rows)

[('Atlanta', 'Georgia', 1.25, 6), ('Tallahassee', 'Florida', 2.6, 3), ('Sacramento', 'California', 1.7, 5)]


In [16]:
# Column metadata of the last query: one 7-item tuple per result column.
# As the output shows, only the first item (the column name) is filled in; the rest are None.
cursor.description

(('a', None, None, None, None, None, None),
 ('b', None, None, None, None, None, None),
 ('c', None, None, None, None, None, None),
 ('d', None, None, None, None, None, None))

In [17]:
# Transpose the per-column description tuples so that each printed tuple holds
# one attribute across all columns; the first transposed tuple is the column names.
transposed = list(zip(*cursor.description))
for attribute_values in transposed:
    print(attribute_values)

transposed[0]

('a', 'b', 'c', 'd')
(None, None, None, None)
(None, None, None, None)
(None, None, None, None)
(None, None, None, None)
(None, None, None, None)
(None, None, None, None)


('a', 'b', 'c', 'd')

In [18]:
# Build the frame from the fetched rows, labelling the columns with the names
# reported by the cursor (the first item of each description entry).
column_names = [entry[0] for entry in cursor.description]
pd.DataFrame(rows, columns=column_names)

Unnamed: 0,a,b,c,d
0,Atlanta,Georgia,1.25,6
1,Tallahassee,Florida,2.6,3
2,Sacramento,California,1.7,5


In [19]:
import pandas.io.sql as sql

# Let pandas run the query on the open connection and build the DataFrame directly,
# replacing the manual fetchall() + cursor.description steps used in the earlier cells.
sql.read_sql_query('select * from test', con)

Unnamed: 0,a,b,c,d
0,Atlanta,Georgia,1.25,6
1,Tallahassee,Florida,2.6,3
2,Sacramento,California,1.7,5
