In [3]:
%matplotlib inline
from IPython.core.display import HTML
from IPython.display import YouTubeVideo
from pandas_datareader import data, wb

import os
import pandas as pd
import numpy as np
import datetime

# Paths to the two stylesheets expected in the current working directory.
path1 = os.path.join(os.getcwd(),'style-table.css')
path2 = os.path.join(os.getcwd(),'style-notebook.css')

# Read both stylesheets inside a `with` block so the file handles are closed
# as soon as their contents have been read; a bare open(...).read() leaves
# the handles open until the garbage collector gets to them.
with open(path1) as table_css, open(path2) as notebook_css:
    css = table_css.read() + notebook_css.read()

# Last expression of the cell: the HTML object wraps the combined CSS in a
# <style> element so the notebook front end renders it.
HTML('<style>{}</style>'.format(css))

In [2]:
# pre-requisites:

# conda install -c https://conda.anaconda.org/anaconda pandas-datareader
# conda install html5lib
# conda install beautiful-soup

# if ssl errors show up, use: conda remove certifi

### input from csv/text

In [None]:
# Manually create textfile1.csv with the following contents:
 
# q,r,s,t,apple
# 2,3,4,5,pear
# a,s,d,f,rabbit
# 5,2,5,7,dog

In [3]:
# Load textfile1.csv into a DataFrame with read_csv().
# header=None tells pandas the file has no header row, so the first line
# (q,r,s,t,apple) is kept as data and the columns are numbered 0..4.

dframe1 = pd.read_csv(
    'textfile1.csv',
    header=None,
)
dframe1

Unnamed: 0,0,1,2,3,4
0,q,r,s,t,apple
1,2,3,4,5,pear
2,a,s,d,f,rabbit
3,5,2,5,7,dog


In [4]:
# Load textfile1.csv again, but stop after the first two rows (nrows=2).
# header=None still keeps the first line as data rather than column names.
# NOTE: this rebinds dframe1, replacing the full four-row frame loaded earlier.

dframe1 = pd.read_csv(
    'textfile1.csv',
    header=None,
    nrows=2,
)
dframe1

Unnamed: 0,0,1,2,3,4
0,q,r,s,t,apple
1,2,3,4,5,pear


In [5]:
# Load the same file with read_table() instead of read_csv().
# read_table() splits on tabs unless told otherwise, so sep=',' is needed
# for a comma-separated file; header=None keeps the first line as data.

dframe2 = pd.read_table(
    'textfile1.csv',
    sep=',',
    header=None,
)
dframe2

Unnamed: 0,0,1,2,3,4
0,q,r,s,t,apple
1,2,3,4,5,pear
2,a,s,d,f,rabbit
3,5,2,5,7,dog


### output to csv

In [10]:
# write dframe1 to textfile1_out.csv (path is relative to the working directory)
dframe1.to_csv('textfile1_out.csv')

### output to stdout

In [12]:
import sys

In [22]:
# show dframe2 so the delimited text printed by the next cells can be compared with it
dframe2

Unnamed: 0,0,1,2,3,4
0,q,r,s,t,apple
1,2,3,4,5,pear
2,a,s,d,f,rabbit
3,5,2,5,7,dog


In [16]:
# Passing sys.stdout as the target prints the CSV text in the cell output
# instead of writing a file; sep='_' uses an underscore as the field separator.

dframe2.to_csv(
    sys.stdout,
    sep='_',
)

_0_1_2_3_4
0_q_r_s_t_apple
1_2_3_4_5_pear
2_a_s_d_f_rabbit
3_5_2_5_7_dog


In [17]:
# same stdout dump, this time with '?' as the field separator
dframe2.to_csv(sys.stdout,sep='?')

?0?1?2?3?4
0?q?r?s?t?apple
1?2?3?4?5?pear
2?a?s?d?f?rabbit
3?5?2?5?7?dog


In [21]:
# Print CSV text to stdout with an explicit comma separator,
# writing only the columns labelled 0, 1 and 2 (the row index is still included).

dframe2.to_csv(
    sys.stdout,
    sep=',',
    columns=[0, 1, 2],
)

,0,1,2
0,q,r,s
1,2,3,4
2,a,s,d
3,5,2,5


### input from json

In [21]:
# Sample JSON document, held as a Python string.
# JSON object/array syntax closely resembles Python dict/list literals,
# but this is still plain text until it is parsed.

json_obj = """
{"employees":[
    {"firstName":"John", "lastName":"Doe"},
    {"firstName":"Anna", "lastName":"Smith"},
    {"firstName":"Peter", "lastName":"Jones"}
]}
"""

In [22]:
import json

In [26]:
# parse the JSON text into Python objects (a dict holding a list of dicts)
# NOTE(review): this rebinds `data`, hiding the pandas_datareader `data`
# imported in the first cell - consider a more specific name
data = json.loads(json_obj)
data

{'employees': [{'firstName': 'John', 'lastName': 'Doe'},
  {'firstName': 'Anna', 'lastName': 'Smith'},
  {'firstName': 'Peter', 'lastName': 'Jones'}]}

In [24]:
# serialize the Python object back into a JSON-formatted string
json.dumps(data)

'{"employees": [{"lastName": "Doe", "firstName": "John"}, {"lastName": "Smith", "firstName": "Anna"}, {"lastName": "Jones", "firstName": "Peter"}]}'

In [28]:
# Build a DataFrame from the parsed JSON: the list stored under 'employees'
# holds one dict per person, giving one row per person with the dict keys
# as column names.

employee_records = data['employees']
dframe = pd.DataFrame(employee_records)
dframe

Unnamed: 0,firstName,lastName
0,John,Doe
1,Anna,Smith
2,Peter,Jones


### input from html

In [6]:
# pre-requisites: beautiful-soup and html5lib installed
# read_html() fetches the page and returns the HTML tables it finds as a list of DataFrames
# NOTE(review): this cell needs network access and the URL may have moved - verify before re-running

url = 'http://www.fdic.gov/bank/individual/failed/banklist.html'

dframe_list = pd.read_html(url)

In [16]:
# number of tables read_html() returned for the page
len(dframe_list)

1

In [17]:
# peek at the first row of the first table
dframe_list[0].head(1)

Unnamed: 0,Bank Name,City,ST,CERT,Acquiring Institution,Closing Date,Updated Date,Loss Share Type,Agreement Terminated,Termination Date
0,Hometown National Bank,Longview,WA,35156,Twin City Bank,"October 2, 2015","October 15, 2015",,,


In [18]:
# bind the first DataFrame in the result list to its own name
# (this rebinds dframe, which previously held the employees frame)

dframe = dframe_list[0]

In [19]:
# column labels of the table as a NumPy array
dframe.columns.values

array(['Bank Name', 'City', 'ST', 'CERT', 'Acquiring Institution',
       'Closing Date', 'Updated Date', 'Loss Share Type',
       'Agreement Terminated', 'Termination Date'], dtype=object)

### input from excel

In [26]:
# pre-requisites: xlrd and openpyxl
# open the workbook; individual sheets are read from it with .parse()

xlsfile = pd.ExcelFile('textfile1.xlsx')

In [27]:
# read the worksheet named 'Sheet1' into a DataFrame
dframe = xlsfile.parse('Sheet1')

In [28]:
# display the parsed sheet
dframe

Unnamed: 0,Col1,Col2,Col3
,100,933,3400
,435,35435,493
,345,34553,200


In [39]:
# Replace the row index with consecutive integer labels 0..n-1.
# The length comes from the frame itself rather than a hard-coded 3, so the
# cell keeps working when the worksheet has a different number of rows
# (assigning an index of the wrong length raises a length-mismatch error).

dframe.index = np.arange(len(dframe))
dframe

Unnamed: 0,Col1,Col2,Col3
0,100,933,3400
1,435,35435,493
2,345,34553,200


In [None]:
# full pd.ExcelFile.parse() documentation (pandas 0.13.1) here:
# http://pandas.pydata.org/pandas-docs/version/0.13.1/generated/pandas.io.excel.ExcelFile.parse.html