# Parsing functions in Pandas
* read_csv - loads delimited data from a file, URL, or file-like object. Uses a comma (',') as the default delimiter.
* read_table - loads delimited data from a file, URL, or file-like object. Uses a tab ('\t') as the default delimiter.
* read_fwf - reads data in fixed-width column format.
* read_clipboard - reads data from the clipboard.

In [4]:
import numpy as np
import pandas as pd

from pandas import Series, DataFrame

In [5]:
!cat data/sample.csv

male,female,year
1.5,2.5,2010
3,5,2011
6,10,2012
12,15,2013
20,22,2014

In [6]:
# Load the comma-delimited sample file into a DataFrame.
# Equivalent spelling: pd.read_table('data/sample.csv', sep=',')
pd.read_csv('data/sample.csv', sep=',')

Unnamed: 0,male,female,year
0,1.5,2.5,2010
1,3.0,5.0,2011
2,6.0,10.0,2012
3,12.0,15.0,2013
4,20.0,22.0,2014


In [7]:
# Ignore the first two physical lines of the file (0-indexed rows 0 and 1) so
# that parsing starts at the header row.
# NOTE(review): presumably those two lines are comments, going by the file
# name -- confirm against the file contents.
pd.read_csv(
    'data/data_with_comments.csv',
    skiprows=[0, 1],
)

Unnamed: 0,male,female,year
0,1.5,2.5,2010
1,3.0,5.0,2011
2,6.0,10.0,2012
3,12.0,15.0,2013
4,20.0,22.0,2014


In [13]:
# Treat the value 5 as a missing-data marker: matching cells are read as NaN.
pd.read_csv(
    'data/sample.csv',
    na_values=[5],
)

Unnamed: 0,male,female,year
0,1.0,,2009.0
1,1.5,2.5,2010.0
2,3.0,,2011.0
3,6.0,10.0,2012.0
4,12.0,15.0,2013.0
5,20.0,22.0,2014.0
6,30.0,30.0,


In [16]:
# Reading large files in chunks: with `chunksize` set, read_csv returns an
# iterator that yields DataFrames of at most that many rows instead of loading
# the whole file at once.
chunker = pd.read_csv('data/sample.csv', chunksize=1)

# Running tally of how often each 'male' value occurs across all chunks.
# The dtype is given explicitly because the default dtype of an empty Series
# differs between pandas versions (float64 in older releases, object in newer
# ones, with a warning in between); float64 keeps the tally numeric.
total = Series([], dtype='float64')
for piece in chunker:
    # fill_value=0 makes a value that is missing on one side count as zero
    # during index alignment, rather than turning the sum into NaN.
    total = total.add(piece['male'].value_counts(), fill_value=0)

# Most frequent values first.
total.sort_values(ascending=False)


30.0    1.0
20.0    1.0
12.0    1.0
6.0     1.0
3.0     1.0
1.5     1.0
1.0     1.0
dtype: float64

In [None]:
# search python pandas in Twitter
# NOTE(review): this looks like the legacy unauthenticated Twitter search
# endpoint -- confirm it is still reachable before relying on this cell.
import json

import requests

url = 'http://search.twitter.com/search.json?q=python%20pandas'
# requests applies no timeout by default, so bound the wait explicitly to
# avoid hanging the notebook on an unresponsive server.
resp = requests.get(url, timeout=10)
# Fail loudly on a 4xx/5xx response instead of trying to JSON-decode an
# error page further down.
resp.raise_for_status()

# then parse the http response
data = json.loads(resp.text)

# create a data frame from the tweets, keeping only the listed fields
tweet_fields = ['created_at', 'from_user', 'id', 'text']
tweets = DataFrame(data['results'], columns=tweet_fields)


