In [1]:
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline 
matplotlib.style.use('ggplot')
import numpy as np
import pandas as pd
from StringIO import StringIO

In [2]:
data = 'a,b,c~1,2,3~4,5,6'
pd.read_csv(StringIO(data), lineterminator='~')

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6


In [3]:
data = 'a,b,c\n1,2,3\n4,5,6\n7,8,9'
print(data)
df = pd.read_csv(StringIO(data), dtype={'b': object, 'c': np.float64})
df.dtypes

a,b,c
1,2,3
4,5,6
7,8,9


a      int64
b     object
c    float64
dtype: object

In [4]:
#If the header is in a row other than the first, pass the row number to header. This will skip the preceding rows:
data = 'skip this skip it\na,b,c\n1,2,3\n4,5,6\n7,8,9'
pd.read_csv(StringIO(data), header=1)

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


In [5]:
data = '\na,b,c\n  \n# commented line\n1,2,3\n\n4,5,6'
print(data)
pd.read_csv(StringIO(data), comment='#')


a,b,c
  
# commented line
1,2,3

4,5,6


Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6


In [6]:
#If a file has one more column of data than the number of column names, 
#the first column will be used as the DataFrame’s row names:
data = 'a,b,c\n4,apple,bat,5.7\n8,orange,cow,10'
a = pd.read_csv(StringIO(data))
print a.to_string()
#-----
data = 'index,a,b,c\n4,apple,bat,5.7\n8,orange,cow,10'
pd.read_csv(StringIO(data), index_col=0)

        a    b     c
4   apple  bat   5.7
8  orange  cow  10.0


Unnamed: 0_level_0,a,b,c
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4,apple,bat,5.7
8,orange,cow,10.0


# Multi index and Multi column..

In [7]:
idx = pd.MultiIndex.from_product([["A", "B"], [1, 2, 3]])
col = pd.MultiIndex.from_product([["X", "Y"], ["aa", "bb"]])
df = pd.DataFrame(np.random.randint(0, 100, size=(6, 4)), index=idx, columns=col)
df.index.names = "name1", "name2"
df.columns.names = "NAME1", "NAME2"

df.to_csv("tmp.csv")
df2 = pd.read_csv("tmp.csv", header=[0, 1], index_col=[0, 1])
df2

Unnamed: 0_level_0,NAME1,X,X,Y,Y
Unnamed: 0_level_1,NAME2,aa,bb,aa,bb
name1,name2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
A,1,80,69,59,34
A,2,73,28,37,97
A,3,12,99,12,21
B,1,71,7,94,4
B,2,57,37,72,2
B,3,38,83,60,25


In [8]:
from pandas.tslib import Timestamp
dfd = pd.DataFrame(np.random.randn(5, 2), columns=list('AB'))
dfd['date'] = Timestamp('20130101')
dfd = dfd.sort_index(1, ascending=False)
json = dfd.to_json(date_format='iso')
json

'{"date":{"0":"2013-01-01T00:00:00.000Z","1":"2013-01-01T00:00:00.000Z","2":"2013-01-01T00:00:00.000Z","3":"2013-01-01T00:00:00.000Z","4":"2013-01-01T00:00:00.000Z"},"B":{"0":0.4508070196,"1":-1.3594413567,"2":1.9277507732,"3":0.4267972503,"4":0.1676905898},"A":{"0":-0.9385826927,"1":0.3547385331,"2":1.2125178155,"3":-0.6706845485,"4":0.1957243116}}'

In [9]:
from pandas.io.json import json_normalize

data = [{'state': 'Florida',
  'shortname': 'FL',
  'info': {
       'governor': 'Rick Scott'
  },
  'counties': [{'name': 'Dade', 'population': 12345},
              {'name': 'Broward', 'population': 40000},
              {'name': 'Palm Beach', 'population': 60000}]},
 {'state': 'Ohio',
  'shortname': 'OH',
  'info': {
       'governor': 'John Kasich'
  },
  'counties': [{'name': 'Summit', 'population': 1234},
               {'name': 'Cuyahoga', 'population': 1337}]}]


json_normalize(data, 'counties', ['state', 'shortname', ['info', 'governor']])

Unnamed: 0,name,population,info.governor,state,shortname
0,Dade,12345,Rick Scott,Florida,FL
1,Broward,40000,Rick Scott,Florida,FL
2,Palm Beach,60000,Rick Scott,Florida,FL
3,Summit,1234,John Kasich,Ohio,OH
4,Cuyahoga,1337,John Kasich,Ohio,OH
