In [1]:
import numpy as np
import pandas as pd
np.random.seed(12345)
import matplotlib.pyplot as plt
%matplotlib inline
plt.rc('figure',figsize=(10,6))
np.set_printoptions(precision=3,suppress=True)

In [2]:
# Dump the raw text of ex1.csv via the shell's `cat` (needs a Unix-like shell).
!cat dane/ex1.csv

a,b,c,d,message
1,2,3,4,hello
5,6,7,8,world
9,10,11,12,foo

In [3]:
# Load ex1.csv with pandas defaults; the first line of the file supplies
# the column labels.
df = pd.read_csv(filepath_or_buffer='dane/ex1.csv')

# Echo the frame as the cell output.
df

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


In [4]:
# Dump the raw text of ex2.csv via the shell's `cat` (needs a Unix-like shell).
!cat dane/ex2.csv

1,2,3,4,hello
5,6,7,8,world
9,10,11,12,foo

In [5]:
# ex2.csv starts directly with data, so header detection is switched off
# and the column labels are supplied by hand.
pd.read_csv(
    'dane/ex2.csv',
    header=None,
    names=['coś tam', 'druga', 'super', 'blabla', 'wiadomość'],
)

Unnamed: 0,coś tam,druga,super,blabla,wiadomość
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


In [6]:
# Column labels for the header-less ex2.csv.
names = [
    'coś tam',
    'druga',
    'super',
    'blabla',
    'wiadomość',
]

# Same file again, this time promoting the last column to the row index.
pd.read_csv(
    'dane/ex2.csv',
    names=names,
    index_col='wiadomość',
)

Unnamed: 0_level_0,coś tam,druga,super,blabla
wiadomość,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
hello,1,2,3,4
world,5,6,7,8
foo,9,10,11,12


In [7]:
# Dump the raw text of csv_mindex.csv via the shell's `cat` (needs a Unix-like shell).
!cat dane/csv_mindex.csv

key1,key2,value1,value2
one,a,1,2
one,b,3,4
one,c,5,6
one,d,7,8
two,a,9,10
two,b,11,12
two,c,13,14
two,d,15,16


In [8]:
# Build a two-level row index from the key1/key2 columns.
parsed = pd.read_csv(
    'dane/csv_mindex.csv',
    index_col=['key1', 'key2'],
)

# Echo the hierarchically indexed frame.
parsed

Unnamed: 0_level_0,Unnamed: 1_level_0,value1,value2
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
one,a,1,2
one,b,3,4
one,c,5,6
one,d,7,8
two,a,9,10
two,b,11,12
two,c,13,14
two,d,15,16


In [9]:
# Dump the raw text of ex4.csv via the shell's `cat` (needs a Unix-like shell).
!cat dane/ex4.csv

# Cześć!
a,b,c,d,message
# Chciałem tylko trochę utrudnić Twoją pracę.
# Kto w ogóle wczytuje pliki CSV za pomocą komputera?
1,2,3,4,hello
5,6,7,8,world
9,10,11,12,foo

In [10]:
# Zero-based lines 0, 2 and 3 of ex4.csv are '#' remarks, not data;
# skip exactly those so the header and the three records remain.
pd.read_csv(
    'dane/ex4.csv',
    skiprows=[0, 2, 3],
)

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


In [11]:
# Dump the raw text of ex5.csv via the shell's `cat` (needs a Unix-like shell).
!cat dane/ex5.csv

something,a,b,c,d,message
one,1,2,3,4,NA
two,5,6,,8,world
three,9,10,11,12,foo

In [12]:
# Read ex5.csv, passing 'NULL' as an extra marker for missing values.
result = pd.read_csv(
    'dane/ex5.csv',
    na_values=['NULL'],
)

# Echo the frame so the missing cells are visible.
result

Unnamed: 0,something,a,b,c,d,message
0,one,1,2,3.0,4,
1,two,5,6,,8,world
2,three,9,10,11.0,12,foo


In [13]:
# Per-column missing-value markers: column label -> strings to treat as NA.
sentinels = dict(
    message=['foo', 'NA'],
    something=['two'],
)

In [14]:
# Apply the per-column NA markers while parsing ex5.csv.
pd.read_csv(
    'dane/ex5.csv',
    na_values=sentinels,
)

Unnamed: 0,something,a,b,c,d,message
0,one,1,2,3.0,4,
1,,5,6,,8,world
2,three,9,10,11.0,12,


In [15]:
# Cap how many rows pandas prints for any frame from here on.
pd.set_option('display.max_rows', 10)

# Only the first five data rows of ex6.csv are parsed.
result = pd.read_csv(
    'dane/ex6.csv',
    nrows=5,
)

# Echo the five-row sample.
result

Unnamed: 0,one,two,three,four,key
0,0.467976,-0.038649,-0.295344,-1.824726,L
1,-0.358893,1.404453,0.704965,-0.200638,B
2,-0.50184,0.659254,-0.421691,-0.057688,G
3,0.204886,1.074134,1.388361,-0.982404,R
4,0.354628,-0.133116,0.283763,-0.837063,Q


In [16]:
# Save the five-row sample as CSV using pandas' default settings.
result.to_csv(path_or_buf='dane/fiverows.csv')

In [17]:
# Nested mapping of column label -> {row label -> value}; 'dict' is the
# default orientation, spelled out here for readability.
result.to_dict(orient='dict')

{'one': {0: 0.467976300189,
  1: -0.358893469543,
  2: -0.50184039929,
  3: 0.204886212202,
  4: 0.354627914484},
 'two': {0: -0.0386485396255,
  1: 1.40445260007,
  2: 0.659253707223,
  3: 1.07413396504,
  4: -0.133115852296},
 'three': {0: -0.295344251987,
  1: 0.704964644926,
  2: -0.421690619312,
  3: 1.38836131252,
  4: 0.283762637978},
 'four': {0: -1.82472622729,
  1: -0.200638304015,
  2: -0.0576883018364,
  3: -0.982404023494,
  4: -0.837062961653},
 'key': {0: 'L', 1: 'B', 2: 'G', 3: 'R', 4: 'Q'}}

In [18]:
# Persist the five-row frame as JSON on disk.  When a target path is given,
# DataFrame.to_json writes the file and returns None, so binding that return
# value left `jsonsource` empty and the cell displayed nothing.
result.to_json('dane/jsourse.json')

# Serialise once more without a target so the JSON text itself is kept in
# `jsonsource` and shown as the cell output.
jsonsource = result.to_json()
jsonsource

In [19]:
# With a target path, DataFrame.to_xml writes the file and returns None, so
# the old `xmlsrc = result.to_xml(path)` bound None.  Write the file first,
# then keep the XML text itself under the same name.
result.to_xml('dane/liczby.xml')
xmlsrc = result.to_xml()

In [20]:
# Export the five-row frame to an Excel workbook.
# NOTE(review): to_excel only writes the file, so `excel` is expected to be
# None afterwards — confirm no later cell relies on this name.
excel = result.to_excel(excel_writer='dane/data.xlsx')

In [21]:
# With a target buffer/path, DataFrame.to_html writes the file and returns
# None, so the old `inne = result.to_html(path)` bound None.  Write the file
# first, then keep the rendered HTML table text under the same name.
result.to_html('dane/info.html')
inne = result.to_html()

In [23]:
# nx1 = result.to_sql('dane/nn.sql')  # disabled: to_sql expects a table name plus a `con` database connection, not a file path

In [25]:
# nx2 = result.to_hdf('dane/dane.hdf')  # disabled: to_hdf requires a `key` argument naming the group inside the HDF5 store

In [26]:
# Parse example.json into a DataFrame.
danejs = pd.read_json(path_or_buf='dane/example.json')

# Echo the parsed frame.
danejs

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


In [27]:
# Serialise to a JSON string, column by column; 'columns' is the DataFrame
# default orientation, spelled out here for readability.
danejs.to_json(orient='columns')

'{"a":{"0":1,"1":4,"2":7},"b":{"0":2,"1":5,"2":8},"c":{"0":3,"1":6,"2":9}}'

In [28]:
# Serialise to a JSON string with one object per row; no target is given,
# so the text is returned rather than written to disk.
danejs.to_json(path_or_buf=None, orient='records')

'[{"a":1,"b":2,"c":3},{"a":4,"b":5,"c":6},{"a":7,"b":8,"c":9}]'