In [1]:
import pandas as pd

In [2]:
#Lire un fichier CSV
df = pd.read_csv('examples/ex1.csv')

In [3]:
df

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


In [4]:
# on peut aussi utiliser read_table en spécifiant un délimiteur
pd.read_table('examples/ex1.csv',sep=',')

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


In [5]:
#Afficher un header par défaut
pd.read_csv('examples/ex2.csv',header=None)

Unnamed: 0,0,1,2,3,4
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


In [6]:
#Ajouter un header
pd.read_csv('examples/ex2.csv',names=['a','b','c','d','message'])

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


In [7]:
# Choisir, spécifier la colonne d'index (index_col)
names=['a','b','c','d','message']

In [8]:
pd.read_csv('examples/ex2.csv',names=names,index_col='message')

Unnamed: 0_level_0,a,b,c,d
message,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
hello,1,2,3,4
world,5,6,7,8
foo,9,10,11,12


In [9]:
#index Hierarchique
pd.read_csv('examples/csv_mindex.csv',index_col=['key1','key2'])

Unnamed: 0_level_0,Unnamed: 1_level_0,value1,value2
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
one,a,1,2
one,b,3,4
one,c,5,6
one,d,7,8
two,a,9,10
two,b,11,12
two,c,13,14
two,d,15,16


In [10]:
#Sauter des lignes
pd.read_csv('examples/csv_mindex.csv',skiprows=[0,2,4])

Unnamed: 0,one,a,1,2
0,one,c,5,6
1,two,a,9,10
2,two,b,11,12
3,two,c,13,14
4,two,d,15,16


In [11]:
# Creer un grand fichier csv avec python
import numpy as np
import csv

In [12]:
with open('examples/ex0.csv','w',newline='') as fp:
    a = csv.writer(fp,delimiter=',')
    data = np.arange(15*4).reshape(15,4)
    a.writerows(data)

In [13]:
# Lire uniquement un nombre 'n' de lignes
pd.read_csv('examples/ex0.csv',nrows=3,names=['one','two','three','four'])

Unnamed: 0,one,two,three,four
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11


In [14]:
#Ecrire des données au format texte
data = pd.read_csv('examples/ex0.csv',names=['one','two','three','four'])

In [15]:
data

Unnamed: 0,one,two,three,four
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15
4,16,17,18,19
5,20,21,22,23
6,24,25,26,27
7,28,29,30,31
8,32,33,34,35
9,36,37,38,39


In [16]:
#générer un fichier csv séparé par des ','
data.to_csv('examples/out.csv')

In [17]:
#d'autres délimiteurs peuvent être utilisés
import sys#pour affichage console

In [18]:
data.to_csv(sys.stdout,sep='|')

|one|two|three|four
0|0|1|2|3
1|4|5|6|7
2|8|9|10|11
3|12|13|14|15
4|16|17|18|19
5|20|21|22|23
6|24|25|26|27
7|28|29|30|31
8|32|33|34|35
9|36|37|38|39
10|40|41|42|43
11|44|45|46|47
12|48|49|50|51
13|52|53|54|55
14|56|57|58|59


In [19]:
data = pd.read_csv('examples/ex0.csv',names=['one','two','three','four']) # j'ai modifié le fichier en supprimant certains elts (1,18,33)"""

In [20]:
# les valeurs manquantes sont remplacées par NULL
data.to_csv(sys.stdout, na_rep='NULL')

,one,two,three,four
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15
4,16,17,18,19
5,20,21,22,23
6,24,25,26,27
7,28,29,30,31
8,32,33,34,35
9,36,37,38,39
10,40,41,42,43
11,44,45,46,47
12,48,49,50,51
13,52,53,54,55
14,56,57,58,59


In [21]:
# Ne pas afficher les labels des lignes et colonnes
data.to_csv(sys.stdout, index=False, header=False)

0,1,2,3
4,5,6,7
8,9,10,11
12,13,14,15
16,17,18,19
20,21,22,23
24,25,26,27
28,29,30,31
32,33,34,35
36,37,38,39
40,41,42,43
44,45,46,47
48,49,50,51
52,53,54,55
56,57,58,59


In [22]:
# ecrire seulement un sous-ensemble des colonnes 
data.to_csv(sys.stdout, index=False,columns=['two','three'])

two,three
1,2
5,6
9,10
13,14
17,18
21,22
25,26
29,30
33,34
37,38
41,42
45,46
49,50
53,54
57,58


In [23]:
# Les Series aussi ont une méthode 'to_csv'
dates = pd.date_range('1/1/2000',periods=7)

In [24]:
ts = pd.Series(np.arange(7),index=dates)

In [25]:
ts.to_csv('examples/tseries.csv')

In [26]:
""" WORKING WITH DELIMITED fORMATS"""

' WORKING WITH DELIMITED fORMATS'

In [27]:
pd.DataFrame(np.array([['a','b','c'],[1,2,3],[1,2,3]])).to_csv('examples/ex7.csv',header=False,index=False)

In [28]:
f = open('examples/ex7.csv')

In [29]:
reader = csv.reader(f)

In [30]:
for line in reader:
    print(line)

['a', 'b', 'c']
['1', '2', '3']
['1', '2', '3']


In [31]:
with open('examples/ex7.csv') as f:
    lines = list(csv.reader(f))

In [32]:
header, values = lines[0],lines[1:]

In [33]:
data_dict = { h : v for h,v in zip(header,zip(*values))}

In [34]:
data_dict

{'a': ('1', '1'), 'b': ('2', '2'), 'c': ('3', '3')}