In [103]:
import pandas as pd

# Demo frame A: three labelled rows with two integer columns.
A = pd.DataFrame(
    data={"Col1": [1, 2, 3], "Col2": [4, 4, 6]},
    index=["rowA1", "rowA2", "rowA3"],
)

# Demo frame B: two labelled rows; Col1 is the only column it shares with A.
B = pd.DataFrame(
    data={"Col1": [1, 3], "Col3": [7, 8], "Col4": [9, 10]},
    index=["rowB1", "rowB2"],
)

# Print both frames as plain text, each preceded by its title.
print("Matrice A\n", A)
print("\nMatrice B\n", B)

Matrice A
        Col1  Col2
rowA1     1     4
rowA2     2     4
rowA3     3     6

Matrice B
        Col1  Col3  Col4
rowB1     1     7     9
rowB2     3     8    10


## Valeurs d'une colonne (distribution de fréquences)

In [62]:
# Frequency table: how many times each distinct value occurs in Col1.
A["Col1"].value_counts()

3    1
2    1
1    1
Name: Col1, dtype: int64

## Filtre / eq. clause Where

In [63]:
# Row filter, the pandas counterpart of a SQL WHERE clause: keep rows where Col3 equals 7.
# Shorthand without .loc: B[B['Col3'] == 7]
B.loc[B['Col3'].eq(7)]

Unnamed: 0,Col1,Col3,Col4
rowB1,1,7,9


In [64]:
# Col1 values of the rows where Col3 equals 7.
# Rows (boolean mask) and column (label) are selected in a single .loc call;
# the chained form B[B['Col3'] == 7]['Col1'] reads the same values but goes
# through an intermediate filtered frame.
B.loc[B['Col3'] == 7, 'Col1']

rowB1    1
Name: Col1, dtype: int64

In [65]:
# Several conditions combined with a logical OR: use the element-wise `|`
# operator (not the Python keyword `or`) and parenthesise each comparison.
# Rows and column are selected in one .loc call instead of chained indexing.
B.loc[(B['Col3'] == 7) | (B['Col3'] > 1), 'Col1']

rowB1    1
rowB2    3
Name: Col1, dtype: int64

In [66]:
# Several conditions combined with a logical AND: use the element-wise `&`
# operator (not the Python keyword `and`) and parenthesise each comparison.
# Rows and column are selected in one .loc call instead of chained indexing.
B.loc[(B['Col3'] == 7) & (B['Col3'] > 1), 'Col1']

rowB1    1
Name: Col1, dtype: int64

### Utilisation de la méthode `filter`

In [67]:
# Keep the rows whose index label contains the substring "A3".
# axis='index' (equivalent to axis=0) makes filter() match against the row
# labels; it does not look at column 0 or at any cell values.
A.filter(like='A3', axis='index')

Unnamed: 0,Col1,Col2
rowA3,3,6


## Jointure normale "Inner"

In [68]:
# Inner join on Col1: only key values present in both A and B are kept.
A.merge(B, on='Col1', how='inner')

Unnamed: 0,Col1,Col2,Col3,Col4
0,1,4,7,9
1,3,6,8,10


## Jointure Gauche

In [69]:
# Left join A -> B: every row of A is kept; B's columns are NaN where Col1 has no match in B.
A.merge(B, on='Col1', how='left')

Unnamed: 0,Col1,Col2,Col3,Col4
0,1,4,7.0,9.0
1,2,4,,
2,3,6,8.0,10.0


In [70]:
# Left join B -> A: every row of B is kept and the matching Col2 from A is appended.
B.merge(A, on='Col1', how='left')

Unnamed: 0,Col1,Col3,Col4,Col2
0,1,7,9,4
1,3,8,10,6


## Full join

In [90]:
# Full outer join on Col1: key values from both frames are kept, with NaN where one side has no match.
A.merge(B, on='Col1', how='outer')

Unnamed: 0,Col1,Col2,Col3,Col4
0,1,4,7.0,9.0
1,2,4,,
2,3,6,8.0,10.0


## Union : assemblage/concaténation de deux matrices (DataFrames)

In [71]:
# Stack the rows of B under the rows of A; a column missing from one frame is filled with NaN.
# axis and join are spelled out here with their default values.
pd.concat([A, B], axis='index', join='outer')

Unnamed: 0,Col1,Col2,Col3,Col4
rowA1,1,4.0,,
rowA2,2,4.0,,
rowA3,3,6.0,,
rowB1,1,,7.0,9.0
rowB2,3,,8.0,10.0


## Groupement (group by)

In [83]:
# Group Col1 by the values of Col2, then print the size and the mean of each group.
col1_by_col2 = A.groupby("Col2")["Col1"]
print("Group By / Count:\n ", col1_by_col2.size(), "\n")
print("Group By / Moyenne:\n ", col1_by_col2.mean(), "\n")

Group By / Count:
  Col2
4    2
6    1
Name: Col1, dtype: int64 

Group By / Moyenne:
  Col2
4    1.5
6    3.0
Name: Col1, dtype: float64 



In [89]:
# Several aggregations in one groupby: mean of Col1 and number of rows per Col2 group.
# The aggregations are given by name ('mean', 'size') instead of as NumPy callables:
# recent pandas releases emit a FutureWarning when np.mean / np.size are passed to agg().
import numpy as np  # not needed by this cell any more; kept in case later cells rely on `np`
A.groupby('Col2').agg({'Col1': 'mean', 'Col2': 'size'})

Unnamed: 0_level_0,Col1,Col2
Col2,Unnamed: 1_level_1,Unnamed: 2_level_1
4,1.5,2
6,3.0,1


## Parcourir une matrice/DF

In [94]:
# Iterate over the values of Col1 while also tracking a counter.
# NOTE: idx is the 0-based position supplied by enumerate(), not the row label (rowA1, ...).
for idx, f in enumerate(A["Col1"]):
    print (f, " | ", idx)  # value first, then its position

1  |  0
2  |  1
3  |  2


In [108]:
# Single cell by position: .iloc[row, column], both counted from zero
# (here: third row, second column).
A.iloc[2,1]

6

In [111]:
# Dimensions of the frame as a (rows, columns) tuple.
A.shape

(3, 2)

In [117]:
# Row slice by position: start at position 1 and stop before position 2,
# i.e. the second row only, returned as a one-row DataFrame.
# .iloc states the positional meaning explicitly; the bare A[1:2] form selects the same row.
A.iloc[1:2]

Unnamed: 0,Col1,Col2
rowA2,2,4


In [123]:
# First row only (position 0) with all columns; the row comes back as a Series.
A.iloc[0,:]

Col1    1
Col2    4
Name: rowA1, dtype: int64

In [127]:
# First row by position; the column slice can be omitted.
A.iloc[0]

Col1    1
Col2    4
Name: rowA1, dtype: int64

In [130]:
# Select a row by its index label rather than by its position.
A.loc['rowA1']

Col1    1
Col2    4
Name: rowA1, dtype: int64