In [1]:
import pandas as pd

clients = {
    'code': ['C11', 'C22', 'C33', 'C44'],
    'nom': ['Martin', 'Durand', 'Leboeuf', 'Carolin'],
    'age': [50, 40, 20, 35],
    'ville': ['Bordeaux', 'Pessac', 'Bordeaux', 'La Rochelle'],
    'cuisine_a_theme': ['y', 'n', 'n', 'y'],
    'livraison': ['y', 'y', 'n', 'n']
}

# Create DataFrame
df = pd.DataFrame(clients)
df

Unnamed: 0,code,nom,age,ville,cuisine_a_theme,livraison
0,C11,Martin,50,Bordeaux,y,y
1,C22,Durand,40,Pessac,n,y
2,C33,Leboeuf,20,Bordeaux,n,n
3,C44,Carolin,35,La Rochelle,y,n


## Indexing

In [15]:
# We can specify a column for pandas to use as index
# Like pandas index, those values should be unique
# for uniquer identifying

# Note that the layout oa columns names changed up a bit

df2 = df.set_index('code')
df2

In [3]:
# It now makes it possible to use this new index with .loc
df2.loc['C11']

nom                  Martin
age                      50
ville              Bordeaux
cuisine_a_theme           y
livraison                 y
Name: C11, dtype: object

In [4]:
# It's possible to revert the index change
# drop parameter can avoid the old index being added as a column
# drop=True

df2.reset_index()

Unnamed: 0,code,nom,age,ville,cuisine_a_theme,livraison
0,C11,Martin,50,Bordeaux,y,y
1,C22,Durand,40,Pessac,n,y
2,C33,Leboeuf,20,Bordeaux,n,n
3,C44,Carolin,35,La Rochelle,y,n


In [5]:
# Count number elements in Serie
df['ville'].count()

4

In [6]:
# Count number of values in Serie
df['ville'].value_counts()

Bordeaux       2
Pessac         1
La Rochelle    1
Name: ville, dtype: int64

In [7]:
# Filter data from array list
names = ['Durand', 'Leboeuf']

df.loc[df['nom'].isin(names)]

Unnamed: 0,code,nom,age,ville,cuisine_a_theme,livraison
1,C22,Durand,40,Pessac,n,y
2,C33,Leboeuf,20,Bordeaux,n,n


## Use filters

In [8]:
# Note that .loc allows us to use a filter/pattern to search
filtr = df['ville'] == "Bordeaux"
df.loc[filtr]

Unnamed: 0,code,nom,age,ville,cuisine_a_theme,livraison
0,C11,Martin,50,Bordeaux,y,y
2,C33,Leboeuf,20,Bordeaux,n,n


In [9]:
# "&", "|" and "~" can be used for more filtering
# "&" : and (to chain conditions)
# "|" : or (one pattern OR the other)
# "~" : for reverse/negative search

fitr = (df['ville'] == "Bordeaux") & (df['livraison'] == 'y')
df.loc[~filtr]

Unnamed: 0,code,nom,age,ville,cuisine_a_theme,livraison
1,C22,Durand,40,Pessac,n,y
3,C44,Carolin,35,La Rochelle,y,n


In [14]:
# String operations can also be applied
bdx_pattern = df['ville'].str.contains("Bordeaux")
df.loc[bdx_pattern]

Unnamed: 0,code,nom,age,ville,cuisine_a_theme,livraison
0,C11,Martin,50,Bordeaux,y,y
2,C33,Leboeuf,20,Bordeaux,n,n


145