In [1]:
import pandas as pd

In [2]:
# Load the animals table from the CSV located one directory above the notebook.
df = pd.read_csv(filepath_or_buffer='../animals.csv')

In [3]:
# Show the loaded frame; the rendered output elides the middle rows.
df

Unnamed: 0,brainwt,bodywt,animal
0,3.385,44.500,Arctic_fox
1,0.480,15.499,Owl_monkey
2,1.350,8.100,Beaver
3,464.983,423.012,Cow
4,36.328,119.498,Gray_wolf
...,...,...,...
57,160.004,169.000,Brazilian_tapir
58,0.900,2.600,Tenrec
59,1.620,11.400,Phalanger
60,0.104,2.500,Tree_shrew


In [4]:
# indexing (.index)
# selecting rows and columns (square brackets)
# subsetting using functions (.loc, .iloc)
# filtering (fancy indexing)

In [6]:
# Data selection in Series
    # Series as dictionaries
    # Series as a one-dimensional array (vector)
    # Indexers: loc, iloc
# Data Selection in DataFrame
    # DataFrame as a dictionary
    # DataFrame as two-dimensional array

In [7]:
# Series as a dictionary

In [8]:
# Four values labelled 'a'..'d'; the labels play the role of dictionary keys.
data = pd.Series(
    data=[0.25, 0.5, 0.75, 1.0],
    index=list('abcd'),
)

In [9]:
# Display the Series: index labels on the left, values on the right.
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [10]:
# Dictionary-style lookup: the index label is used as the key.
data['b']

0.5

In [11]:
# Membership test against the index labels: 'a' is a label, so this is True.
'a' in data

True

In [12]:
# 'f' is not one of the index labels, so this is False.
'f' in data

False

In [13]:
# keys() returns the Series index, mirroring dict.keys().
data.keys()

Index(['a', 'b', 'c', 'd'], dtype='object')

In [14]:
# items() yields (label, value) pairs, mirroring dict.items().
list(data.items())

[('a', 0.25), ('b', 0.5), ('c', 0.75), ('d', 1.0)]

In [15]:
# Assigning to a label that does not exist yet adds a new entry (in place),
# just like assigning to a new dictionary key.
data['e'] = 1.25

In [16]:
# Confirm that the new 'e' entry is now part of the Series.
data

a    0.25
b    0.50
c    0.75
d    1.00
e    1.25
dtype: float64

In [17]:
# Series as a one-dimensional array (vector)

In [18]:
# Slice by explicit index labels: the end label 'c' is included in the result.
data['a':'c']

a    0.25
b    0.50
c    0.75
dtype: float64

In [20]:
# Positional slice: entries at positions 0, 1 and 2 (end position excluded).
# `.iloc` states the positional intent explicitly instead of relying on how a
# bare integer slice inside [] is interpreted for a label-indexed Series.
data.iloc[0:3]

a    0.25
b    0.50
c    0.75
dtype: float64

In [21]:
# Boolean mask: keep only the entries strictly between 0.3 and 0.8.
data[data.gt(0.3) & data.lt(0.8)]

b    0.50
c    0.75
dtype: float64

In [22]:
# Fancy indexing: a list of labels selects exactly those entries.
data[['a', 'e']]

a    0.25
e    1.25
dtype: float64

In [24]:
# data selection in dataframe

In [25]:
# dataframe as a dictionary

In [33]:
# Two Series sharing the same region labels, each built from a plain mapping.
area = pd.Series(dict(IDF=108977, PACA=65929, BRETAGNE=87654))
popu = pd.Series(dict(IDF=15_000_000, PACA=1_000_000, BRETAGNE=5_000_000))

In [34]:
# Assemble the two Series into a frame, one column per Series.
# Note: this rebinds `data`, which held a Series in the earlier cells.
data = pd.DataFrame(dict(area=area, popu=popu))

In [35]:
# Display the frame: regions as the row index, 'area' and 'popu' as columns.
data

Unnamed: 0,area,popu
IDF,108977,15000000
PACA,65929,1000000
BRETAGNE,87654,5000000


In [36]:
# Dictionary-style column access: returns the 'area' column as a Series.
data['area']

IDF         108977
PACA         65929
BRETAGNE     87654
Name: area, dtype: int64

In [37]:
# Attribute-style column access: returns the 'popu' column as a Series.
data.popu

IDF         15000000
PACA         1000000
BRETAGNE     5000000
Name: popu, dtype: int64

In [38]:
# Attribute-style and dictionary-style access return the same column data.
# Compare by content: whether both accesses hand back the very same object
# (`is`) depends on pandas' internal column caching, so identity is not a
# stable check across pandas versions and modes.
data.area.equals(data['area'])

True

In [40]:
# Pitfall: `data.pop` is the DataFrame.pop method, not a column, so this
# compares a method with the 'popu' Series and evaluates to False.
# NOTE(review): presumably a deliberate demonstration of attribute-name
# clashes; if the intent was to check the 'popu' column, compare
# data.popu with data['popu'] instead.
data.pop is data['popu']

False

In [42]:
# Derive a new column in place: population divided by area, row by row.
data['density'] = data['popu'].div(data['area'])

In [43]:
# Inspect the newly added 'density' column.
data['density']

IDF         137.643723
PACA         15.167832
BRETAGNE     57.042462
Name: density, dtype: float64

In [44]:
# DataFrame as a two-dimensional array (matrix)

In [45]:
# View the frame as a raw 2-D NumPy array (one row per region).
# `to_numpy()` is the accessor pandas recommends over the `.values` attribute.
data.to_numpy()

array([[1.08977000e+05, 1.50000000e+07, 1.37643723e+02],
       [6.59290000e+04, 1.00000000e+06, 1.51678321e+01],
       [8.76540000e+04, 5.00000000e+06, 5.70424624e+01]])

In [46]:
# Re-display the frame, now including the 'density' column.
data

Unnamed: 0,area,popu,density
IDF,108977,15000000,137.643723
PACA,65929,1000000,15.167832
BRETAGNE,87654,5000000,57.042462


In [47]:
# Transpose the frame: regions become columns and the former columns become rows.
data.transpose()

Unnamed: 0,IDF,PACA,BRETAGNE
area,108977.0,65929.0,87654.0
popu,15000000.0,1000000.0,5000000.0
density,137.6437,15.167832,57.04246


In [49]:
# A single integer index on the underlying 2-D array selects a row (here the
# first region). `to_numpy()` is the accessor pandas recommends over `.values`.
data.to_numpy()[0]

array([1.08977000e+05, 1.50000000e+07, 1.37643723e+02])

In [50]:
# Re-display the frame for reference before slicing its underlying array.
data

Unnamed: 0,area,popu,density
IDF,108977,15000000,137.643723
PACA,65929,1000000,15.167832
BRETAGNE,87654,5000000,57.042462


In [51]:
# NumPy-style 2-D slicing on the underlying array: first two rows, every
# column from position 1 onward. Row and column labels are lost in the result.
# `to_numpy()` is the accessor pandas recommends over `.values`.
data.to_numpy()[0:2, 1:]

array([[1.50000000e+07, 1.37643723e+02],
       [1.00000000e+06, 1.51678321e+01]])

In [52]:
# .loc and .iloc

In [53]:
# Positional selection that keeps the labels: first two rows, every column
# from position 1 onward (slice ends are exclusive).
data.iloc[:2, 1:]

Unnamed: 0,popu,density
IDF,15000000,137.643723
PACA,1000000,15.167832


In [54]:
# Label-based selection: rows up to and including 'PACA', columns up to and
# including 'popu' (label slices include their end label).
data.loc[:'PACA', :'popu']

Unnamed: 0,area,popu
IDF,108977,15000000
PACA,65929,1000000


In [55]:
# Combine a boolean row mask (density above 100) with a list of column labels.
data.loc[data['density'].gt(100), ['popu', 'density']]

Unnamed: 0,popu,density
IDF,15000000,137.643723


In [56]:
# Positional scalar assignment: row 0, column 2 is the 'density' cell of 'IDF'.
# This modifies `data` in place, so earlier displayed outputs of the frame
# no longer reflect its current contents.
data.iloc[0,2] = 90
# Show the frame after the update.
data

Unnamed: 0,area,popu,density
IDF,108977,15000000,90.0
PACA,65929,1000000,15.167832
BRETAGNE,87654,5000000,57.042462


In [59]:
# Re-display the modified frame for reference before the next selections.
data

Unnamed: 0,area,popu,density
IDF,108977,15000000,90.0
PACA,65929,1000000,15.167832
BRETAGNE,87654,5000000,57.042462


In [63]:
# Label slice on rows ('IDF' through 'PACA', inclusive) with a one-element
# column list: passing a list keeps the result a DataFrame.
data.loc['IDF':'PACA', ['popu']]

Unnamed: 0,popu
IDF,15000000
PACA,1000000


In [66]:
# Positional selection with a scalar column position: the result is a Series.
data.iloc[0:2, 1]

IDF     15000000
PACA     1000000
Name: popu, dtype: int64

In [67]:
# Same cells as a scalar column position of 1, but a column slice (1:2)
# keeps the result a one-column DataFrame.
data.iloc[0:2, 1:2]

Unnamed: 0,popu
IDF,15000000
PACA,1000000
