# PANDAS

### Hands on!

In [1]:
import pandas as pd
import numpy as np

### Pandas Series

In [2]:
# Populations of the G7 countries, expressed in millions of inhabitants.
g7_pop = pd.Series(
    data=[35.467, 63.951, 80.940, 60.665, 127.061, 64.511, 318.523],
)

In [3]:
g7_pop

0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
dtype: float64

In [4]:
g7_pop.name = 'G7 Population in millions'

In [5]:
g7_pop

0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
Name: G7 Population in millions, dtype: float64

In [6]:
g7_pop.values

array([ 35.467,  63.951,  80.94 ,  60.665, 127.061,  64.511, 318.523])

In [7]:
type(g7_pop)

pandas.core.series.Series

In [8]:
type(g7_pop.values)

numpy.ndarray

In [9]:
g7_pop[0]

35.467

In [10]:
g7_pop[2]

80.94

In [11]:
g7_pop.index

RangeIndex(start=0, stop=7, step=1)

In [12]:
# Swap the default 0..6 integer index for country names, in the same order
# as the population values.
g7_pop.index = ['Canada', 'France', 'Germany', 'Italy', 'Japan', 'UK', 'USA']

In [13]:
g7_pop

Canada      35.467
France      63.951
Germany     80.940
Italy       60.665
Japan      127.061
UK          64.511
USA        318.523
Name: G7 Population in millions, dtype: float64

In [15]:
# A Series can also be built from a dict: each key becomes an index label
# and each value the corresponding entry.
# NOTE(review): 'Franse' and 'Ukrain' look like typos for 'France' / 'Ukraine';
# left unchanged so the recorded output of this cell stays in sync — confirm.
pd.Series({
    'Canada': 392.1,
    'Franse': 21.1,
    'Ukrain': 201.33,
    'USA': 2881001.133
})

Canada        392.100
Franse         21.100
Ukrain        201.330
USA       2881001.133
dtype: float64

In [16]:
# Values, index labels and the Series name can all be supplied in a single
# constructor call instead of being assigned one after another.
pd.Series(
    data=[35.467, 63.951, 80.940, 60.665, 127.061, 64.511, 318.523],
    index=['Canada', 'France', 'Germany', 'Italy', 'Japan', 'UK', 'USA'],
    name='G7 Population in millions',
)

Canada      35.467
France      63.951
Germany     80.940
Italy       60.665
Japan      127.061
UK          64.511
USA        318.523
Name: G7 Population in millions, dtype: float64

In [17]:
g7_pop['Canada']

35.467

In [18]:
g7_pop['Japan']

127.061

In [19]:
# Position-based lookup. The index now holds string labels, so an integer
# passed to plain [] only works through a deprecated positional fallback;
# .iloc makes the positional intent explicit.
g7_pop.iloc[1]

63.951

In [20]:
g7_pop.iloc[0]

35.467

In [21]:
g7_pop[1:3]

France     63.951
Germany    80.940
Name: G7 Population in millions, dtype: float64

In [22]:
g7_pop

Canada      35.467
France      63.951
Germany     80.940
Italy       60.665
Japan      127.061
UK          64.511
USA        318.523
Name: G7 Population in millions, dtype: float64

In [23]:
g7_pop[['Italy', 'France']]

Italy     60.665
France    63.951
Name: G7 Population in millions, dtype: float64

In [24]:
# Select several elements by position. A list of integers passed to plain []
# on a string-labelled Series relies on a deprecated positional fallback,
# so go through .iloc instead.
g7_pop.iloc[[0, 1]]

Canada    35.467
France    63.951
Name: G7 Population in millions, dtype: float64

In [25]:
g7_pop['Canada':'Italy']

Canada     35.467
France     63.951
Germany    80.940
Italy      60.665
Name: G7 Population in millions, dtype: float64

.

### Conditional Selection (boolean arrays)
The same boolean array techniques we saw applied to NumPy arrays can be used for Pandas _Series_:

In [26]:
g7_pop

Canada      35.467
France      63.951
Germany     80.940
Italy       60.665
Japan      127.061
UK          64.511
USA        318.523
Name: G7 Population in millions, dtype: float64

In [27]:
g7_pop > 70

Canada     False
France     False
Germany     True
Italy      False
Japan       True
UK         False
USA         True
Name: G7 Population in millions, dtype: bool

In [29]:
g7_pop[g7_pop > 70]

Germany     80.940
Japan      127.061
USA        318.523
Name: G7 Population in millions, dtype: float64

In [30]:
g7_pop.mean()

107.30257142857144

In [31]:
g7_pop.std()

97.24996987121581

In [37]:
# Keep only the values lying more than half a standard deviation away from
# the mean, on either side; `|` combines the two boolean masks element-wise.
g7_pop[(g7_pop < g7_pop.mean() - g7_pop.std() / 2) |
       (g7_pop > g7_pop.mean() + g7_pop.std() / 2)]

Canada     35.467
USA       318.523
Name: G7 Population in millions, dtype: float64

.

### Operations and methods
Series also support vectorized operations and aggregation functions, just like NumPy arrays.

In [33]:
g7_pop

Canada      35.467
France      63.951
Germany     80.940
Italy       60.665
Japan      127.061
UK          64.511
USA        318.523
Name: G7 Population in millions, dtype: float64

In [34]:
g7_pop * 1_000_000

Canada      35467000.0
France      63951000.0
Germany     80940000.0
Italy       60665000.0
Japan      127061000.0
UK          64511000.0
USA        318523000.0
Name: G7 Population in millions, dtype: float64

In [35]:
g7_pop.mean()

107.30257142857144

In [38]:
np.log(g7_pop)

Canada     3.568603
France     4.158117
Germany    4.393708
Italy      4.105367
Japan      4.844667
UK         4.166836
USA        5.763695
Name: G7 Population in millions, dtype: float64

In [39]:
g7_pop['France': 'Italy'].mean()

68.51866666666666

.

### Boolean arrays
(These work the same way as in NumPy.)

In [40]:
g7_pop[(g7_pop > 80) | (g7_pop < 40)]

Canada      35.467
Germany     80.940
Japan      127.061
USA        318.523
Name: G7 Population in millions, dtype: float64

In [41]:
g7_pop > 80

Canada     False
France     False
Germany     True
Italy      False
Japan       True
UK         False
USA         True
Name: G7 Population in millions, dtype: bool

In [42]:
g7_pop < 40

Canada      True
France     False
Germany    False
Italy      False
Japan      False
UK         False
USA        False
Name: G7 Population in millions, dtype: bool

In [43]:
g7_pop < 200

Canada      True
France      True
Germany     True
Italy       True
Japan       True
UK          True
USA        False
Name: G7 Population in millions, dtype: bool

In [44]:
g7_pop[(g7_pop > 80) & (g7_pop < 200)]

Germany     80.940
Japan      127.061
Name: G7 Population in millions, dtype: float64

.

### Modifying series

In [45]:
g7_pop['Canada'] = 40.5

In [46]:
g7_pop

Canada      40.500
France      63.951
Germany     80.940
Italy       60.665
Japan      127.061
UK          64.511
USA        318.523
Name: G7 Population in millions, dtype: float64

In [47]:
g7_pop.iloc[-1] = 500

In [48]:
g7_pop

Canada      40.500
France      63.951
Germany     80.940
Italy       60.665
Japan      127.061
UK          64.511
USA        500.000
Name: G7 Population in millions, dtype: float64

In [49]:
g7_pop[g7_pop < 70]

Canada    40.500
France    63.951
Italy     60.665
UK        64.511
Name: G7 Population in millions, dtype: float64

In [50]:
# Overwrite, in place, every entry selected by the boolean mask (< 70).
g7_pop.loc[g7_pop < 70] = 99.99

In [51]:
g7_pop

Canada      99.990
France      99.990
Germany     80.940
Italy       99.990
Japan      127.061
UK          99.990
USA        500.000
Name: G7 Population in millions, dtype: float64