In [76]:
import numpy as np
import pandas as pd

# pandas series (1 of 2 foundational data structures)
- a series is a sequence of elements that looks like a Python list or NumPy array
- you can attach a lot of attributes to it (associated datatype, name, etc.)
- underneath, the values are held in a NumPy array
- it also behaves like a dictionary (but ordered): each element can be looked up by its index label

In [77]:
# Populations of the G7 countries, in millions of people.
g7_pop = pd.Series(
    [
        35.467,
        63.951,
        80.940,
        60.665,
        127.061,
        64.511,
        318.523,
    ]
)
# A series can carry a descriptive name; it is shown at the bottom of the printout.
g7_pop.name = "G7 Population in millions of people"
print(g7_pop)


0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
Name: G7 Population in millions of people, dtype: float64


In [78]:
g7_pop[2] # select a single element; when no index is given, a series gets a default integer index (0, 1, 2, ...)

80.94

In [79]:
# Replace the default integer index with explicit labels, so that each element
# of the series can be referred to by a meaningful name.
g7_pop.index = ["Canada", "France", "Germany", "Italy", "Japan", "UK", "USA"]
print(g7_pop)

Canada      35.467
France      63.951
Germany     80.940
Italy       60.665
Japan      127.061
UK          64.511
USA        318.523
Name: G7 Population in millions of people, dtype: float64


In [80]:
# Alternative construction: a dict supplies the labels (keys) and the values
# together, and the name is passed straight to the constructor.
g7 = pd.Series(
    data={
        "Canada": 35.467,
        "France": 63.951,
        "Germany": 80.94,
        "Italy": 60.665,
        "Japan": 127.061,
        "UK": 64.511,
        "USA": 318.523,
    },
    name="G7 Population in millions of people",
)
print(g7)


Canada      35.467
France      63.951
Germany     80.940
Italy       60.665
Japan      127.061
UK          64.511
USA        318.523
Name: G7 Population in millions of people, dtype: float64


In [81]:
# Alternative construction: values and index labels are given as two parallel
# lists, and the name is passed straight to the constructor.
g77 = pd.Series(
    data=[35.467, 63.951, 80.940, 60.665, 127.061, 64.511, 318.523],
    index=[
        "Canada",
        "France",
        "Germany",
        "Italy",
        "Japan",
        "UK",
        "USA",
    ],
    name="G7 Population in millions of people",
)
print(g77)

Canada      35.467
France      63.951
Germany     80.940
Italy       60.665
Japan      127.061
UK          64.511
USA        318.523
Name: G7 Population in millions of people, dtype: float64


In [82]:
# with a labelled index, a single value is looked up by its label
g7['UK']

64.511

In [83]:
# the series is still ordered, so elements can also be reached by integer position through .iloc
g7.iloc[-1] # last element

318.523

In [84]:
# .loc looks an element up explicitly by its index label
g7.loc['France']

63.951

In [85]:
# select several elements at once by passing a list of labels; the result follows the order of the list
g7[['Italy', 'Germany']]

Italy      60.665
Germany    80.940
Name: G7 Population in millions of people, dtype: float64

In [86]:
# select several elements at once by passing a list of integer positions to .iloc
g7.iloc[[3, 4]]

Italy     60.665
Japan    127.061
Name: G7 Population in millions of people, dtype: float64

In [87]:
# label-based slices include BOTH endpoints (positional slices do not)
g7.loc['Canada':'Italy'] # Italy is part of the result

Canada     35.467
France     63.951
Germany    80.940
Italy      60.665
Name: G7 Population in millions of people, dtype: float64

In [88]:
g7.iloc[4:6] # positional slice: upper limit not included, so only positions 4 and 5 are returned

Japan    127.061
UK        64.511
Name: G7 Population in millions of people, dtype: float64

# operations
- performed directly on the pandas data structure, element by element
- basic arithmetic, boolean (logical), comparison, and other operations are broadcast to every element
- good for selection/filtering

In [89]:
# a comparison is applied to every element and returns a boolean series with the same index
g7 > 70

Canada     False
France     False
Germany     True
Italy      False
Japan       True
UK         False
USA         True
Name: G7 Population in millions of people, dtype: bool

In [90]:
# a boolean series used as a mask keeps only the elements where the mask is True
g7[g7 > 50]

France      63.951
Germany     80.940
Italy       60.665
Japan      127.061
UK          64.511
USA        318.523
Name: G7 Population in millions of people, dtype: float64

In [91]:
# the threshold can itself be computed from the series (here: its mean)
g7[g7 > g7.mean()]

Japan    127.061
USA      318.523
Name: G7 Population in millions of people, dtype: float64

In [92]:
# masks are combined element-wise with | (or); each comparison needs its own
# parentheses because | binds more tightly than < and >
g7[(g7 < 50) | (g7 > g7.mean()-10)]

Canada     35.467
Japan     127.061
USA       318.523
Name: G7 Population in millions of people, dtype: float64

In [93]:
# scalar arithmetic is broadcast to every element (millions of people -> people)
g7 * 1_000_000

Canada      35467000.0
France      63951000.0
Germany     80940000.0
Italy       60665000.0
Japan      127061000.0
UK          64511000.0
USA        318523000.0
Name: G7 Population in millions of people, dtype: float64

In [94]:
np.log(g7) # numpy functions can be applied to a pandas series; the result is again a series with the same index

Canada     3.568603
France     4.158117
Germany    4.393708
Italy      4.105367
Japan      4.844667
UK         4.166836
USA        5.763695
Name: G7 Population in millions of people, dtype: float64

# modifications
- values are changed by simple assignment, as in other libraries (e.g. NumPy)

In [95]:
# Elements are reassigned in place, either by label or by position.
g77['Canada'] = 44.4 # by label
# Positional assignment goes through .iloc: a bare integer key on a series with
# string labels is ambiguous (position or label?) and is deprecated in recent
# pandas releases, where g77[-1] = ... no longer reliably means "last element".
g77.iloc[-1] = 411.22 # by position (last element)
print(g77)

Canada      44.400
France      63.951
Germany     80.940
Italy       60.665
Japan      127.061
UK          64.511
USA        411.220
Name: G7 Population in millions of people, dtype: float64


In [96]:
# conditional assignment: every element selected by the boolean mask receives the new value
g77.loc[g77 < 100] = 99.99
print(g77)

Canada      99.990
France      99.990
Germany     99.990
Italy       99.990
Japan      127.061
UK          99.990
USA        411.220
Name: G7 Population in millions of people, dtype: float64
