# Pandas

In [4]:
#!uv pip install pandas
import pandas as pd
import numpy as np

## Series

In [6]:
# Build a small integer Series holding 0..3, labelled with the letters 'a'-'d'.
s = pd.Series(
    data=np.arange(4),
    index=list('abcd'),
)

In [7]:
# Display the Series: index labels on the left, values on the right.
s

a    0
b    1
c    2
d    3
dtype: int64

In [8]:
# Element-wise comparison: yields a boolean Series that shares s's index.
s > 2

a    False
b    False
c    False
d     True
dtype: bool

In [9]:
# Boolean-mask selection: keep only the entries whose value exceeds 1.
s[s > 1]

c    2
d    3
dtype: int64

In [10]:
# Arithmetic mean of the Series values.
s.mean()

np.float64(1.5)

In [11]:
# Summary statistics (count, mean, std, min, quartiles, max) in a single call.
s.describe()

count    4.000000
mean     1.500000
std      1.290994
min      0.000000
25%      0.750000
50%      1.500000
75%      2.250000
max      3.000000
dtype: float64

## DataFrames

In [13]:
# Column-oriented records for five countries: each key becomes a DataFrame
# column, and the lists are aligned by position.
# NOTE(review): population looks like millions of inhabitants and area like
# km^2 -- the units are not stated here; confirm against the data source.
data = dict(
    country=['Belgium', 'France', 'Germany', 'Netherlands', 'United Kingdom'],
    population=[11.3, 64.3, 81.3, 16.9, 64.9],
    area=[30510, 671308, 357050, 41526, 244820],
    capital=['Brussels', 'Paris', 'Berlin', 'Amsterdam', 'London'],
)

In [14]:
# Build the DataFrame from the dict: keys become columns, and since no index
# is supplied the rows get a default integer index.
countries = pd.DataFrame(data)

In [15]:
# Display the full frame (only five rows, so no need for .head()).
countries

Unnamed: 0,country,population,area,capital
0,Belgium,11.3,30510,Brussels
1,France,64.3,671308,Paris
2,Germany,81.3,357050,Berlin
3,Netherlands,16.9,41526,Amsterdam
4,United Kingdom,64.9,244820,London


In [16]:
# Row labels of the frame; a RangeIndex here because no index was supplied.
countries.index

RangeIndex(start=0, stop=5, step=1)

In [17]:
# Underlying data as a NumPy array. `to_numpy()` is the accessor the pandas
# documentation recommends over the `.values` attribute. Because the columns
# hold mixed types (strings, floats, ints), the resulting array has
# dtype=object.
countries.to_numpy()

array([['Belgium', 11.3, 30510, 'Brussels'],
       ['France', 64.3, 671308, 'Paris'],
       ['Germany', 81.3, 357050, 'Berlin'],
       ['Netherlands', 16.9, 41526, 'Amsterdam'],
       ['United Kingdom', 64.9, 244820, 'London']], dtype=object)

In [18]:
# Use the 'country' column as the row index. This returns a new DataFrame;
# `countries` itself is left untouched because the result is not assigned.
countries.set_index('country')

Unnamed: 0_level_0,population,area,capital
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Belgium,11.3,30510,Brussels
France,64.3,671308,Paris
Germany,81.3,357050,Berlin
Netherlands,16.9,41526,Amsterdam
United Kingdom,64.9,244820,London


In [19]:
# Still the original frame with the default integer index.
countries

Unnamed: 0,country,population,area,capital
0,Belgium,11.3,30510,Brussels
1,France,64.3,671308,Paris
2,Germany,81.3,357050,Berlin
3,Netherlands,16.9,41526,Amsterdam
4,United Kingdom,64.9,244820,London


In [20]:
# Make 'country' the row index for the rest of the notebook.
# The frame is rebuilt from `data` and rebound rather than mutated with
# `inplace=True`: the in-place call raises KeyError when the cell is run a
# second time (the 'country' column is already gone), whereas this form can
# be re-run safely and always yields the same frame.
countries = pd.DataFrame(data).set_index('country')

In [21]:
# 'country' is now the index rather than a regular column.
countries

Unnamed: 0_level_0,population,area,capital
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Belgium,11.3,30510,Brussels
France,64.3,671308,Paris
Germany,81.3,357050,Berlin
Netherlands,16.9,41526,Amsterdam
United Kingdom,64.9,244820,London


In [22]:
# Selecting a single column by name returns a Series indexed by country.
countries['population']

country
Belgium           11.3
France            64.3
Germany           81.3
Netherlands       16.9
United Kingdom    64.9
Name: population, dtype: float64

In [23]:
# Confirm that a single selected column is a pandas Series.
type(countries['population'])

pandas.core.series.Series

In [24]:
# Label-based row lookup: the row for France, returned as a Series keyed by
# column name.
countries.loc['France']

population      64.3
area          671308
capital        Paris
Name: France, dtype: object

In [25]:
# Label-based scalar lookup: row 'France', column 'capital'.
countries.loc['France', 'capital']

'Paris'

In [26]:
# Position-based lookup with iloc: row 0, column 0.
countries.iloc[0, 0] # upper left element

np.float64(11.3)

In [27]:
# Derive a population-density column: population / area, scaled by 10**6.
# NOTE(review): population appears to be in millions and area in km^2, which
# would make this inhabitants per km^2 -- confirm the units.
countries['density'] = countries['population'].div(countries['area']).mul(10**6)

In [28]:
# The frame now carries the derived 'density' column.
countries

Unnamed: 0_level_0,population,area,capital,density
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Belgium,11.3,30510,Brussels,370.37037
France,64.3,671308,Paris,95.783158
Germany,81.3,357050,Berlin,227.699202
Netherlands,16.9,41526,Amsterdam,406.973944
United Kingdom,64.9,244820,London,265.092721


In [29]:
# Boolean Series flagging the countries whose density exceeds 250.
countries['density'] > 250

country
Belgium            True
France            False
Germany           False
Netherlands        True
United Kingdom     True
Name: density, dtype: bool

In [30]:
# Boolean mask, indexed by country, that is True where density exceeds 250.
# NOTE(review): despite its name, `high_pop` marks high *density*, not high
# population; the name is kept because a later cell refers to it.
high_pop = countries['density'].gt(250)

In [31]:
# Filter rows with the boolean mask: only the countries whose density is
# above 250 remain.
countries.loc[high_pop]

Unnamed: 0_level_0,population,area,capital,density
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Belgium,11.3,30510,Brussels,370.37037
Netherlands,16.9,41526,Amsterdam,406.973944
United Kingdom,64.9,244820,London,265.092721
