In [1]:
import pandas as pd
import numpy as np

In [2]:
%%HTML
<!-- Inline embed of a YouTube video (id HW29067qVWk), rendered through the %%HTML cell magic. -->
<iframe height="280" src="https://www.youtube.com/embed/HW29067qVWk" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>

## Series Examples

In [6]:
# Series with an explicit integer index (labels 4..1) and a compact int8 dtype.
# The index labels are independent of the positions 0..3.
ds = pd.Series([1, 2, 3, 4], dtype='int8', index=[4, 3, 2, 1])
# A bare trailing expression lets Jupyter display the value; print() is not needed.
ds

4    1
3    2
2    3
1    4
dtype: int8


In [9]:
# Labelled Series: the index keeps the order in which the labels are listed
# (d, a, c, b, e), it is not sorted.
ds = pd.Series(
    [4, 1, 3, 2, 5],
    index=["d", "a", "c", "b", "e"],
)

ds


d    4
a    1
c    3
b    2
e    5
dtype: int64

In [14]:
# Select several entries at once by passing a list of index labels.
ds[['a','b']]

a    1
b    2
dtype: int64

In [17]:
# Slice with a step of 2: keeps every second entry (d, c, e), counted by position.
ds[::2]

d    4
c    3
e    5
dtype: int64

In [23]:
# Display the full Series as the reference for the masking examples that follow.
ds

d    4
a    1
c    3
b    2
e    5
dtype: int64

### Masking

In [24]:
# Boolean masking: comparing a Series with a scalar yields a boolean Series on
# the same index, which can then be used to filter the values.
# For the current ds (d=4, a=1, c=3, b=2, e=5) the mask is
#   [True, False, True, False, True]
# so only d, c and e survive the filter.
mask = ds > 2
ds[mask]

d    4
c    3
e    5
dtype: int64

In [29]:
# Overwrite the value stored under label 'b' in place; every later cell sees
# the modified Series (b == 4 instead of 2).
ds['b'] = 4

In [30]:
# Boolean mask with an equality test: keep only the entries whose value is 4.
ds[ds==4]

d    4
b    4
dtype: int64

In [31]:
# Show the whole Series to confirm the in-place assignment: 'b' now holds 4.
ds

d    4
a    1
c    3
b    4
e    5
dtype: int64

### Series arithmetic is aligned by index

In [39]:
# Two Series sharing the same index labels ("a" and "c").
ds1 = pd.Series({"a": 1, "c": 3})
ds2 = pd.Series({"a": 2, "c": 3})


In [40]:
# Display the first operand.
ds1

a    1
c    3
dtype: int64

In [41]:
# Display the second operand.
ds2

a    2
c    3
dtype: int64

In [42]:
# Addition pairs values by index label: a -> 1 + 2, c -> 3 + 3.
ds1+ds2

a    3
c    6
dtype: int64

## DataFrame Examples

### Create DataFrame

In [44]:
# Small labelled Series used as the input for the DataFrame constructor.
ds1 = pd.Series(data=[1, 3], index=list("ac"))
ds1

a    1
c    3
dtype: int64

In [51]:
# A Series becomes a single-column DataFrame; the Series index is kept as the
# row index and the unnamed column is labelled 0.
df = pd.DataFrame(ds1)
df

Unnamed: 0,0
a,1
c,3


In [57]:
# Values 1..9 laid out row by row in a 3x3 array.
data = np.reshape(np.arange(1, 10), (3, 3))
data

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [58]:

# Wrap the 3x3 array with row labels 1..3 and column labels A..C.
df = pd.DataFrame(data, index=[1, 2, 3], columns=list("ABC"))
df

Unnamed: 0,A,B,C
1,1,2,3
2,4,5,6
3,7,8,9


## DF Manipulations

### Indexing

In [59]:
# Display the 3x3 frame as the reference for the indexing examples that follow.
df

Unnamed: 0,A,B,C
1,1,2,3
2,4,5,6
3,7,8,9


In [62]:
# A single column name returns that column as a Series (named 'A').
df['A']

1    1
2    4
3    7
Name: A, dtype: int64

In [63]:
# A list of column names returns a DataFrame restricted to those columns.
df[['A','C']]

Unnamed: 0,A,C
1,1,3
2,4,6
3,7,9


In [66]:
# Label-based scalar lookup: row labelled 2, column 'A'.
df.loc[2,'A']


4

In [67]:
# Position-based scalar lookup: second row, first column. This is the same cell
# as df.loc[2, 'A'] because the row labels start at 1.
df.iloc[1,0]

4

### loc vs iloc vs ['C'] (three ways to select a column)

In [69]:
# Display the frame again; the next three cells select column 'C' in three ways.
df

Unnamed: 0,A,B,C
1,1,2,3
2,4,5,6
3,7,8,9


In [68]:
# By position: all rows, last column.
df.iloc[:,-1]

1    3
2    6
3    9
Name: C, dtype: int64

In [70]:
# By label: all rows, column 'C'.
df.loc[:,'C']

1    3
2    6
3    9
Name: C, dtype: int64

In [71]:
# Plain bracket shorthand for selecting column 'C'.
df['C']

1    3
2    6
3    9
Name: C, dtype: int64

### Do not chain indexing operations

In [72]:
# Display the frame as the reference for the chained-indexing example.
df

Unnamed: 0,A,B,C
1,1,2,3
2,4,5,6
3,7,8,9


In [80]:
# Avoid chained indexing: df['C'] first builds an intermediate Series and
# [:-1] then slices that intermediate positionally, i.e. two separate indexing
# calls instead of one. Reading works, but assigning through such a chain can
# trigger SettingWithCopyWarning and may not modify df at all; use a single
# .loc / .iloc call instead.
df['C'][:-1]

1    3
2    6
Name: C, dtype: int64

In [84]:
# Single-call alternative: rows and column are selected in one .loc lookup.
# Label slices include the end label, so ':2' returns the rows labelled 1 and 2.
df.loc[:2,'C']

1    3
2    6
Name: C, dtype: int64

## DF IO methods

In [85]:
# Load the drinks dataset from a path relative to the notebook's working directory.
# NOTE: this rebinds `df`; the 3x3 example frame from the earlier cells is replaced.
csv_file = './datasets/drinks.csv'
df = pd.read_csv(filepath_or_buffer=csv_file)
df

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,AS
1,Albania,89,132,54,4.9,EU
2,Algeria,25,0,14,0.7,AF
3,Andorra,245,138,312,12.4,EU
4,Angola,217,57,45,5.9,AF
...,...,...,...,...,...,...
188,Venezuela,333,100,3,7.7,SA
189,Vietnam,111,2,1,2.0,AS
190,Yemen,6,0,0,0.1,AS
191,Zambia,32,19,4,2.5,AF


In [88]:
# Preview the first 3 rows.
df.head(3)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,AS
1,Albania,89,132,54,4.9,EU
2,Algeria,25,0,14,0.7,AF


In [89]:
# Preview the last 3 rows.
df.tail(3)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
190,Yemen,6,0,0,0.1,AS
191,Zambia,32,19,4,2.5,AF
192,Zimbabwe,64,18,4,4.7,AF


In [90]:
# (rows, columns) of the loaded dataset.
df.shape

(193, 6)

Index(['country', 'beer_servings', 'spirit_servings', 'wine_servings',
       'total_litres_of_pure_alcohol', 'continent'],
      dtype='object')