In [173]:
import pandas as pd
import numpy as np

# Accessing data with pandas

In [174]:
# Load the coffee sales table; the path is relative to the notebook's working directory.
coffee = pd.read_csv('./warmup-data/coffee.csv')
# Preview the first five rows (five is also the default for head()).
coffee.head(n=5)

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,15
2,Tuesday,Espresso,30
3,Tuesday,Latte,20
4,Wednesday,Espresso,35


## creating samples

In [175]:
# Draw 10 rows at random. The fixed random_state makes the draw repeatable, so a
# fresh kernel run shows the same rows; remove the argument to get a different
# sample on every execution.
coffee.sample(10, random_state=42)

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
12,Sunday,Espresso,45
11,Saturday,Latte,35
2,Tuesday,Espresso,30
10,Saturday,Espresso,45
13,Sunday,Latte,35
8,Friday,Espresso,45
7,Thursday,Latte,30
5,Wednesday,Latte,25
1,Monday,Latte,15


## using loc


### loc select rows

In [176]:
# .loc selects rows by index label; the four results are rendered one after another.
display(
    coffee.loc[0],          # one label -> that row as a Series
    coffee.loc[[0, 1, 2]],  # list of labels -> DataFrame holding those rows
    coffee.loc[0:3],        # label slice -> rows 0 through 3, end label included
    coffee.loc[::-1],       # negative step -> all rows in reverse order
)

Day              Monday
Coffee Type    Espresso
Units Sold           25
Name: 0, dtype: object

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,15
2,Tuesday,Espresso,30


Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,15
2,Tuesday,Espresso,30
3,Tuesday,Latte,20


Unnamed: 0,Day,Coffee Type,Units Sold
13,Sunday,Latte,35
12,Sunday,Espresso,45
11,Saturday,Latte,35
10,Saturday,Espresso,45
9,Friday,Latte,35
8,Friday,Espresso,45
7,Thursday,Latte,30
6,Thursday,Espresso,40
5,Wednesday,Latte,25
4,Wednesday,Espresso,35


### loc select columns

In [177]:
# Second .loc argument picks columns by label; rows 0 through 2 are kept in every case.
display(
    coffee.loc[0:2, 'Day'],                  # one column label -> Series
    coffee.loc[0:2, ['Day', 'Units Sold']],  # list of column labels -> DataFrame
    coffee.loc[0:2, 'Day':'Units Sold'],     # column label slice, both ends included
    coffee.loc[0:2, 'Coffee Type'::-1],      # from 'Coffee Type' backwards to the first column
)

0     Monday
1     Monday
2    Tuesday
Name: Day, dtype: object

Unnamed: 0,Day,Units Sold
0,Monday,25
1,Monday,15
2,Tuesday,30


Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,15
2,Tuesday,Espresso,30


Unnamed: 0,Coffee Type,Day
0,Espresso,Monday
1,Latte,Monday
2,Espresso,Tuesday


## using iloc

###  iloc select rows

In [178]:
# .iloc selects rows by integer position, not by index label.
display(
    coffee.iloc[0],          # one position -> that row as a Series
    coffee.iloc[[0, 1, 2]],  # list of positions -> DataFrame holding those rows
    coffee.iloc[0:3],        # position slice -> rows 0, 1, 2 (end position excluded)
    coffee.iloc[::-1],       # negative step -> all rows in reverse order
)

Day              Monday
Coffee Type    Espresso
Units Sold           25
Name: 0, dtype: object

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,15
2,Tuesday,Espresso,30


Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,15
2,Tuesday,Espresso,30


Unnamed: 0,Day,Coffee Type,Units Sold
13,Sunday,Latte,35
12,Sunday,Espresso,45
11,Saturday,Latte,35
10,Saturday,Espresso,45
9,Friday,Latte,35
8,Friday,Espresso,45
7,Thursday,Latte,30
6,Thursday,Espresso,40
5,Wednesday,Latte,25
4,Wednesday,Espresso,35


### iloc select columns

In [179]:
# Second .iloc argument picks columns by integer position; 0:2 keeps rows 0 and 1.
display(
    coffee.iloc[0:2, 0],       # one column position -> Series
    coffee.iloc[0:2, [0, 2]],  # list of column positions -> DataFrame
    coffee.iloc[0:2, 0:2],     # position slice, end excluded -> columns 0 and 1
    coffee.iloc[0:2, 2::-1],   # from position 2 backwards to position 0
)

0    Monday
1    Monday
Name: Day, dtype: object

Unnamed: 0,Day,Units Sold
0,Monday,25
1,Monday,15


Unnamed: 0,Day,Coffee Type
0,Monday,Espresso
1,Monday,Latte


Unnamed: 0,Units Sold,Coffee Type,Day
0,25,Espresso,Monday
1,15,Latte,Monday


## set a column's values as the index

In [180]:
# Use the values of the 'Day' column as row labels. The column itself is kept,
# so 'Day' shows up both as the index and as a regular column.
coffee.index = coffee['Day']
coffee

Unnamed: 0_level_0,Day,Coffee Type,Units Sold
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Monday,Monday,Espresso,25
Monday,Monday,Latte,15
Tuesday,Tuesday,Espresso,30
Tuesday,Tuesday,Latte,20
Wednesday,Wednesday,Espresso,35
Wednesday,Wednesday,Latte,25
Thursday,Thursday,Espresso,40
Thursday,Thursday,Latte,30
Friday,Friday,Espresso,45
Friday,Friday,Latte,35


In [181]:
# Label slice on an index with repeated labels: the result holds every row from
# the first 'Monday' through the last 'Tuesday', both endpoints included.
# NOTE(review): the slice presumably follows the stored row order of the index
# rather than any weekday ordering, and pandas may raise KeyError when a bound
# label is duplicated in non-adjacent rows of an unsorted index — confirm
# before relying on this pattern with other data.
display(coffee.loc['Monday':'Tuesday'])


Unnamed: 0_level_0,Day,Coffee Type,Units Sold
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Monday,Monday,Espresso,25
Monday,Monday,Latte,15
Tuesday,Tuesday,Espresso,30
Tuesday,Tuesday,Latte,20


## reset index

In [182]:
# Go back to plain integer row labels. drop=True discards the old 'Day' index
# instead of turning it into a column (a 'Day' column is already present).
coffee = coffee.reset_index(drop=True)
# Bare last expression so the notebook renders the resulting frame.
coffee

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,15
2,Tuesday,Espresso,30
3,Tuesday,Latte,20
4,Wednesday,Espresso,35
5,Wednesday,Latte,25
6,Thursday,Espresso,40
7,Thursday,Latte,30
8,Friday,Espresso,45
9,Friday,Latte,35


## set values with loc

### set single value

In [183]:
# Before: show the first two rows as they currently are.
display(coffee.head(2))
# Overwrite one cell, addressed by row label 1 and column label 'Units Sold'.
# This modifies `coffee` in place, so later cells see the new value.
coffee.loc[1, 'Units Sold'] = 10
# After: the same two rows, now reflecting the assignment.
coffee.head(2)

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,15


Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,10


### set multiple values

In [184]:
# Before: show the first five rows as they currently are.
display(coffee.head())
# Assign one scalar to a whole label slice of a column. .loc slices include the
# end label, so rows 0 through 5 (six rows) are overwritten — one more than the
# five rows head() displays. This modifies `coffee` in place.
coffee.loc[0:5, 'Units Sold'] = 0
# After: the same five rows, now zeroed.
coffee.head()

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,10
2,Tuesday,Espresso,30
3,Tuesday,Latte,20
4,Wednesday,Espresso,35


Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,0
1,Monday,Latte,0
2,Tuesday,Espresso,0
3,Tuesday,Latte,0
4,Wednesday,Espresso,0


## using at, iat

Get a single scalar value.

In [None]:
# Re-read the CSV so the 'Units Sold' values overwritten by the earlier .loc
# assignments are back to the original data.
coffee = pd.read_csv('./warmup-data/coffee.csv')
# Four ways to read the same single cell; each one yields a scalar.
display(
    coffee.loc[0, 'Units Sold'],  # row label, column label
    coffee.at[0, 'Units Sold'],   # row label, column label (scalar accessor)
    coffee.iloc[0, 2],            # row position, column position
    coffee.iat[0, 2],             # row position, column position (scalar accessor)
)

np.int64(25)

np.int64(25)

np.int64(25)

np.int64(25)

## get column

In [188]:
# Attribute access and bracket access return the same column as a Series.
display(
    coffee.Day.head(),     # attribute style; needs a column name that is a valid identifier
    coffee['Day'].head(),  # bracket style; works for any column name
)

0       Monday
1       Monday
2      Tuesday
3      Tuesday
4    Wednesday
Name: Day, dtype: object

0       Monday
1       Monday
2      Tuesday
3      Tuesday
4    Wednesday
Name: Day, dtype: object

## sort df

In [None]:
# Sort by a single column in descending order, then preview the first five rows.
(
    coffee
    .sort_values(by='Coffee Type', ascending=False)
    .head()
)

Unnamed: 0,Day,Coffee Type,Units Sold
1,Monday,Latte,15
3,Tuesday,Latte,20
5,Wednesday,Latte,25
7,Thursday,Latte,30
9,Friday,Latte,35


In [None]:
# Sort by several columns at once: 'Coffee Type' descending first, then
# 'Units Sold' ascending within each coffee type. The `ascending` list is
# matched to the `by` list entry by entry.
(
    coffee
    .sort_values(by=['Coffee Type', 'Units Sold'], ascending=[False, True])
    .head()
)

Unnamed: 0,Day,Coffee Type,Units Sold
1,Monday,Latte,15
3,Tuesday,Latte,20
5,Wednesday,Latte,25
7,Thursday,Latte,30
9,Friday,Latte,35


## iterating rows

In [197]:
# iterrows() yields one (index label, row Series) pair per row; head(2) keeps
# the demonstration to the first two rows.
for index, row in coffee.head(2).iterrows():
    display(index, row)

0

Day              Monday
Coffee Type    Espresso
Units Sold           25
Name: 0, dtype: object

1

Day            Monday
Coffee Type     Latte
Units Sold         15
Name: 1, dtype: object

In [199]:
# Individual fields of each row Series can be read by attribute or by key.
for index, row in coffee.head(2).iterrows():
    display(
        index,
        row.Day,             # attribute access
        row['Coffee Type'],  # key access; the name contains a space, so brackets are needed
    )

0

'Monday'

'Espresso'

1

'Monday'

'Latte'