# Pandas Recap

In [2]:
import numpy as np
import pandas as pd # common way to import pandas

In [3]:
# Row labels for the example frame: six consecutive calendar days,
# the first of which is 2020-01-01.
dates = pd.date_range(
    start='20200101',
    periods=6,
)
dates

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
               '2020-01-05', '2020-01-06'],
              dtype='datetime64[ns]', freq='D')

In [4]:
# Seed the generator so the random example data is the same on every
# "Restart & Run All"; without a seed each run produces different numbers.
# NOTE: outputs saved from an earlier, unseeded run will differ until the
# notebook is re-executed.
rng = np.random.default_rng(42)
df = pd.DataFrame(rng.standard_normal((6, 4)), index=dates, columns=list('ABCD'))  # rows indexed by date, one column per letter A-D
df

Unnamed: 0,A,B,C,D
2020-01-01,1.396578,0.268719,1.563769,-0.27377
2020-01-02,-0.135201,-0.009364,-0.830953,0.082046
2020-01-03,0.567677,1.24315,-0.841439,2.50706
2020-01-04,0.639014,-1.925685,1.018186,-0.208892
2020-01-05,2.245296,-0.905825,-0.017656,0.185255
2020-01-06,-0.538783,-0.461148,-0.113673,-0.285494


## Selecting

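When indexing with plain brackets (`df[...]`), there are two behaviours: a column label (or a list of labels) selects columns, while a slice selects rows.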

In [5]:
df["A"] # a single column label selects that column and returns it as a Series

2020-01-01    1.396578
2020-01-02   -0.135201
2020-01-03    0.567677
2020-01-04    0.639014
2020-01-05    2.245296
2020-01-06   -0.538783
Freq: D, Name: A, dtype: float64

In [6]:
df[["A","B"]] # a list of column labels selects several columns and returns a DataFrame

Unnamed: 0,A,B
2020-01-01,1.396578,0.268719
2020-01-02,-0.135201,-0.009364
2020-01-03,0.567677,1.24315
2020-01-04,0.639014,-1.925685
2020-01-05,2.245296,-0.905825
2020-01-06,-0.538783,-0.461148


In [7]:
df["2020-01-01":"2020-01-02"] # a slice in plain brackets selects rows; with date labels both end points are included

Unnamed: 0,A,B,C,D
2020-01-01,1.396578,0.268719,1.563769,-0.27377
2020-01-02,-0.135201,-0.009364,-0.830953,0.082046


With `.loc[]`, we can select values by label (the "custom" index), using NumPy-style `[rows, columns]` indexing. Unlike NumPy, label slices include the end point.

In [12]:
df.loc["2020-01-01":"2020-01-03", "A":"B"] # label slices include the end point: row 2020-01-03 and column "B" are both returned

Unnamed: 0,A,B
2020-01-01,1.396578,0.268719
2020-01-02,-0.135201,-0.009364
2020-01-03,0.567677,1.24315


`.iloc[]` does the same, but by integer position (0-based); as in NumPy, slices exclude the end point.

In [13]:
df.iloc[0:3,2:4] # positional slices exclude the end: rows 0-2 and columns 2-3 (C and D)

Unnamed: 0,C,D
2020-01-01,1.563769,-0.27377
2020-01-02,-0.830953,0.082046
2020-01-03,-0.841439,2.50706


In [30]:
df.at[dates[0],"B"] # scalar lookup by row label (the first date) and column label

0.26871884107957483

In [26]:
dates[0] # the first label of the index, a pandas Timestamp

Timestamp('2020-01-01 00:00:00', freq='D')

In [33]:
df.at[df.index[0], "A"] # .at takes one row label and one column label and returns that single value

1.3965784315081455

With `.at[]` we can select only a single value, addressed by its row label and column label.

In [35]:
# .iat is purely positional: both indexers must be integers, so passing the
# column label "A" raises ValueError. The error is the point of this cell, so
# it is caught and printed, letting "Run All" continue past it.
try:
    df.iat[0, "A"]
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: iAt based indexing can only have integer indexers

In [36]:
# Read the worksheet named "Sheet1" from the example workbook, keeping the
# default integer row index and treating cells that contain "NA" as missing.
df_excel = pd.read_excel(
    'excel_example.xlsx',
    sheet_name='Sheet1',
    index_col=None,
    na_values=['NA'],
)

In [40]:
df_excel.iat[2,2] # both indexers are integer positions (third row, third column), so .iat works here

'Gent'

In [None]:
# `.ix` was deprecated in pandas 0.20 and removed in pandas 1.0, so accessing
# it raises AttributeError on current versions. Use `.loc` for label-based
# and `.iloc` for position-based selection instead.
df_excel.iloc[2, 2]  # third row, third column, selected by integer position