### Indexing, Selecting, and Filtering Data with Pandas ###

In [1]:
import pandas as pd
import numpy as np

In [7]:
# Seed the generator so the random frame (and every output derived from it)
# is identical after Restart Kernel -> Run All. Outputs stored from earlier,
# unseeded runs will differ until the notebook is re-executed.
SEED = 42
rng = np.random.default_rng(SEED)

# 20 consecutive daily timestamps starting 2022-01-01; used as the row index.
time_series = pd.date_range('2022-01-01', periods=20)

# One row of standard-normal samples per timestamp, five columns wide.
# The row count is taken from the index so the two can never drift apart.
df = pd.DataFrame(
    rng.standard_normal((len(time_series), 5)),
    index=time_series,
    columns=['Column 1', 'Column 2', 'Column 3', 'Column 4', 'Column 5'],
)

In [8]:
# Display the random frame (20 daily rows x 5 columns) as the cell's rich output.
df

Unnamed: 0,Column 1,Column 2,Column 3,Column 4,Column 5
2022-01-01,0.546506,1.795413,0.087071,0.395324,-0.306444
2022-01-02,-1.115679,-1.450695,-0.096523,-0.153766,0.054588
2022-01-03,1.881724,-1.509677,0.88766,0.334789,0.284747
2022-01-04,0.216806,0.261973,-0.683377,0.070256,0.365888
2022-01-05,-0.404587,-1.082326,-1.19224,1.350658,1.415363
2022-01-06,1.477276,-0.487695,0.663266,0.89965,-0.233616
2022-01-07,1.4378,0.185317,0.749567,-0.48897,-1.259818
2022-01-08,-0.496188,0.756347,-0.468596,-0.150037,-0.705797
2022-01-09,0.723633,-0.009474,-0.700412,-1.160111,-2.800514
2022-01-10,0.661667,-1.600436,-0.821111,0.221858,0.423527


In [9]:
# Select one column by its label; the result is a Series that keeps the date index.
df.loc[:, 'Column 1']

2022-01-01    0.546506
2022-01-02   -1.115679
2022-01-03    1.881724
2022-01-04    0.216806
2022-01-05   -0.404587
2022-01-06    1.477276
2022-01-07    1.437800
2022-01-08   -0.496188
2022-01-09    0.723633
2022-01-10    0.661667
2022-01-11   -1.639306
2022-01-12   -0.583358
2022-01-13   -0.833138
2022-01-14    1.143871
2022-01-15   -0.413067
2022-01-16   -1.552272
2022-01-17    1.722033
2022-01-18    0.368298
2022-01-19    1.623658
2022-01-20    0.651723
Freq: D, Name: Column 1, dtype: float64

In [10]:
# Select several columns at once by passing a list of labels; the result is a DataFrame.
df.loc[:, ['Column 1', 'Column 2', 'Column 3']]

Unnamed: 0,Column 1,Column 2,Column 3
2022-01-01,0.546506,1.795413,0.087071
2022-01-02,-1.115679,-1.450695,-0.096523
2022-01-03,1.881724,-1.509677,0.88766
2022-01-04,0.216806,0.261973,-0.683377
2022-01-05,-0.404587,-1.082326,-1.19224
2022-01-06,1.477276,-0.487695,0.663266
2022-01-07,1.4378,0.185317,0.749567
2022-01-08,-0.496188,0.756347,-0.468596
2022-01-09,0.723633,-0.009474,-0.700412
2022-01-10,0.661667,-1.600436,-0.821111


In [11]:
# Label lookup on the date index: take Column 1 as a Series, then fetch the
# value stored at the fourth timestamp of the index (2022-01-04).
col1 = df.loc[:, 'Column 1']
col1.loc[time_series[3]]

0.21680611504454886

In [12]:
# Build a small frame from a dict: keys become column names, lists become the column values.
x = pd.DataFrame({'x': [1, 2, 3], 'y': [3, 4, 5]})

# Label-based assignment: .loc[1] addresses the row whose index *label* is 1.
# With the default integer index used here that label happens to coincide with
# position 1; use .iloc when a purely positional lookup is intended.
# The dict supplies the new value for each column of that row.
x.loc[1] = {'x': 9, 'y': 99}

# Leave the frame as the cell's last expression so Jupyter renders it as a
# rich table instead of the plain-text dump produced by print().
x

   x   y
0  1   3
1  9  99
2  3   5


### Slicing

In [13]:
# Positional slice (end-exclusive): the first five rows and the first two columns.
df.iloc[0:5, :2]

Unnamed: 0,Column 1,Column 2
2022-01-01,0.546506,1.795413
2022-01-02,-1.115679,-1.450695
2022-01-03,1.881724,-1.509677
2022-01-04,0.216806,0.261973
2022-01-05,-0.404587,-1.082326


In [14]:
# Positional slice with a step: every third row, starting from the first one.
df.iloc[::3]

Unnamed: 0,Column 1,Column 2,Column 3,Column 4,Column 5
2022-01-01,0.546506,1.795413,0.087071,0.395324,-0.306444
2022-01-04,0.216806,0.261973,-0.683377,0.070256,0.365888
2022-01-07,1.4378,0.185317,0.749567,-0.48897,-1.259818
2022-01-10,0.661667,-1.600436,-0.821111,0.221858,0.423527
2022-01-13,-0.833138,-0.46718,-0.159229,0.196322,2.028993
2022-01-16,-1.552272,-0.756116,0.450353,-1.919512,-0.497483
2022-01-19,1.623658,-1.186866,1.226944,1.062542,-0.636761


In [None]:
### Filtering

In [15]:
# Boolean-mask filter: keep only the rows whose Column 3 value is below zero.
df.loc[df['Column 3'].lt(0)]

Unnamed: 0,Column 1,Column 2,Column 3,Column 4,Column 5
2022-01-02,-1.115679,-1.450695,-0.096523,-0.153766,0.054588
2022-01-04,0.216806,0.261973,-0.683377,0.070256,0.365888
2022-01-05,-0.404587,-1.082326,-1.19224,1.350658,1.415363
2022-01-08,-0.496188,0.756347,-0.468596,-0.150037,-0.705797
2022-01-09,0.723633,-0.009474,-0.700412,-1.160111,-2.800514
2022-01-10,0.661667,-1.600436,-0.821111,0.221858,0.423527
2022-01-11,-1.639306,2.0682,-2.24772,1.393602,0.703139
2022-01-13,-0.833138,-0.46718,-0.159229,0.196322,2.028993
2022-01-15,-0.413067,0.182331,-0.620634,1.21363,-0.074433
2022-01-18,0.368298,-0.340773,-2.17533,-0.523015,0.205771


In [16]:
# Keep the rows where Column 1 is negative AND Column 2 is positive,
# but display only Column 4 and Column 5 for those rows.
# A single .loc call selects rows and columns together; the chained form
# df[mask][cols] first builds an intermediate frame holding every column.
neg_col1_pos_col2 = (df['Column 1'] < 0) & (df['Column 2'] > 0)
df.loc[neg_col1_pos_col2, ['Column 4', 'Column 5']]

Unnamed: 0,Column 4,Column 5
2022-01-08,-0.150037,-0.705797
2022-01-11,1.393602,0.703139
2022-01-12,1.628079,-0.459972
2022-01-15,1.21363,-0.074433
