## Indexing

In [13]:
# importing the necessary libraries

import pandas as pd
import numpy as np

In [14]:
# Build a daily DatetimeIndex of 20 consecutive dates beginning on 1 Jan 2020;
# it becomes the row index of the demo frame below.
time_series = pd.date_range(start='1/1/2020', periods=20)

# Fill the frame with standard-normal draws: one row per date, five columns.
# NOTE: no seed is set in this cell, so the values differ from run to run.
df = pd.DataFrame(
    data=np.random.randn(len(time_series), 5),
    index=time_series,
    columns=['Column 1', 'Column 2', 'Column 3', 'Column 4', 'Column 5'],
)

In [15]:
# Let's check the DataFrame that we have just created: a bare expression on the
# last line of a cell is shown as that cell's output.

df

Unnamed: 0,Column 1,Column 2,Column 3,Column 4,Column 5
2020-01-01,-0.506672,-0.223504,1.374367,-0.673617,-1.594992
2020-01-02,-0.288263,-0.308458,0.118085,0.5526,0.057227
2020-01-03,0.208609,0.340054,1.385632,-1.391342,1.447169
2020-01-04,0.755634,-0.584098,1.462399,0.169597,0.646324
2020-01-05,-2.071256,-0.095079,0.967895,-0.553546,-1.772507
2020-01-06,-0.553698,1.113536,-0.208415,-0.971505,0.448935
2020-01-07,2.179848,1.11586,-0.742047,0.039219,1.084622
2020-01-08,1.77319,-0.763939,0.731432,0.760961,-1.826588
2020-01-09,-1.410691,-0.435762,-0.31273,-0.420567,1.313196
2020-01-10,-2.198756,-0.140123,-0.278568,1.416964,-2.059844


In [16]:
# Indexing with a single column label returns that column as a Series
# (still indexed by the frame's dates).
df['Column 1']

2020-01-01   -0.506672
2020-01-02   -0.288263
2020-01-03    0.208609
2020-01-04    0.755634
2020-01-05   -2.071256
2020-01-06   -0.553698
2020-01-07    2.179848
2020-01-08    1.773190
2020-01-09   -1.410691
2020-01-10   -2.198756
2020-01-11   -0.367993
2020-01-12    1.143618
2020-01-13    0.316275
2020-01-14   -1.233144
2020-01-15    1.369500
2020-01-16   -0.822112
2020-01-17   -0.101630
2020-01-18   -1.573445
2020-01-19   -0.658735
2020-01-20    0.400129
Freq: D, Name: Column 1, dtype: float64

In [17]:
# Indexing with a list of column labels returns a DataFrame containing only
# those columns, in the order listed.
df[['Column 1', 'Column 2', 'Column 3']]

Unnamed: 0,Column 1,Column 2,Column 3
2020-01-01,-0.506672,-0.223504,1.374367
2020-01-02,-0.288263,-0.308458,0.118085
2020-01-03,0.208609,0.340054,1.385632
2020-01-04,0.755634,-0.584098,1.462399
2020-01-05,-2.071256,-0.095079,0.967895
2020-01-06,-0.553698,1.113536,-0.208415
2020-01-07,2.179848,1.11586,-0.742047
2020-01-08,1.77319,-0.763939,0.731432
2020-01-09,-1.410691,-0.435762,-0.31273
2020-01-10,-2.198756,-0.140123,-0.278568


In [18]:
# Time-series-specific indexing: a column taken from the date-indexed frame can
# be looked up directly with one of the index's own timestamps.

col1 = df['Column 1']
col1.loc[time_series[3]]  # label lookup with the 4th timestamp (2020-01-04)

0.755634442895147

In [19]:
# Create a small DataFrame from a dict that maps column name -> column values.
x = pd.DataFrame({'x': [1, 2, 3], 'y': [3, 4, 5]})

# Selection by position: overwrite the row at integer position 1 with new
# values. `.iloc` is the positional indexer (`.loc` selects by label, which only
# coincides with position here because the frame has the default 0..n-1 index).
# The dict supplies the new value for each column.
x.iloc[1] = {'x': 9, 'y': 99}

# Print the resulting DataFrame.
print(x)

   x   y
0  1   3
1  9  99
2  3   5


## Slicing

In [20]:
# Positional slicing with .iloc: the first five rows and the first two columns
# (the end of each slice is exclusive).
df.iloc[:5, 0:2]

Unnamed: 0,Column 1,Column 2
2020-01-01,-0.506672,-0.223504
2020-01-02,-0.288263,-0.308458
2020-01-03,0.208609,0.340054
2020-01-04,0.755634,-0.584098
2020-01-05,-2.071256,-0.095079


In [21]:
# Striding: a slice step of 3 keeps every third row, starting from the first.
df.iloc[::3]

Unnamed: 0,Column 1,Column 2,Column 3,Column 4,Column 5
2020-01-01,-0.506672,-0.223504,1.374367,-0.673617,-1.594992
2020-01-04,0.755634,-0.584098,1.462399,0.169597,0.646324
2020-01-07,2.179848,1.11586,-0.742047,0.039219,1.084622
2020-01-10,-2.198756,-0.140123,-0.278568,1.416964,-2.059844
2020-01-13,0.316275,0.946194,1.169399,-0.609133,0.136591
2020-01-16,-0.822112,-0.20922,-1.553531,1.470538,-0.962117
2020-01-19,-0.658735,0.197481,-0.258119,-1.348261,-0.546297


## Filtering

In [22]:
# Boolean-mask filtering: keep only the rows whose 'Column 3' value is negative.
df[(df['Column 3'] < 0)]

Unnamed: 0,Column 1,Column 2,Column 3,Column 4,Column 5
2020-01-06,-0.553698,1.113536,-0.208415,-0.971505,0.448935
2020-01-07,2.179848,1.11586,-0.742047,0.039219,1.084622
2020-01-09,-1.410691,-0.435762,-0.31273,-0.420567,1.313196
2020-01-10,-2.198756,-0.140123,-0.278568,1.416964,-2.059844
2020-01-11,-0.367993,1.378564,-1.204453,-0.473113,-0.794758
2020-01-14,-1.233144,0.239735,-1.435708,0.684846,1.07583
2020-01-16,-0.822112,-0.20922,-1.553531,1.470538,-0.962117
2020-01-18,-1.573445,1.087323,-1.315079,0.293028,-0.680757
2020-01-19,-0.658735,0.197481,-0.258119,-1.348261,-0.546297
2020-01-20,0.400129,0.635901,-0.002345,-1.364261,-0.30502


In [23]:
# Rows where 'Column 1' is negative and 'Column 2' is positive, restricted to
# 'Column 4' and 'Column 5'. The row mask and the column list are passed
# together in a single .loc call.
df.loc[
    (df['Column 1'] < 0) & (df['Column 2'] > 0),
    ['Column 4', 'Column 5'],
]

Unnamed: 0,Column 4,Column 5
2020-01-06,-0.971505,0.448935
2020-01-11,-0.473113,-0.794758
2020-01-14,0.684846,1.07583
2020-01-18,0.293028,-0.680757
2020-01-19,-1.348261,-0.546297
