# Importing necessary libraries

In [1]:
import numpy as np
import pandas as pd

In [2]:
from numpy.random import randn
np.random.seed(10)  # Seed NumPy's global random generator so randn() yields the same values on every run

# Creating a dataframe using Pandas

In [3]:
# 5 rows (A-E) by 4 columns (W-Z) filled with random draws from randn.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('ABCDE'),
    columns=['W', 'X', 'Y', 'Z'],
)
df

Unnamed: 0,W,X,Y,Z
A,1.331587,0.715279,-1.5454,-0.008384
B,0.621336,-0.720086,0.265512,0.108549
C,0.004291,-0.1746,0.433026,1.203037
D,-0.965066,1.028274,0.22863,0.445138
E,-1.136602,0.135137,1.484537,-1.079805


# Selection and Indexing 

In [4]:
df['W']

A    1.331587
B    0.621336
C    0.004291
D   -0.965066
E   -1.136602
Name: W, dtype: float64

##### If you need to access two columns together, pass a list of column names

In [5]:
df[['W','Z']] # pass a list of columns

Unnamed: 0,W,Z
A,1.331587,-0.008384
B,0.621336,0.108549
C,0.004291,1.203037
D,-0.965066,0.445138
E,-1.136602,-1.079805


#### Another way is the `.` (dot) operator, which I do not recommend because it can cause confusion later (for example, with column names that contain spaces or clash with DataFrame methods)

In [6]:
df.W

A    1.331587
B    0.621336
C    0.004291
D   -0.965066
E   -1.136602
Name: W, dtype: float64

In [7]:
type(df['W'])

pandas.core.series.Series

In [8]:
type(df.W)

pandas.core.series.Series

### Creating a new column 

In [9]:
# A new column is created by assignment; here it holds the element-wise sum of X and W.
df['New Column'] = df['X'] + df['W']

In [10]:
df

Unnamed: 0,W,X,Y,Z,New Column
A,1.331587,0.715279,-1.5454,-0.008384,2.046865
B,0.621336,-0.720086,0.265512,0.108549,-0.09875
C,0.004291,-0.1746,0.433026,1.203037,-0.170309
D,-0.965066,1.028274,0.22863,0.445138,0.063208
E,-1.136602,0.135137,1.484537,-1.079805,-1.001465


In [11]:
# Store the same values under the shorter column name 'XW'.
df['XW'] = df['New Column']

In [12]:
df

Unnamed: 0,W,X,Y,Z,New Column,XW
A,1.331587,0.715279,-1.5454,-0.008384,2.046865,2.046865
B,0.621336,-0.720086,0.265512,0.108549,-0.09875,-0.09875
C,0.004291,-0.1746,0.433026,1.203037,-0.170309,-0.170309
D,-0.965066,1.028274,0.22863,0.445138,0.063208,0.063208
E,-1.136602,0.135137,1.484537,-1.079805,-1.001465,-1.001465


In [13]:
df.drop(labels='New Column',axis=1) # axis=1 drops a column; axis=0 would drop a row. The result is a new DataFrame; df itself is unchanged.

Unnamed: 0,W,X,Y,Z,XW
A,1.331587,0.715279,-1.5454,-0.008384,2.046865
B,0.621336,-0.720086,0.265512,0.108549,-0.09875
C,0.004291,-0.1746,0.433026,1.203037,-0.170309
D,-0.965066,1.028274,0.22863,0.445138,0.063208
E,-1.136602,0.135137,1.484537,-1.079805,-1.001465


In [14]:
df  # because we didn't put inplace= True in our parameters

Unnamed: 0,W,X,Y,Z,New Column,XW
A,1.331587,0.715279,-1.5454,-0.008384,2.046865,2.046865
B,0.621336,-0.720086,0.265512,0.108549,-0.09875,-0.09875
C,0.004291,-0.1746,0.433026,1.203037,-0.170309,-0.170309
D,-0.965066,1.028274,0.22863,0.445138,0.063208,0.063208
E,-1.136602,0.135137,1.484537,-1.079805,-1.001465,-1.001465


In [15]:
# Permanent delete: rebind df to the frame returned by drop().
# Same end result as passing inplace=True, but the call stays chainable
# and the cell does not mutate an object that earlier cells displayed.
df = df.drop(columns='New Column')

In [16]:
df

Unnamed: 0,W,X,Y,Z,XW
A,1.331587,0.715279,-1.5454,-0.008384,2.046865
B,0.621336,-0.720086,0.265512,0.108549,-0.09875
C,0.004291,-0.1746,0.433026,1.203037,-0.170309
D,-0.965066,1.028274,0.22863,0.445138,0.063208
E,-1.136602,0.135137,1.484537,-1.079805,-1.001465


### Selecting Row

In [17]:
df.loc['A']

W     1.331587
X     0.715279
Y    -1.545400
Z    -0.008384
XW    2.046865
Name: A, dtype: float64

#### Selection based on position instead of labels

In [18]:
df.iloc[0]

W     1.331587
X     0.715279
Y    -1.545400
Z    -0.008384
XW    2.046865
Name: A, dtype: float64

In [19]:
# df.iloc['A']  # try it: iloc only accepts integer positions, so passing a label raises a TypeError

### Selecting subset of rows and columns

In [20]:
df

Unnamed: 0,W,X,Y,Z,XW
A,1.331587,0.715279,-1.5454,-0.008384,2.046865
B,0.621336,-0.720086,0.265512,0.108549,-0.09875
C,0.004291,-0.1746,0.433026,1.203037,-0.170309
D,-0.965066,1.028274,0.22863,0.445138,0.063208
E,-1.136602,0.135137,1.484537,-1.079805,-1.001465


In [21]:
df.loc['B','Y']

0.2655115856921195

In [22]:
df.iloc[1,2]

0.2655115856921195

In [23]:
df.loc[['A','B'],['Z','XW']]

Unnamed: 0,Z,XW
A,-0.008384,2.046865
B,0.108549,-0.09875


In [24]:
# What happens if you swap the row labels and the column labels?
# df.loc[['X','Y'],['A','D']]                  # Try it yourself

## Conditional Selection
#### An important feature of pandas is conditional selection using bracket notation, very similar to numpy:

In [25]:
df

Unnamed: 0,W,X,Y,Z,XW
A,1.331587,0.715279,-1.5454,-0.008384,2.046865
B,0.621336,-0.720086,0.265512,0.108549,-0.09875
C,0.004291,-0.1746,0.433026,1.203037,-0.170309
D,-0.965066,1.028274,0.22863,0.445138,0.063208
E,-1.136602,0.135137,1.484537,-1.079805,-1.001465


In [26]:
df>0

Unnamed: 0,W,X,Y,Z,XW
A,True,True,False,False,True
B,True,False,True,True,False
C,True,False,True,True,False
D,False,True,True,True,True
E,False,True,True,False,False


In [27]:
df[df>0]

Unnamed: 0,W,X,Y,Z,XW
A,1.331587,0.715279,,,2.046865
B,0.621336,,0.265512,0.108549,
C,0.004291,,0.433026,1.203037,
D,,1.028274,0.22863,0.445138,0.063208
E,,0.135137,1.484537,,


In [28]:
df[df['W']>0]

Unnamed: 0,W,X,Y,Z,XW
A,1.331587,0.715279,-1.5454,-0.008384,2.046865
B,0.621336,-0.720086,0.265512,0.108549,-0.09875
C,0.004291,-0.1746,0.433026,1.203037,-0.170309


In [29]:
# Rows where W is positive, column Y only. A single .loc lookup does the row
# filter and the column pick together, instead of chaining two [] lookups
# through an intermediate DataFrame.
df.loc[df['W'] > 0, 'Y']

A   -1.545400
B    0.265512
C    0.433026
Name: Y, dtype: float64

In [30]:
# Rows where W is positive, columns Y and Z.
# Note: for multiple columns pass a list of column names as the second argument to .loc.
df.loc[df['W'] > 0, ['Y', 'Z']]

Unnamed: 0,Y,Z
A,-1.5454,-0.008384
B,0.265512,0.108549
C,0.433026,1.203037


For two conditions you can use | (or) and & (and), wrapping each condition in parentheses:

In [31]:
# XW values for rows where W is positive OR Y is negative.
# Each condition needs its own parentheses because | binds tighter than the comparisons.
# One .loc lookup replaces the chained df['XW'][mask] form.
df.loc[(df['W'] > 0) | (df['Y'] < 0), 'XW']

A    2.046865
B   -0.098750
C   -0.170309
Name: XW, dtype: float64

## More Index Details
Let's discuss some more features of indexing, including resetting the index or setting it to something else. We'll also talk about index hierarchy!

In [32]:
df

Unnamed: 0,W,X,Y,Z,XW
A,1.331587,0.715279,-1.5454,-0.008384,2.046865
B,0.621336,-0.720086,0.265512,0.108549,-0.09875
C,0.004291,-0.1746,0.433026,1.203037,-0.170309
D,-0.965066,1.028274,0.22863,0.445138,0.063208
E,-1.136602,0.135137,1.484537,-1.079805,-1.001465


In [33]:
# Reset the index to 0, 1, ..., n-1; the old labels become a regular 'index' column.
# The result is not assigned back, so df itself keeps its A-E labels.

df.reset_index()

Unnamed: 0,index,W,X,Y,Z,XW
0,A,1.331587,0.715279,-1.5454,-0.008384,2.046865
1,B,0.621336,-0.720086,0.265512,0.108549,-0.09875
2,C,0.004291,-0.1746,0.433026,1.203037,-0.170309
3,D,-0.965066,1.028274,0.22863,0.445138,0.063208
4,E,-1.136602,0.135137,1.484537,-1.079805,-1.001465


In [34]:
# Five labels, one for each row of df.
states = ['MEL', 'QLD', 'NSW', 'WA', 'NT']
states

['MEL', 'QLD', 'NSW', 'WA', 'NT']

In [35]:
df['States'] = states

In [36]:
df

Unnamed: 0,W,X,Y,Z,XW,States
A,1.331587,0.715279,-1.5454,-0.008384,2.046865,MEL
B,0.621336,-0.720086,0.265512,0.108549,-0.09875,QLD
C,0.004291,-0.1746,0.433026,1.203037,-0.170309,NSW
D,-0.965066,1.028274,0.22863,0.445138,0.063208,WA
E,-1.136602,0.135137,1.484537,-1.079805,-1.001465,NT


In [37]:
# Let's change the index to States
df.set_index('States')             # temporary: a new DataFrame is returned and df is left untouched, because inplace=True was not passed

Unnamed: 0_level_0,W,X,Y,Z,XW
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
MEL,1.331587,0.715279,-1.5454,-0.008384,2.046865
QLD,0.621336,-0.720086,0.265512,0.108549,-0.09875
NSW,0.004291,-0.1746,0.433026,1.203037,-0.170309
WA,-0.965066,1.028274,0.22863,0.445138,0.063208
NT,-1.136602,0.135137,1.484537,-1.079805,-1.001465


In [38]:
df

Unnamed: 0,W,X,Y,Z,XW,States
A,1.331587,0.715279,-1.5454,-0.008384,2.046865,MEL
B,0.621336,-0.720086,0.265512,0.108549,-0.09875,QLD
C,0.004291,-0.1746,0.433026,1.203037,-0.170309,NSW
D,-0.965066,1.028274,0.22863,0.445138,0.063208,WA
E,-1.136602,0.135137,1.484537,-1.079805,-1.001465,NT


In [39]:
# Make 'States' the index permanently by rebinding df to the returned frame.
# Same end result as passing inplace=True, but the call stays chainable.
df = df.set_index(keys='States')

In [40]:
df

Unnamed: 0_level_0,W,X,Y,Z,XW
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
MEL,1.331587,0.715279,-1.5454,-0.008384,2.046865
QLD,0.621336,-0.720086,0.265512,0.108549,-0.09875
NSW,0.004291,-0.1746,0.433026,1.203037,-0.170309
WA,-0.965066,1.028274,0.22863,0.445138,0.063208
NT,-1.136602,0.135137,1.484537,-1.079805,-1.001465


# End of Selection and Indexing. 
* For more follow me on Linkedin | Kaggle | GitHub | Medium | Facebook | Instagram - @imoisharma