In [3]:
import pandas as pd
import numpy as np 
from numpy.random import randn

In [4]:
# Seed NumPy's global random generator so the random values below are the same on every run
np.random.seed(101)

In [10]:
# 5x4 frame of random draws from randn: rows labelled A-E, columns labelled W-Z
df = pd.DataFrame(
    data=randn(5, 4),
    index=['A', 'B', 'C', 'D', 'E'],
    columns=['W', 'X', 'Y', 'Z'],
)

In [11]:
df

Unnamed: 0,W,X,Y,Z
A,0.38603,2.084019,-0.376519,0.230336
B,0.681209,1.035125,-0.03116,1.939932
C,-1.005187,-0.74179,0.187125,-0.732845
D,-1.38292,1.482495,0.961458,-2.141212
E,0.992573,1.192241,-1.04678,1.292765


## Selecting a Series from a DataFrame

In [15]:
df['W']  # selecting one column by name returns a pandas Series

A    0.386030
B    0.681209
C   -1.005187
D   -1.382920
E    0.992573
Name: W, dtype: float64

In [14]:
type(df['W'])

pandas.core.series.Series

In [17]:
# Read one cell in a single indexing step: loc[row_label, column_label].
# Same value as the chained form df['W']['B'] (column W, then label B of that
# Series), but chained indexing is best avoided as a habit because it is
# unreliable when used for assignment.
df.loc['B', 'W']

0.6812092925867574

In [21]:
# a list of column names selects several columns and returns a DataFrame
df[['W','X']]

Unnamed: 0,W,X
A,0.38603,2.084019
B,0.681209,1.035125
C,-1.005187,-0.74179
D,-1.38292,1.482495
E,0.992573,1.192241


# Part 1 

### Creating a new column

In [31]:
# assigning to a new column name adds that column to df; here NEW = W + X, row by row
df['NEW'] = df['W'] + df['X']

In [32]:
df

Unnamed: 0,W,X,Y,Z,NEW
A,0.38603,2.084019,-0.376519,0.230336,2.470049
B,0.681209,1.035125,-0.03116,1.939932,1.716334
C,-1.005187,-0.74179,0.187125,-0.732845,-1.746977
D,-1.38292,1.482495,0.961458,-2.141212,0.099575
E,0.992573,1.192241,-1.04678,1.292765,2.184814


### Dropping columns and rows (use `drop()`)

In [33]:
# axis=1 drops a column (axis=0 would drop a row by index label).
# Without inplace=True, drop() returns a new DataFrame and df itself is left unchanged.
df.drop('NEW', axis=1)

Unnamed: 0,W,X,Y,Z
A,0.38603,2.084019,-0.376519,0.230336
B,0.681209,1.035125,-0.03116,1.939932
C,-1.005187,-0.74179,0.187125,-0.732845
D,-1.38292,1.482495,0.961458,-2.141212
E,0.992573,1.192241,-1.04678,1.292765


In [34]:
df  # the drop above was not done in place, so df still has the NEW column

Unnamed: 0,W,X,Y,Z,NEW
A,0.38603,2.084019,-0.376519,0.230336,2.470049
B,0.681209,1.035125,-0.03116,1.939932,1.716334
C,-1.005187,-0.74179,0.187125,-0.732845,-1.746977
D,-1.38292,1.482495,0.961458,-2.141212,0.099575
E,0.992573,1.192241,-1.04678,1.292765,2.184814


In [35]:
# in-place deletion: inplace=True removes the column from df itself instead of returning a new frame
# NOTE: this cell is not re-runnable as-is; a second run raises KeyError because NEW is already gone
df.drop('NEW', axis=1, inplace=True)
df  # NEW is now gone from df

Unnamed: 0,W,X,Y,Z
A,0.38603,2.084019,-0.376519,0.230336
B,0.681209,1.035125,-0.03116,1.939932
C,-1.005187,-0.74179,0.187125,-0.732845
D,-1.38292,1.482495,0.961458,-2.141212
E,0.992573,1.192241,-1.04678,1.292765


In [36]:
# dropping a row: axis=0 drops by index label; without inplace=True df is left unchanged
df.drop('E', axis=0)

Unnamed: 0,W,X,Y,Z
A,0.38603,2.084019,-0.376519,0.230336
B,0.681209,1.035125,-0.03116,1.939932
C,-1.005187,-0.74179,0.187125,-0.732845
D,-1.38292,1.482495,0.961458,-2.141212


In [37]:
df

Unnamed: 0,W,X,Y,Z
A,0.38603,2.084019,-0.376519,0.230336
B,0.681209,1.035125,-0.03116,1.939932
C,-1.005187,-0.74179,0.187125,-0.732845
D,-1.38292,1.482495,0.961458,-2.141212
E,0.992573,1.192241,-1.04678,1.292765


In [38]:
# axis defaults to 0, so this drops row 'E'; inplace=True changes df itself
df.drop('E', inplace=True)

In [39]:
df

Unnamed: 0,W,X,Y,Z
A,0.38603,2.084019,-0.376519,0.230336
B,0.681209,1.035125,-0.03116,1.939932
C,-1.005187,-0.74179,0.187125,-0.732845
D,-1.38292,1.482495,0.961458,-2.141212


In [40]:
df.shape 

(4, 4)

### Accessing columns, rows and cells (`loc`, `iloc`)

In [41]:
# Rows (the index) are referred to as axis=0 and columns as axis=1

In [42]:
# selecting a single row by its index label; the result is a Series indexed by column name
df.loc['A']

W    0.386030
X    2.084019
Y   -0.376519
Z    0.230336
Name: A, dtype: float64

In [43]:
# selecting multiple rows: pass a list of index labels to get a DataFrame
df.loc[['A', 'C']]

Unnamed: 0,W,X,Y,Z
A,0.38603,2.084019,-0.376519,0.230336
C,-1.005187,-0.74179,0.187125,-0.732845


In [47]:
# look up rows by integer position (0-based): positions 2 and 3 are rows C and D here
df.iloc[[2,3]]

Unnamed: 0,W,X,Y,Z
C,-1.005187,-0.74179,0.187125,-0.732845
D,-1.38292,1.482495,0.961458,-2.141212


In [50]:
# a single cell can be read with loc[row_label, column_label]
df.loc['B','Y']

-0.031160481493099617

In [51]:
# a sub-frame: list of row labels first, list of column labels second
df.loc[['A','B'], ['W', 'Y']]

Unnamed: 0,W,Y
A,0.38603,-0.376519
B,0.681209,-0.03116


# Part 2 