# Pandas - DataFrames

In [26]:
import numpy as np
import pandas as pd
from numpy.random import randn

In [27]:
# Seed NumPy's global random generator so the randn() draws below are reproducible.
np.random.seed(10)

In [28]:
# Build a 5x4 frame of random draws with row labels A-E and column labels W-Z.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)
df

Unnamed: 0,W,X,Y,Z
A,1.331587,0.715279,-1.5454,-0.008384
B,0.621336,-0.720086,0.265512,0.108549
C,0.004291,-0.1746,0.433026,1.203037
D,-0.965066,1.028274,0.22863,0.445138
E,-1.136602,0.135137,1.484537,-1.079805


## Indexing & Selection

In [29]:
# Bracket indexing with a single column label returns that column as a Series.
df['W']

A    1.331587
B    0.621336
C    0.004291
D   -0.965066
E   -1.136602
Name: W, dtype: float64

In [30]:
# The same bracket lookup, this time for column 'X'.
df['X']

A    0.715279
B   -0.720086
C   -0.174600
D    1.028274
E    0.135137
Name: X, dtype: float64

In [31]:
# The table as a whole is a DataFrame.
type(df)

pandas.core.frame.DataFrame

In [32]:
# A single column pulled out of the frame is a Series.
type(df['X'])

pandas.core.series.Series

In [33]:
# Attribute-style access also returns column 'W', but it is not recommended:
# it only works when the column name is a valid Python identifier and does not
# clash with an existing DataFrame attribute or method. Prefer df['W'].
df.W

A    1.331587
B    0.621336
C    0.004291
D   -0.965066
E   -1.136602
Name: W, dtype: float64

In [34]:
# Pass a list of labels to select several columns at once; the result is a DataFrame.
df[['W', 'X']]

Unnamed: 0,W,X
A,1.331587,0.715279
B,0.621336,-0.720086
C,0.004291,-0.1746
D,-0.965066,1.028274
E,-1.136602,0.135137


### Creating a New Column

In [35]:
# Assigning to a label that does not exist yet creates the column;
# here it holds the element-wise sum of columns W and Y.
df['new'] = df['W'] + df['Y']
df

Unnamed: 0,W,X,Y,Z,new
A,1.331587,0.715279,-1.5454,-0.008384,-0.213814
B,0.621336,-0.720086,0.265512,0.108549,0.886848
C,0.004291,-0.1746,0.433026,1.203037,0.437318
D,-0.965066,1.028274,0.22863,0.445138,-0.736436
E,-1.136602,0.135137,1.484537,-1.079805,0.347935


### Removing a Column

In [36]:
# axis=0 refers to rows (index labels); axis=1 refers to columns.
# drop() returns a new DataFrame and leaves df unchanged. To keep the result,
# reassign it (df = df.drop(...)) or pass inplace=True.
df.drop('new', axis=1)

Unnamed: 0,W,X,Y,Z
A,1.331587,0.715279,-1.5454,-0.008384
B,0.621336,-0.720086,0.265512,0.108549
C,0.004291,-0.1746,0.433026,1.203037
D,-0.965066,1.028274,0.22863,0.445138
E,-1.136602,0.135137,1.484537,-1.079805


In [37]:
# df still contains 'new': the drop() above returned a new DataFrame instead of changing df.
df

Unnamed: 0,W,X,Y,Z,new
A,1.331587,0.715279,-1.5454,-0.008384,-0.213814
B,0.621336,-0.720086,0.265512,0.108549,0.886848
C,0.004291,-0.1746,0.433026,1.203037,0.437318
D,-0.965066,1.028274,0.22863,0.445138,-0.736436
E,-1.136602,0.135137,1.484537,-1.079805,0.347935


In [38]:
# Reassign the result so that the removal of 'new' persists.
df = df.drop('new', axis=1)

In [39]:
# 'new' is gone now that the result was assigned back to df.
df

Unnamed: 0,W,X,Y,Z
A,1.331587,0.715279,-1.5454,-0.008384
B,0.621336,-0.720086,0.265512,0.108549
C,0.004291,-0.1746,0.433026,1.203037
D,-0.965066,1.028274,0.22863,0.445138
E,-1.136602,0.135137,1.484537,-1.079805


In [40]:
# inplace=True modifies df directly (the call returns None), so no reassignment is needed.
# Not idempotent: running this cell a second time raises KeyError because 'Z' is already gone.
df.drop('Z', axis=1, inplace=True)

In [41]:
# 'Z' has been removed from df itself.
df

Unnamed: 0,W,X,Y
A,1.331587,0.715279,-1.5454
B,0.621336,-0.720086,0.265512
C,0.004291,-0.1746,0.433026
D,-0.965066,1.028274,0.22863
E,-1.136602,0.135137,1.484537


In [42]:
# Rebuild a fresh 5x4 frame with rows A-E and columns W-Z. The values come from
# new randn() draws, so they differ from the first frame.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)
df

Unnamed: 0,W,X,Y,Z
A,-1.977728,-1.743372,0.26607,2.384967
B,1.123691,1.672622,0.099149,1.397996
C,-0.271248,0.613204,-0.267317,-0.549309
D,0.132708,-0.476142,1.308473,0.195013
E,0.40021,-0.337632,1.256472,-0.73197


### Removing a Row

In [43]:
# axis=0 (rows) is drop()'s default, so it could be omitted here.
# As with columns, the result is a new DataFrame; df itself is not modified.
df.drop('E', axis=0)

Unnamed: 0,W,X,Y,Z
A,-1.977728,-1.743372,0.26607,2.384967
B,1.123691,1.672622,0.099149,1.397996
C,-0.271248,0.613204,-0.267317,-0.549309
D,0.132708,-0.476142,1.308473,0.195013


In [44]:
# Row 'E' is still present: the drop() above was neither assigned back nor done in place.
df

Unnamed: 0,W,X,Y,Z
A,-1.977728,-1.743372,0.26607,2.384967
B,1.123691,1.672622,0.099149,1.397996
C,-0.271248,0.613204,-0.267317,-0.549309
D,0.132708,-0.476142,1.308473,0.195013
E,0.40021,-0.337632,1.256472,-0.73197


In [45]:
# shape is a (rows, columns) tuple.
df.shape

(5, 4)

### Selecting Rows

In [46]:
# loc[] selects by index label (here the custom labels 'A'..'E');
# a single label returns that row as a Series.
df.loc['D']

W    0.132708
X   -0.476142
Y    1.308473
Z    0.195013
Name: D, dtype: float64

In [48]:
# iloc[] selects by zero-based integer position, regardless of the labels;
# position 3 is the row labelled 'D'.
df.iloc[3]

W    0.132708
X   -0.476142
Y    1.308473
Z    0.195013
Name: D, dtype: float64

### Selecting a Subset of a DataFrame

In [54]:
# Two-step lookup: take column 'W' as a Series, then pick label 'D' from it.
# Fine for reading a value; a single df.loc[row, col] call does the same in one step.
df['W'].loc['D']

0.1327082957599512

In [55]:
# Single-step scalar lookup: row label first, column label second.
df.loc['D', 'W']

0.1327082957599512

In [56]:
# Lists of row labels and column labels select the matching sub-frame.
df.loc[['A', 'E'], ['W', 'Y']]

Unnamed: 0,W,Y
A,-1.977728,0.26607
E,0.40021,1.256472
