#  **DataFrames**

In [1]:
import numpy as np
import pandas as pd

In [2]:
from numpy.random import randn

In [3]:
np.random.seed(42)

In [8]:
df = pd.DataFrame(randn(5, 4), index = ["A", "B", "C", "D", "E"], columns = ["W", "X", "Y", "Z"])

In [9]:
df

Unnamed: 0,W,X,Y,Z
A,0.738467,0.171368,-0.115648,-0.301104
B,-1.478522,-0.719844,-0.460639,1.057122
C,0.343618,-1.76304,0.324084,-0.385082
D,-0.676922,0.611676,1.031,0.93128
E,-0.839218,-0.309212,0.331263,0.975545


In [10]:
df[["W", "Y"]]

Unnamed: 0,W,Y
A,0.738467,-0.115648
B,-1.478522,-0.460639
C,0.343618,0.324084
D,-0.676922,1.031
E,-0.839218,0.331263


In [11]:
df["W+Y"] = df["W"] + df["Y"]

In [12]:
df

Unnamed: 0,W,X,Y,Z,W+Y
A,0.738467,0.171368,-0.115648,-0.301104,0.622818
B,-1.478522,-0.719844,-0.460639,1.057122,-1.939161
C,0.343618,-1.76304,0.324084,-0.385082,0.667702
D,-0.676922,0.611676,1.031,0.93128,0.354078
E,-0.839218,-0.309212,0.331263,0.975545,-0.507954


In [13]:
df.drop("W+Y", axis = 1)

Unnamed: 0,W,X,Y,Z
A,0.738467,0.171368,-0.115648,-0.301104
B,-1.478522,-0.719844,-0.460639,1.057122
C,0.343618,-1.76304,0.324084,-0.385082
D,-0.676922,0.611676,1.031,0.93128
E,-0.839218,-0.309212,0.331263,0.975545


In [14]:
df

Unnamed: 0,W,X,Y,Z,W+Y
A,0.738467,0.171368,-0.115648,-0.301104,0.622818
B,-1.478522,-0.719844,-0.460639,1.057122,-1.939161
C,0.343618,-1.76304,0.324084,-0.385082,0.667702
D,-0.676922,0.611676,1.031,0.93128,0.354078
E,-0.839218,-0.309212,0.331263,0.975545,-0.507954


In [15]:
df.drop("W+Y", axis = 1, inplace = True)

In [16]:
df

Unnamed: 0,W,X,Y,Z
A,0.738467,0.171368,-0.115648,-0.301104
B,-1.478522,-0.719844,-0.460639,1.057122
C,0.343618,-1.76304,0.324084,-0.385082
D,-0.676922,0.611676,1.031,0.93128
E,-0.839218,-0.309212,0.331263,0.975545


In [17]:
df.drop("E", axis = 0)

Unnamed: 0,W,X,Y,Z
A,0.738467,0.171368,-0.115648,-0.301104
B,-1.478522,-0.719844,-0.460639,1.057122
C,0.343618,-1.76304,0.324084,-0.385082
D,-0.676922,0.611676,1.031,0.93128


In [18]:
df

Unnamed: 0,W,X,Y,Z
A,0.738467,0.171368,-0.115648,-0.301104
B,-1.478522,-0.719844,-0.460639,1.057122
C,0.343618,-1.76304,0.324084,-0.385082
D,-0.676922,0.611676,1.031,0.93128
E,-0.839218,-0.309212,0.331263,0.975545


In [19]:
df.loc["A"]

W    0.738467
X    0.171368
Y   -0.115648
Z   -0.301104
Name: A, dtype: float64

In [20]:
df.iloc[3]

W   -0.676922
X    0.611676
Y    1.031000
Z    0.931280
Name: D, dtype: float64

In [21]:
df

Unnamed: 0,W,X,Y,Z
A,0.738467,0.171368,-0.115648,-0.301104
B,-1.478522,-0.719844,-0.460639,1.057122
C,0.343618,-1.76304,0.324084,-0.385082
D,-0.676922,0.611676,1.031,0.93128
E,-0.839218,-0.309212,0.331263,0.975545


In [23]:
df.loc[["D","E"], ["X", "Y", "Z"]]

Unnamed: 0,X,Y,Z
D,0.611676,1.031,0.93128
E,-0.309212,0.331263,0.975545


In [37]:
dataf = pd.DataFrame(randn(4, 4), index = [1, 2, 3, 4], columns = ["A", "B", "C", "D"])

In [38]:
dataf

Unnamed: 0,A,B,C,D
1,0.328751,-0.52976,0.513267,0.097078
2,0.968645,-0.702053,-0.327662,-0.392108
3,-1.463515,0.29612,0.261055,0.005113
4,-0.234587,-1.415371,-0.420645,-0.342715


In [39]:
dataf > 0

Unnamed: 0,A,B,C,D
1,True,False,True,True
2,True,False,False,False
3,False,True,True,True
4,False,False,False,False


In [40]:
dataf[dataf > 0]
#NaN for false

Unnamed: 0,A,B,C,D
1,0.328751,,0.513267,0.097078
2,0.968645,,,
3,,0.29612,0.261055,0.005113
4,,,,


In [41]:
dataf[dataf["A"] > 0]
#drops the rows that don't satisfy the condition provided 

Unnamed: 0,A,B,C,D
1,0.328751,-0.52976,0.513267,0.097078
2,0.968645,-0.702053,-0.327662,-0.392108


In [42]:
dataf[dataf["A"] > 0][["B", "C"]]

Unnamed: 0,B,C
1,-0.52976,0.513267
2,-0.702053,-0.327662


In [44]:
dataf[dataf["B"] > 0][["A", "D"]]

Unnamed: 0,A,D
3,-1.463515,0.005113


In [45]:
dataf[dataf["B"] < 0][["A", "D"]]

Unnamed: 0,A,D
1,0.328751,0.097078
2,0.968645,-0.392108
4,-0.234587,-0.342715
