# Pandas DataFrames

In [1]:
import numpy as np
import pandas as pd

In [2]:
from numpy.random import randn

In [3]:
# Seed NumPy's global RNG so the random numbers drawn later are reproducible.
SEED = 101
np.random.seed(SEED)

In [4]:
# 5x4 frame of standard-normal draws: rows labelled A-E, columns W-Z.
df = pd.DataFrame(
    data=randn(5, 4),
    index=['A', 'B', 'C', 'D', 'E'],
    columns=['W', 'X', 'Y', 'Z'],
)
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [5]:
# Selecting a single label with brackets yields that column as a Series.
x_column = df['X']
print(x_column)
print(type(x_column))

A    0.628133
B   -0.319318
C    0.740122
D   -0.758872
E    1.978757
Name: X, dtype: float64
<class 'pandas.core.series.Series'>


In [6]:
# Bracket selection returns the same Series as the attribute shorthand df.Y.
df['Y']

A    0.907969
B   -0.848077
C    0.528813
D   -0.933237
E    2.605967
Name: Y, dtype: float64

In [7]:
# A list of labels selects several columns and returns a DataFrame.
df.loc[:, ['W', 'Z']]

Unnamed: 0,W,Z
A,2.70685,0.503826
B,0.651118,0.605965
C,-2.018168,-0.589001
D,0.188695,0.955057
E,0.190794,0.683509


In [8]:
# Add a derived column: the element-wise sum of Y and W, then show the frame.
df['new'] = df['Y'].add(df['W'])
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [9]:
# Shows the frame without column X; the result is not assigned, so df keeps X.
df.drop(columns='X')

Unnamed: 0,W,Y,Z,new
A,2.70685,0.907969,0.503826,3.614819
B,0.651118,-0.848077,0.605965,-0.196959
C,-2.018168,0.528813,-0.589001,-1.489355
D,0.188695,-0.933237,0.955057,-0.744542
E,0.190794,2.605967,0.683509,2.796762


In [10]:
# df still contains column X: the preceding drop() result was only displayed, not kept.
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [11]:
# Remove column X for good by rebinding df to the frame that drop() returns.
# Rebinding is preferred over inplace=True: it avoids silently mutating an
# object that earlier cells already displayed, and it keeps the call chainable.
df = df.drop(columns='X')

In [12]:
# df itself no longer has column X at this point.
df

Unnamed: 0,W,Y,Z,new
A,2.70685,0.907969,0.503826,3.614819
B,0.651118,-0.848077,0.605965,-0.196959
C,-2.018168,0.528813,-0.589001,-1.489355
D,0.188695,-0.933237,0.955057,-0.744542
E,0.190794,2.605967,0.683509,2.796762


In [13]:
# Label-based row selection: rows A and B, every column.
df.loc[['A', 'B'], :]

Unnamed: 0,W,Y,Z,new
A,2.70685,0.907969,0.503826,3.614819
B,0.651118,-0.848077,0.605965,-0.196959


In [14]:
# Position-based row selection: first and third rows, every column.
df.iloc[[0, 2], :]

Unnamed: 0,W,Y,Z,new
A,2.70685,0.907969,0.503826,3.614819
C,-2.018168,0.528813,-0.589001,-1.489355


In [15]:
# Single cell lookup by row label and column label.
df.at['A', 'W']

2.706849839399938

In [16]:
# Element-wise comparison: a boolean frame marking the strictly positive entries.
df.gt(0)

Unnamed: 0,W,Y,Z,new
A,True,True,True,True
B,True,False,True,False
C,False,True,False,False
D,True,False,True,False
E,True,True,True,True


In [19]:
# Keep positive entries, blank out the rest as NaN, then replace those NaNs with 0.
df.where(df > 0).fillna(0)

Unnamed: 0,W,Y,Z,new
A,2.70685,0.907969,0.503826,3.614819
B,0.651118,0.0,0.605965,0.0
C,0.0,0.528813,0.0,0.0
D,0.188695,0.0,0.955057,0.0
E,0.190794,2.605967,0.683509,2.796762


In [30]:
# Boolean frame: True wherever the entry is strictly greater than zero.
df.gt(0)

Unnamed: 0,W,Y,Z,new
A,True,True,True,True
B,True,False,True,False
C,False,True,False,False
D,True,False,True,False
E,True,True,True,True


In [38]:
# Row filter: keep only the rows whose W value is positive.
df.loc[df['W'].gt(0)]

Unnamed: 0,W,Y,Z,new
A,2.70685,0.907969,0.503826,3.614819
B,0.651118,-0.848077,0.605965,-0.196959
D,0.188695,-0.933237,0.955057,-0.744542
E,0.190794,2.605967,0.683509,2.796762


In [49]:
# Returns a copy with the row labels moved into an 'index' column and a fresh
# 0..n-1 index; df is not modified (inplace already defaults to False).
df.reset_index()

Unnamed: 0,index,W,Y,Z,new
0,A,2.70685,0.907969,0.503826,3.614819
1,B,0.651118,-0.848077,0.605965,-0.196959
2,C,-2.018168,0.528813,-0.589001,-1.489355
3,D,0.188695,-0.933237,0.955057,-0.744542
4,E,0.190794,2.605967,0.683509,2.796762


In [48]:
# Combine two row conditions with element-wise AND: Z positive and W above 1.
df[df['Z'].gt(0) & df['W'].gt(1)]

Unnamed: 0,W,Y,Z,new
A,2.70685,0.907969,0.503826,3.614819


In [53]:
# Attach one state code per existing row as a new 'States' column.
# The list length must match the number of rows in df (five here).
newind = 'CA NY WY OR CO'.split()
df['States'] = newind
df

Unnamed: 0,W,Y,Z,new,States
A,2.70685,0.907969,0.503826,3.614819,CA
B,0.651118,-0.848077,0.605965,-0.196959,NY
C,-2.018168,0.528813,-0.589001,-1.489355,WY
D,0.188695,-0.933237,0.955057,-0.744542,OR
E,0.190794,2.605967,0.683509,2.796762,CO


In [55]:
# Show the frame re-indexed by the States column; the result is not assigned,
# so df keeps its current index.
df.set_index(keys='States')

Unnamed: 0_level_0,W,Y,Z,new
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,2.70685,0.907969,0.503826,3.614819
NY,0.651118,-0.848077,0.605965,-0.196959
WY,-2.018168,0.528813,-0.589001,-1.489355
OR,0.188695,-0.933237,0.955057,-0.744542
CO,0.190794,2.605967,0.683509,2.796762
