# DataFrames - Part 1 (S6L27)

In [1]:
import numpy as np
import pandas as pd

In [2]:
from numpy.random import randn

In [3]:
# Seed NumPy's global RNG so the randn() draws used to build the frame are
# the same on every run. The seed lives in a named constant so it is easy to
# find and change.
SEED = 101
np.random.seed(SEED)

In [4]:
# Build a 5x4 frame from randn draws. index= and columns= are passed by
# keyword so it is explicit which list labels the rows and which labels the
# columns.
df = pd.DataFrame(
    randn(5, 4),
    index=['a', 'b', 'c', 'd', 'e'],
    columns=['w', 'x', 'y', 'z'],
)

In [5]:
# Display the frame built above: rows a-e, columns w-z.
df

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,-0.319318,-0.848077,0.605965
c,-2.018168,0.740122,0.528813,-0.589001
d,0.188695,-0.758872,-0.933237,0.955057
e,0.190794,1.978757,2.605967,0.683509


In [6]:
# Select one column by its label; the result is a Series named after the column.
df['w']

a    2.706850
b    0.651118
c   -2.018168
d    0.188695
e    0.190794
Name: w, dtype: float64

In [7]:
# Confirm that a single selected column is a pandas Series.
type(df['w'])

pandas.core.series.Series

In [8]:
# For comparison, the whole table is a pandas DataFrame.
type(df)

pandas.core.frame.DataFrame

In [9]:
# Attribute-style access to the same column; the output matches df['w'].
# NOTE(review): presumably this only works when the label is a valid Python
# identifier that does not clash with an existing DataFrame attribute -
# confirm, and prefer the bracket form in reusable code.
df.w

a    2.706850
b    0.651118
c   -2.018168
d    0.188695
e    0.190794
Name: w, dtype: float64

In [10]:
# Pass a list of labels to select several columns at once; the result is a
# smaller frame containing just those columns.
df[['w','z']]

Unnamed: 0,w,z
a,2.70685,0.503826
b,0.651118,0.605965
c,-2.018168,-0.589001
d,0.188695,0.955057
e,0.190794,0.683509


In [11]:
# Selecting a column label that does not exist raises KeyError. The error is
# the point of this cell, so catch it and print it: the message is still
# shown, but the notebook keeps running under Restart & Run All.
try:
    df['new']
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 'new'

In [12]:
# Create column 'new' as the element-wise sum of columns 'w' and 'y'.
df['new'] = df['w'].add(df['y'])

In [13]:
# Re-display df to confirm that the 'new' column now appears after 'z'.
df

Unnamed: 0,w,x,y,z,new
a,2.70685,0.628133,0.907969,0.503826,3.614819
b,0.651118,-0.319318,-0.848077,0.605965,-0.196959
c,-2.018168,0.740122,0.528813,-0.589001,-1.489355
d,0.188695,-0.758872,-0.933237,0.955057,-0.744542
e,0.190794,1.978757,2.605967,0.683509,2.796762


In [14]:
# Without an axis argument, drop() does not find 'new' (it is a column
# label) and raises KeyError. The error is the point of this cell, so catch
# it and print it: the message is still shown, but the notebook keeps running
# under Restart & Run All.
try:
    df.drop('new')
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: "['new'] not found in axis"

In [15]:
# Tell drop() to look for 'new' among the column labels. The result is a new
# frame without that column; df itself is not modified.
df.drop(columns='new')

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,-0.319318,-0.848077,0.605965
c,-2.018168,0.740122,0.528813,-0.589001
d,0.188695,-0.758872,-0.933237,0.955057
e,0.190794,1.978757,2.605967,0.683509


In [16]:
# df still contains 'new': the drop above returned a new frame and left df
# itself unchanged.
df

Unnamed: 0,w,x,y,z,new
a,2.70685,0.628133,0.907969,0.503826,3.614819
b,0.651118,-0.319318,-0.848077,0.605965,-0.196959
c,-2.018168,0.740122,0.528813,-0.589001,-1.489355
d,0.188695,-0.758872,-0.933237,0.955057,-0.744542
e,0.190794,1.978757,2.605967,0.683509,2.796762


In [17]:
# Make the removal permanent by rebinding df to the frame that drop()
# returns. This avoids inplace=True, which mutates the object in place and
# returns None, so the result cannot be chained or kept under a new name.
df = df.drop('new', axis=1)

In [18]:
# Confirm that 'new' no longer appears in df after the previous cell.
df

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,-0.319318,-0.848077,0.605965
c,-2.018168,0.740122,0.528813,-0.589001
d,0.188695,-0.758872,-0.933237,0.955057
e,0.190794,1.978757,2.605967,0.683509


In [19]:
# Remove row 'e' by its index label. The result is a new frame; df itself is
# not modified.
df.drop(index='e')

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,-0.319318,-0.848077,0.605965
c,-2.018168,0.740122,0.528813,-0.589001
d,0.188695,-0.758872,-0.933237,0.955057


In [20]:
# Row 'e' is still present: the drop above returned a new frame and left df
# itself unchanged.
df

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,-0.319318,-0.848077,0.605965
c,-2.018168,0.740122,0.528813,-0.589001
d,0.188695,-0.758872,-0.933237,0.955057
e,0.190794,1.978757,2.605967,0.683509


In [21]:
# Dimensions of the frame as a (rows, columns) tuple.
df.shape

(5, 4)

In [22]:
# Column selection again: a single label returns the 'y' column as a Series.
df['y']

a    0.907969
b   -0.848077
c    0.528813
d   -0.933237
e    2.605967
Name: y, dtype: float64

In [23]:
# Columns come back in the order they are listed, so 'z' appears before 'x'.
df[['z','x']]

Unnamed: 0,z,x
a,0.503826,0.628133
b,0.605965,-0.319318
c,-0.589001,0.740122
d,0.955057,-0.758872
e,0.683509,1.978757


## Selecting Rows

In [24]:
# Show the frame again as a reference for the row selections that follow.
df

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,-0.319318,-0.848077,0.605965
c,-2.018168,0.740122,0.528813,-0.589001
d,0.188695,-0.758872,-0.933237,0.955057
e,0.190794,1.978757,2.605967,0.683509


In [25]:
# .loc selects by label: row 'a' comes back as a Series indexed by the
# column names.
df.loc['a']

w    2.706850
x    0.628133
y    0.907969
z    0.503826
Name: a, dtype: float64

In [26]:
# .iloc selects by integer position: position 2 is the third row, labelled 'c'.
df.iloc[2]

w   -2.018168
x    0.740122
y    0.528813
z   -0.589001
Name: c, dtype: float64

In [27]:
# A row label plus a column label returns the single value where they meet.
df.loc['b','y']

-0.8480769834036315

In [28]:
# Lists of row labels and column labels return the sub-frame at their
# intersection.
df.loc[['a','e'],['x','y']]

Unnamed: 0,x,y
a,0.628133,0.907969
e,1.978757,2.605967
