# Pandas: Data Frames

In [1]:
import numpy as np
import pandas as pd

In [2]:
from numpy.random import randn

In [3]:
# Seed NumPy's global random generator so the randn() values used below are reproducible
np.random.seed(101)

In [4]:
# General form: pd.DataFrame(data, index=<row labels>, columns=<column labels>)
# Build a 5x4 frame of random values with rows A-E and columns W-Z.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [5]:
# Selecting a single column label with [] returns a Series
df['Z']

A    0.503826
B    0.605965
C   -0.589001
D    0.955057
E    0.683509
Name: Z, dtype: float64

In [6]:
# Same pattern with another label: column X comes back as a Series
df['X']

A    0.628133
B   -0.319318
C    0.740122
D   -0.758872
E    1.978757
Name: X, dtype: float64

In [7]:
# The table as a whole is a DataFrame
type(df)

pandas.core.frame.DataFrame

In [8]:
# A single selected column is a Series
type(df['X'])

pandas.core.series.Series

In [9]:
# Passing a list of column labels returns a DataFrame holding just those columns
df[['X','Y','Z']]

Unnamed: 0,X,Y,Z
A,0.628133,0.907969,0.503826
B,-0.319318,-0.848077,0.605965
C,0.740122,0.528813,-0.589001
D,-0.758872,-0.933237,0.955057
E,1.978757,2.605967,0.683509


In [10]:
# Create a new column by assignment: 'new' is the element-wise sum of columns Y and Z
df['new'] = df['Y'] + df['Z']
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,1.411795
B,0.651118,-0.319318,-0.848077,0.605965,-0.242112
C,-2.018168,0.740122,0.528813,-0.589001,-0.060187
D,0.188695,-0.758872,-0.933237,0.955057,0.021819
E,0.190794,1.978757,2.605967,0.683509,3.289476


In [11]:
# Removing a column: drop() without inplace returns a new DataFrame (stored in df1).
# df is displayed afterwards to show that the original still has the 'new' column.
df1= df.drop('new', axis=1)
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,1.411795
B,0.651118,-0.319318,-0.848077,0.605965,-0.242112
C,-2.018168,0.740122,0.528813,-0.589001,-0.060187
D,0.188695,-0.758872,-0.933237,0.955057,0.021819
E,0.190794,1.978757,2.605967,0.683509,3.289476


In [12]:
# The frame returned by drop(): it has no 'new' column
df1

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [13]:
# Removing the column from df itself: inplace=True modifies df and the cell displays nothing.
# NOTE(review): re-running this cell once 'new' is gone presumably raises KeyError
# (drop's default for missing labels) — confirm before relying on Run All twice.
df.drop('new',axis=1,inplace=True)


In [14]:
# df itself no longer contains the 'new' column
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [16]:
# Removing a row by index label (axis=0).
# The result is only displayed, not stored, and inplace is not set, so df is not modified.
df.drop('B', axis=0)

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [20]:
df.shape
# shape is (rows, columns): this frame has 5 rows and 4 columns.
# Rows are axis=0 and columns are axis=1.

(5, 4)

In [26]:
# Selecting rows by index label with .loc; a list of labels returns a DataFrame
df.loc[['A', 'C']]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
C,-2.018168,0.740122,0.528813,-0.589001


In [27]:
# Selecting columns by passing a list of column labels to []
df[['X', 'Z']]

Unnamed: 0,X,Z
A,0.628133,0.503826
B,-0.319318,0.605965
C,0.740122,-0.589001
D,-0.758872,0.955057
E,1.978757,0.683509


In [29]:
# Selecting rows by integer position with .iloc (0-based): positions 1 and 3 are rows B and D
df.iloc[[1,3]]

Unnamed: 0,W,X,Y,Z
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057


In [30]:
# A single integer position returns that row as a Series (position 0 is row A)
df.iloc[0]

W    2.706850
X    0.628133
Y    0.907969
Z    0.503826
Name: A, dtype: float64

In [33]:
# Full frame again: row B is still present because the earlier df.drop('B', axis=0) was not in place
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [34]:
# A row label plus a column label returns the single value at that cell
df.loc['D', 'Y']

-0.9332372163009188

In [38]:
# Return a subset: df.loc[[row labels], [column labels]]
df.loc[['A', 'E'], ['W','Z']]

Unnamed: 0,W,Z
A,2.70685,0.503826
E,0.190794,0.683509
