# Pandas DataFrames


In [None]:
import numpy as np
import pandas as pd

In [None]:
from numpy.random import randn

In [None]:
# Seed NumPy's global random generator so the random values below are reproducible.
np.random.seed(seed=101)

In [None]:
# 5x4 frame of standard-normal draws: rows labelled A-E, columns labelled W-Z.
df = pd.DataFrame(
    data=randn(5, 4),
    index=['A', 'B', 'C', 'D', 'E'],
    columns=['W', 'X', 'Y', 'Z'],
)

In [None]:
# Display the frame as the cell's output.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


# Conditional Selection

## Single Condition

In [None]:
# Comparing the whole frame with a scalar works element by element:
# the result keeps the same labels, with True/False in every cell.
df > 0

Unnamed: 0,W,X,Y,Z
A,True,True,True,True
B,True,False,False,True
C,False,True,True,False
D,True,False,False,True
E,True,True,True,True


In [None]:
# The element-wise comparison is itself a DataFrame (of booleans):
type(df > 0)

pandas.core.frame.DataFrame

In [None]:
# Conditional selection: rather than stopping at the boolean comparison,
# use it as a mask on the DataFrame itself.
# Cells where the condition holds keep their value; the others become NaN.
df[df > 0]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [None]:
# Masking with a boolean frame still gives a DataFrame:
type(df[df > 0])

pandas.core.frame.DataFrame

In [None]:
# A condition can also be applied to a single column,
# which gives one boolean per row...
df['W'] > 0

A     True
B     True
C    False
D     True
E     True
Name: W, dtype: bool

In [None]:
# A single-column comparison is a boolean Series:
type(df['W'] > 0)

pandas.core.series.Series

In [None]:
# ...and that boolean Series can filter rows: keep only those where 'W' is greater than zero.
# The result is a subset of the DataFrame (actual values, not booleans) that we can keep working with.
# Rows are kept or dropped whole, so the mask introduces no NaN values o/
# Row 'C' is not returned because its 'W' value is negative.
df[df['W'] > 0]


Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [None]:
# The full, unfiltered DataFrame again, for reference:
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [None]:
# Keep the rows where 'Z' is less than zero;
# only row 'C' satisfies the condition:
df[df['Z'] < 0]

Unnamed: 0,W,X,Y,Z
C,-2.018168,0.740122,0.528813,-0.589001


In [None]:
# To return column 'X' for the rows where 'W' is greater than zero,
# pass the row mask and the column label to .loc in a single indexing call.
# This is preferred over chaining df[mask]['X'], which indexes twice.
df.loc[df['W'] > 0, 'X']

A    0.628133
B   -0.319318
D   -0.758872
E    1.978757
Name: X, dtype: float64

In [None]:
# Selecting a single column label for the masked rows returns a Series
# (.loc does the row filter and column pick in one indexing call):
type(df.loc[df['W'] > 0, 'X'])

pandas.core.series.Series

## Multiple Conditions

In [None]:
# Combine conditions with & (AND) or | (OR): df[(cond1) & (cond2)] or df[(cond1) | (cond2)].
# Each condition needs its own parentheses because & and | bind more tightly than > and <.
# This one is AND: rows where 'W' > 0 and 'X' > 1.
df[(df['W'] > 0) & (df['X'] > 1)]

Unnamed: 0,W,X,Y,Z
E,0.190794,1.978757,2.605967,0.683509


In [None]:
# ...and this is OR: rows where 'W' > 0 or 'Y' > 1 (either condition is enough).
df[(df['W'] > 0) | (df['Y'] > 1)]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [None]:
# Filtering on combined conditions also gives a DataFrame:
type(df[(df['W'] > 0) | (df['Y'] > 1)])

pandas.core.frame.DataFrame

# Slicing & Indexing

In [None]:
# Another example:
# retrieve the rows where column 'W' is greater than zero,
# keeping only columns 'W' and 'Y'.
# In ONE STEP: .loc takes the row mask and the list of columns together,
# which avoids chaining two separate indexing operations.
df.loc[df['W'] > 0, ['W', 'Y']]

Unnamed: 0,W,Y
A,2.70685,0.907969
B,0.651118,-0.848077
D,0.188695,-0.933237
E,0.190794,2.605967


In [None]:
# The same selection can also be built in MULTIPLE STEPS, like these.
# Start with the boolean mask for 'W' > 0:
boolser = df['W'] > 0

In [None]:
# Inspect the mask: one boolean per row label.
boolser

A     True
B     True
C    False
D     True
E     True
Name: W, dtype: bool

In [None]:
# The boolean mask is a pandas Series:
type(boolser)

pandas.core.series.Series

In [None]:
# Use the boolean mask to keep only the rows where it is True.
result = df[boolser]

In [None]:
# Display the rows that passed the mask.
result

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [None]:
# Filtering rows with a boolean Series returns a DataFrame:
type(result)

pandas.core.frame.DataFrame

In [None]:
# Plain Python list holding the labels of the columns we want to return.
myCols = ['W', 'Y']

In [None]:
# The column selection is an ordinary Python list:
type(myCols)

list

In [None]:
# Finally, index the filtered rows with the list of column labels.
# The result is the same as the ONE STEP selection shown earlier
# (rows where 'W' > 0, columns 'W' and 'Y').
# There you have it!

result[myCols]

Unnamed: 0,W,Y
A,2.70685,0.907969
B,0.651118,-0.848077
D,0.188695,-0.933237
E,0.190794,2.605967


In [None]:
# Closing message for the reader.
print("Thanks for reading. Favorite it if you Like it o/")

Thanks for reading. Favorite it if you Like it o/
