In [3]:
import numpy as np
import pandas as pd
from numpy.random import randn
np.random.seed(101)  ## Fix the seed so everyone gets the same random numbers and the outputs match the course data

In [4]:
## Build a 5x4 DataFrame of randn() draws: rows labelled A-E, columns labelled W-Z.
df = pd.DataFrame(
    randn(5, 4),
    index=list("ABCDE"),
    columns=list("WXYZ"),
)

In [5]:
df  ## Display the DataFrame to check its labels and values

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [6]:
##CONDITIONAL SELECTION 

In [7]:
df > 0 ## Element-wise comparison, just like with NumPy arrays: returns a DataFrame of booleans

Unnamed: 0,W,X,Y,Z
A,True,True,True,True
B,True,False,False,True
C,False,True,True,False
D,True,False,False,True
E,True,True,True,True


In [9]:
booldf = df> 0  ## Boolean mask: True wherever the value is greater than 0
df[booldf] ## Indexing with the mask keeps the values where it is True and shows NaN everywhere else

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [10]:
df[df>0] ## Same result as above, with the boolean mask written inline instead of stored in a variable

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [26]:

df["W"] > 0  ## Comparing a single column returns a boolean Series: one True/False per row

A     True
B     True
C    False
D     True
E     True
Name: W, dtype: bool

### This will be important later, when we do data cleaning.
### First we selected a single column (a Series) from the DataFrame: "W".
### Then we use the condition on that column as a filter, which drops every row where it is False (here, row "C").
### In the result, row "C" is removed across every column.

In [25]:


df[df["W"] > 0] ## Row "C" is gone: filtering with a boolean Series keeps only the rows where the condition is True



Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [31]:
df[df["Z"] < 0]  ## Keep only the rows where column "Z" is negative (just row "C" here)

Unnamed: 0,W,X,Y,Z
C,-2.018168,0.740122,0.528813,-0.589001


In [36]:
## Using multiple conditions together
## df[df["W"] > 0 and df["Y"] < 0] raises an error:
##   "The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()."
## Python's `and` needs a single True/False on each side, but each comparison here is a whole
## Series of booleans. Use the element-wise `&` operator instead.

df[(df["W"] > 0) & (df["Y"] > 1)]  ## Wrap each condition in parentheses: `&` binds more tightly than `>`

Unnamed: 0,W,X,Y,Z
E,0.190794,1.978757,2.605967,0.683509


In [37]:

## RESETTING THE INDEX OR SETTING IT TO SOMETHING ELSE 


In [38]:
df  ## Show df again as the starting point before changing the index

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [40]:
df.reset_index()  ## The index is reset to 0, 1, 2, ... and the old labels move into a new "index" column
## This returns a new DataFrame; pass inplace=True to reset the index of the original DataFrame instead

Unnamed: 0,index,W,X,Y,Z
0,A,2.70685,0.628133,0.907969,0.503826
1,B,0.651118,-0.319318,-0.848077,0.605965
2,C,-2.018168,0.740122,0.528813,-0.589001
3,D,0.188695,-0.758872,-0.933237,0.955057
4,E,0.190794,1.978757,2.605967,0.683509


In [41]:
## Labels that will be used as the new index
new_index = ["CA", "NY", "WY", "OR", "CO"]

In [42]:
new_index  ## Inspect the list of labels

['CA', 'NY', 'WY', 'OR', 'CO']

In [43]:
df["States"] = new_index   ## Insert a new "States" column; the list values are assigned to the rows in order (this modifies df)

In [44]:
df  ## df now has the extra "States" column


Unnamed: 0,W,X,Y,Z,States
A,2.70685,0.628133,0.907969,0.503826,CA
B,0.651118,-0.319318,-0.848077,0.605965,NY
C,-2.018168,0.740122,0.528813,-0.589001,WY
D,0.188695,-0.758872,-0.933237,0.955057,OR
E,0.190794,1.978757,2.605967,0.683509,CO


In [46]:
## Now we want the "States" column to be the index.
df.set_index("States")
## The old A-E index is dropped and "States" becomes the index of the returned DataFrame (df itself is not changed)

Unnamed: 0_level_0,W,X,Y,Z
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,2.70685,0.628133,0.907969,0.503826
NY,0.651118,-0.319318,-0.848077,0.605965
WY,-2.018168,0.740122,0.528813,-0.589001
OR,0.188695,-0.758872,-0.933237,0.955057
CO,0.190794,1.978757,2.605967,0.683509


In [47]:
df ## df is unchanged: set_index only overwrites the original when called with inplace=True

Unnamed: 0,W,X,Y,Z,States
A,2.70685,0.628133,0.907969,0.503826,CA
B,0.651118,-0.319318,-0.848077,0.605965,NY
C,-2.018168,0.740122,0.528813,-0.589001,WY
D,0.188695,-0.758872,-0.933237,0.955057,OR
E,0.190794,1.978757,2.605967,0.683509,CO
