## Indexes


In [91]:
import pandas as pd

In [92]:
# Sample records: each key becomes a column, each list position is one person.
people = {
    "first": ["Sutton", "Kat", "Jane"],
    "last": ["Brady", "Edison", "Sloan"],
    "email": ["sutton@gmail.com", "kat@gmail.com", "jane@gmail.com"],
    "workplace": ["Scarlet", "Scarlet", "Incite"],
}

In [93]:
# Build the DataFrame from the dict: keys become column labels, list items become rows
df = pd.DataFrame(people)


In [94]:
# Column labels of the frame
df.columns

Index(['first', 'last', 'email', 'workplace'], dtype='object')

In [95]:
# Row labels; no index was specified, so this is the default RangeIndex
df.index

RangeIndex(start=0, stop=3, step=1)

## iloc

In [96]:
df.iloc[[0 , 1] , 1]
# iloc selects by integer position: df.iloc[rows, columns]
# [0, 1] => rows at positions 0 and 1
# 1      => column at position 1 ('last')

0     Brady
1    Edison
Name: last, dtype: object

## Loc

In [97]:
# loc selects by label: row label 0, column label 'email'
df.loc[0 , 'email']

'sutton@gmail.com'

In [98]:
# Label slices in loc include BOTH endpoints: 'first':'email' returns first, last and email
df.loc[[0,1] , 'first':'email']

Unnamed: 0,first,last,email
0,Sutton,Brady,sutton@gmail.com
1,Kat,Edison,kat@gmail.com


## Indexing


In [99]:
df.set_index('email')
# Returns a new DataFrame indexed by email; df itself is not modified

Unnamed: 0_level_0,first,last,workplace
email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
sutton@gmail.com,Sutton,Brady,Scarlet
kat@gmail.com,Kat,Edison,Scarlet
jane@gmail.com,Jane,Sloan,Incite


In [100]:
df

Unnamed: 0,first,last,email,workplace
0,Sutton,Brady,sutton@gmail.com,Scarlet
1,Kat,Edison,kat@gmail.com,Scarlet
2,Jane,Sloan,jane@gmail.com,Incite


## Set index

In [101]:
# Make 'email' the index for the following cells. The result is reassigned
# (instead of using inplace=True) so the style matches the later
# `df = df.reset_index()` cell and the rebinding of df is explicit.
df = df.set_index('email')


In [102]:
df

Unnamed: 0_level_0,first,last,workplace
email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
sutton@gmail.com,Sutton,Brady,Scarlet
kat@gmail.com,Kat,Edison,Scarlet
jane@gmail.com,Jane,Sloan,Incite


The new index is reflected in `iloc` and `loc`: `iloc` still selects by position, while `loc` now selects rows by their email label.

In [103]:
# iloc is still positional: rows 0 and 1, column 0 (now 'first', because 'email' moved into the index)
df.iloc[[0 , 1] , 0]

email
sutton@gmail.com    Sutton
kat@gmail.com          Kat
Name: first, dtype: object

In [104]:
# loc now looks rows up by their email label
df.loc['kat@gmail.com']

first            Kat
last          Edison
workplace    Scarlet
Name: kat@gmail.com, dtype: object

In [105]:
#df.loc[0]
# Won't work: the index labels are now the email values, so the label 0 no longer exists

In [106]:
# Moves 'email' back to a regular column and restores the default integer index.
# The result is only displayed here, not assigned.
df.reset_index()

Unnamed: 0,email,first,last,workplace
0,sutton@gmail.com,Sutton,Brady,Scarlet
1,kat@gmail.com,Kat,Edison,Scarlet
2,jane@gmail.com,Jane,Sloan,Incite


In [107]:
# Keep the reset frame by rebinding df to the returned DataFrame
df = df.reset_index()

In [108]:
df

Unnamed: 0,email,first,last,workplace
0,sutton@gmail.com,Sutton,Brady,Scarlet
1,kat@gmail.com,Kat,Edison,Scarlet
2,jane@gmail.com,Jane,Sloan,Incite


## Filter

In [109]:
# Boolean mask: True for every row whose workplace is 'Scarlet'
filt = df['workplace'] == 'Scarlet'

In [110]:
# Keep the rows where the mask is True and return only the 'first' column
df.loc[filt , 'first']

0    Sutton
1       Kat
Name: first, dtype: object

### Filter with conditions

In [111]:
# OR (|): a row is kept when at least one condition is True.
# Each comparison needs its own parentheses because | binds tighter than ==.
filt_workspace = (
    (df['workplace'] == 'Scarlet')
    | (df['first'] == 'Kat')
)

In [112]:
# Emails of the rows matching the OR filter
df.loc[filt_workspace, 'email']

0    sutton@gmail.com
1       kat@gmail.com
Name: email, dtype: object

In [113]:
# Negation: ~ inverts the boolean mask, selecting the rows the filter rejected
df.loc[~filt_workspace , 'email']

2    jane@gmail.com
Name: email, dtype: object

In [114]:
# AND (&): a row is kept only when both conditions are True.
filt_and_op = (
    (df['workplace'] == 'Scarlet')
    & (df['first'] == 'Kat')
)
df.loc[filt_and_op, 'first']


1    Kat
Name: first, dtype: object

## Updating values


In [115]:
# Update a single cell (row label 1, column 'email').
# A single cell takes a scalar, so the value is assigned directly rather than
# wrapped in a one-element list.
df.loc[1, 'email'] = 'kat1233@gmail.com'

In [116]:
# Overwrite the whole row with label 2; the values are matched to the columns
# by position, so they must follow the column order (email, first, last, workplace)
df.loc[2] = ['JaneSloan@gmail.com' , 'jane' , 'sloan' , 'Freelancer']

In [117]:
df

Unnamed: 0,email,first,last,workplace
0,sutton@gmail.com,Sutton,Brady,Scarlet
1,kat1233@gmail.com,Kat,Edison,Scarlet
2,JaneSloan@gmail.com,jane,sloan,Freelancer


In [118]:
##UpperCase

In [119]:
# Upper-case every value in 'first' using the vectorised .str accessor
df['first'] = df['first'].str.upper()

In [120]:
df

Unnamed: 0,email,first,last,workplace
0,sutton@gmail.com,SUTTON,Brady,Scarlet
1,kat1233@gmail.com,KAT,Edison,Scarlet
2,JaneSloan@gmail.com,JANE,sloan,Freelancer


Apply on Series

In [121]:
def updateEmail(email):
    """Return ``email`` converted to upper case."""
    upper_cased = email.upper()
    return upper_cased


In [122]:
# Series.apply calls updateEmail once per value; the function object is passed, not called
df['email'] = df['email'].apply(updateEmail)
df

Unnamed: 0,email,first,last,workplace
0,SUTTON@GMAIL.COM,SUTTON,Brady,Scarlet
1,KAT1233@GMAIL.COM,KAT,Edison,Scarlet
2,JANESLOAN@GMAIL.COM,JANE,sloan,Freelancer


In [123]:
# Same pattern with an inline lambda instead of a named function: lower-case each email
df['email'] = df['email'].apply(lambda x : x.lower())
df

Unnamed: 0,email,first,last,workplace
0,sutton@gmail.com,SUTTON,Brady,Scarlet
1,kat1233@gmail.com,KAT,Edison,Scarlet
2,janesloan@gmail.com,JANE,sloan,Freelancer


Apply on DataFrames

In [124]:
# On a Series, apply works element by element: len is called on each email
# string, so the result is the character count of every email.
df['email'].apply(len)

0    16
1    17
2    19
Name: email, dtype: int64

In [125]:
# On a DataFrame, apply passes whole Series to the function. With axis='columns'
# each row is passed, so len returns the number of columns in that row (4).
df.apply(len , axis = 'columns')

0    4
1    4
2    4
dtype: int64

In [126]:
# len of a whole Series is its number of entries (one per row), not a string length
len(df['email'])

3

In [127]:
# With the default axis the function receives each column as a Series;
# for strings, min is the smallest value in string sort order
df.apply(pd.Series.min)

email        janesloan@gmail.com
first                       JANE
last                       Brady
workplace             Freelancer
dtype: object

In [128]:
df.apply(lambda x : x.min()) # Same per-column minimum, written as a lambda (x is one column)

email        janesloan@gmail.com
first                       JANE
last                       Brady
workplace             Freelancer
dtype: object

Apply map

In [129]:
df.applymap(len)
# Applies the function to every individual element of the DataFrame
# NOTE: DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map;
# switch to df.map(len) once the notebook targets pandas >= 2.1

Unnamed: 0,email,first,last,workplace
0,16,6,5,7
1,17,3,6,7
2,19,4,5,10


In [130]:
# Lower-case every element; the result is only displayed, df is not modified
# NOTE: DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map
df.applymap(str.lower)

Unnamed: 0,email,first,last,workplace
0,sutton@gmail.com,sutton,brady,scarlet
1,kat1233@gmail.com,kat,edison,scarlet
2,janesloan@gmail.com,jane,sloan,freelancer


Map



In [131]:
df

Unnamed: 0,email,first,last,workplace
0,sutton@gmail.com,SUTTON,Brady,Scarlet
1,kat1233@gmail.com,KAT,Edison,Scarlet
2,janesloan@gmail.com,JANE,sloan,Freelancer


In [134]:
# Series.map substitutes values using the dict; values with no matching key
# ('KAT' here) become NaN. The result is only displayed, df is not modified.
df['first'].map({'SUTTON' : 'Sutton' , 'JANE' : 'Jane'})

0    Sutton
1       NaN
2      Jane
Name: first, dtype: object

In [136]:
# Series.replace substitutes only the values that match a key and leaves the rest unchanged.
# NOTE(review): the values in 'first' are upper-case at this point ('SUTTON', 'KAT', 'JANE'),
# so the key 'Sutton' matches nothing and this call changes nothing; use the key 'SUTTON'
# to actually replace a value.
# NOTE(review): the execution counts skip 132, 133 and 135, and the 'last' value shown for
# row 2 differs from the previous display, which suggests a cell was run and then removed;
# confirm with Restart Kernel -> Run All.
df['first'] = df['first'].replace({'Sutton' : 'sutton'})
df

Unnamed: 0,email,first,last,workplace
0,sutton@gmail.com,SUTTON,Brady,Scarlet
1,kat1233@gmail.com,KAT,Edison,Scarlet
2,janesloan@gmail.com,JANE,Sloan,Freelancer
