In [164]:
import pandas as pd

In [165]:
people = {
    "first": ["Jerin", "Jane", "John"],
    "last": ["Thomas", "Doe", "Doe"],
    "email": ["jerinthomas17@gmail.com", "janedoe@email.com", "johndoe@email.com"]
}

In [166]:
# dict to df
df2 = pd.DataFrame(people)

In [167]:
df2

Unnamed: 0,first,last,email
0,Jerin,Thomas,jerinthomas17@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [168]:
# Attribute access also works for column names: df2.email
# df2.email and df2['email'] return the same column.
# Dot access is unreliable when a column name clashes with an existing DataFrame
# attribute or method: e.g. df2.count returns the count method, not a column named 'count'.
# Bracket access has no such ambiguity.
df2['email']

0    jerinthomas17@gmail.com
1          janedoe@email.com
2          johndoe@email.com
Name: email, dtype: object

In [169]:
type(df2['email'])

pandas.core.series.Series

In [170]:
df2[['last', 'email']]

Unnamed: 0,last,email
0,Thomas,jerinthomas17@gmail.com
1,Doe,janedoe@email.com
2,Doe,johndoe@email.com


In [171]:
type(df2[['last', 'email']])

pandas.core.frame.DataFrame

In [172]:
# get columns
df2.columns

Index(['first', 'last', 'email'], dtype='object')

In [173]:
# get rows: loc selects by label, iloc selects by integer position
df2.iloc[0]  # first row, returned as a Series

first                      Jerin
last                      Thomas
email    jerinthomas17@gmail.com
Name: 0, dtype: object

In [174]:
df2.iloc[[0, 1]]

Unnamed: 0,first,last,email
0,Jerin,Thomas,jerinthomas17@gmail.com
1,Jane,Doe,janedoe@email.com


In [175]:
df2.iloc[[0, 1], 2]

0    jerinthomas17@gmail.com
1          janedoe@email.com
Name: email, dtype: object

In [176]:
df2

Unnamed: 0,first,last,email
0,Jerin,Thomas,jerinthomas17@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [177]:
# .loc selects by label; with the default integer index the labels happen to be 0, 1, 2.
df2.loc[0]  # not displayed: only the last expression of a cell is shown
# rows labelled 0 and 1, with the columns returned in the order requested
df2.loc[[0,1], ['email', 'first']]

Unnamed: 0,email,first
0,jerinthomas17@gmail.com,Jerin
1,janedoe@email.com,Jane


In [178]:
df2.set_index('email')    # returns a new DataFrame indexed by email; df2 itself is not modified

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
jerinthomas17@gmail.com,Jerin,Thomas
janedoe@email.com,Jane,Doe
johndoe@email.com,John,Doe


In [179]:
df2

Unnamed: 0,first,last,email
0,Jerin,Thomas,jerinthomas17@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [180]:
df2.set_index('email', inplace=True)   #modifies the df and index changes
df2

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
jerinthomas17@gmail.com,Jerin,Thomas
janedoe@email.com,Jane,Doe
johndoe@email.com,John,Doe


In [181]:
df2.loc['jerinthomas17@gmail.com']

first     Jerin
last     Thomas
Name: jerinthomas17@gmail.com, dtype: object

In [182]:
# Restore the default integer index in place; the old index ('email') moves back
# into the frame as the first column.
df2.reset_index(inplace=True)
# Alternative (not run): df2 = df2.reset_index(drop=True) would discard the email
# index instead of turning it back into a column.
df2

Unnamed: 0,email,first,last
0,jerinthomas17@gmail.com,Jerin,Thomas
1,janedoe@email.com,Jane,Doe
2,johndoe@email.com,John,Doe


Filtering

In [183]:
# Comparing a column with a value gives a boolean Series (one True/False per row).
df2['last'] == 'Doe'  # result discarded; shown here only to illustrate the expression
# Keep the mask in a variable so it can be reused for row selection.
filt = (df2['last'] == 'Doe')

In [184]:
df2[filt]
df2[df2['last'] == 'Doe']

Unnamed: 0,email,first,last
1,janedoe@email.com,Jane,Doe
2,johndoe@email.com,John,Doe


In [185]:
df2.loc[filt]

Unnamed: 0,email,first,last
1,janedoe@email.com,Jane,Doe
2,johndoe@email.com,John,Doe


In [186]:
df2.loc[filt, 'email']

1    janedoe@email.com
2    johndoe@email.com
Name: email, dtype: object

In [187]:
# Combine boolean masks with & (AND) and | (OR) rather than the `and`/`or` keywords.
# Each comparison needs its own parentheses because & and | bind tighter than ==.

df2[(df2['last']=='Doe') & (df2['first']=='John')]

Unnamed: 0,email,first,last
2,johndoe@email.com,John,Doe


In [188]:
df2[(df2['last']=='Thomas') | (df2['first']=='Jane')]

Unnamed: 0,email,first,last
0,jerinthomas17@gmail.com,Jerin,Thomas
1,janedoe@email.com,Jane,Doe


In [189]:
# ~ (NOT) binds tighter than |, so only the first condition is negated here:
# this keeps rows where last != 'Thomas' OR first == 'Jane'.
# To negate the whole OR instead, wrap it: ~((cond_a) | (cond_b)).
df2[~(df2['last']=='Thomas') | (df2['first']=='Jane')]

Unnamed: 0,email,first,last
1,janedoe@email.com,Jane,Doe
2,johndoe@email.com,John,Doe


Updating Column

In [190]:
df2.columns

Index(['email', 'first', 'last'], dtype='object')

In [191]:
df2.columns = ['EMAIL', 'FIRST NAME', 'LAST NAME'] 
df2

Unnamed: 0,EMAIL,FIRST NAME,LAST NAME
0,jerinthomas17@gmail.com,Jerin,Thomas
1,janedoe@email.com,Jane,Doe
2,johndoe@email.com,John,Doe


In [192]:
df2.columns = [x.lower() for x in df2.columns]
df2

Unnamed: 0,email,first name,last name
0,jerinthomas17@gmail.com,Jerin,Thomas
1,janedoe@email.com,Jane,Doe
2,johndoe@email.com,John,Doe


In [193]:
df2.columns = df2.columns.str.replace(' ', '_')
df2

Unnamed: 0,email,first_name,last_name
0,jerinthomas17@gmail.com,Jerin,Thomas
1,janedoe@email.com,Jane,Doe
2,johndoe@email.com,John,Doe


In [194]:
df2.rename(columns={'first_name':'first', 'last_name':'last'}, inplace=True)

In [195]:
df2

Unnamed: 0,email,first,last
0,jerinthomas17@gmail.com,Jerin,Thomas
1,janedoe@email.com,Jane,Doe
2,johndoe@email.com,John,Doe


UPDATING DATA

In [196]:
df2.loc[1] = ['Johnsmith@email.com', 'John', 'Smith']

In [197]:
df2

Unnamed: 0,email,first,last
0,jerinthomas17@gmail.com,Jerin,Thomas
1,Johnsmith@email.com,John,Smith
2,johndoe@email.com,John,Doe


In [198]:
df2.loc[1, ['first', 'last']] = ['John_updated', 'Smith_updated']

In [199]:
df2

Unnamed: 0,email,first,last
0,jerinthomas17@gmail.com,Jerin,Thomas
1,Johnsmith@email.com,John_updated,Smith_updated
2,johndoe@email.com,John,Doe


In [201]:
df2.loc[1, 'last'] = 'Smith_latest'
df2

Unnamed: 0,email,first,last
0,jerinthomas17@gmail.com,Jerin,Thomas
1,Johnsmith@email.com,John_updated,Smith_latest
2,johndoe@email.com,John,Doe


In [203]:
df2.at[1, 'last'] = 'Smith_latest2'   # .at sets a single cell by row/column label, like .loc here, but is probably faster
df2

Unnamed: 0,email,first,last
0,jerinthomas17@gmail.com,Jerin,Thomas
1,Johnsmith@email.com,John_updated,Smith_latest2
2,johndoe@email.com,John,Doe


In [206]:
df2[(df2['first'] == 'Jerin')]

Unnamed: 0,email,first,last
0,jerinthomas17@gmail.com,Jerin,Thomas


In [209]:
# Anti-pattern (chained indexing), kept only to demonstrate the warning:
# the assignment targets the filtered result, which pandas reports as a copy.
df2[(df2['first'] == 'Jerin')]['last'] = "Thomas_updated"
# pandas emits a warning here rather than raising an error;
# use df2.loc[row_filter, column] = value to update the original frame.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2[(df2['first'] == 'Jerin')]['last'] = "Thomas_updated"


In [212]:
df2.loc[(df2['first'] == 'Jerin'), 'last'] = "Thomas_updated"

In [213]:
df2

Unnamed: 0,email,first,last
0,jerinthomas17@gmail.com,Jerin,Thomas_updated
1,Johnsmith@email.com,John_updated,Smith_latest2
2,johndoe@email.com,John,Doe


In [218]:
# make all email addresses upper case (vectorised string method via .str)

df2['email'] = df2['email'].str.upper()
df2

Unnamed: 0,email,first,last
0,JERINTHOMAS17@GMAIL.COM,Jerin,Thomas_updated
1,JOHNSMITH@EMAIL.COM,John_updated,Smith_latest2
2,JOHNDOE@EMAIL.COM,John,Doe


4 ways to update:
1. APPLY 
2. APPLYMAP
3. MAP
4. REPLACE

In [220]:
# 1. Apply : used to call function on our values

df2['email'].apply(len)


0    23
1    19
2    17
Name: email, dtype: int64

In [226]:
def update_email(email):
    """Return the given email string converted to lower case."""
    lowered = email.lower()
    return lowered

df2['email'].apply(update_email)

0    jerinthomas17@gmail.com
1        johnsmith@email.com
2          johndoe@email.com
Name: email, dtype: object

In [227]:
df2

Unnamed: 0,email,first,last
0,JERINTHOMAS17@GMAIL.COM,Jerin,Thomas_updated
1,JOHNSMITH@EMAIL.COM,John_updated,Smith_latest2
2,JOHNDOE@EMAIL.COM,John,Doe


In [230]:
df2['email'] = df2['email'].apply(update_email) # assigned it to a series
df2

Unnamed: 0,email,first,last
0,jerinthomas17@gmail.com,Jerin,Thomas_updated
1,johnsmith@email.com,John_updated,Smith_latest2
2,johndoe@email.com,John,Doe


In [231]:
df2['email'] = df2['email'].apply(lambda x: x.upper())
df2

Unnamed: 0,email,first,last
0,JERINTHOMAS17@GMAIL.COM,Jerin,Thomas_updated
1,JOHNSMITH@EMAIL.COM,John_updated,Smith_latest2
2,JOHNDOE@EMAIL.COM,John,Doe


In [236]:
# DataFrame.apply passes a whole Series to the function, not individual values.
print(df2.apply(len))  # default axis: one call per column -> number of rows in each column
print(df2.apply(len, axis='columns'))  # one call per row -> number of columns in each row
print(df2.apply(pd.Series.min))  # per-column minimum (for strings: first in sort order)
print(df2.apply(lambda x: x.min()))  # same result, written as a lambda

email    3
first    3
last     3
dtype: int64
0    3
1    3
2    3
dtype: int64
email    JERINTHOMAS17@GMAIL.COM
first                      Jerin
last                         Doe
dtype: object
email    JERINTHOMAS17@GMAIL.COM
first                      Jerin
last                         Doe
dtype: object


In [239]:
# 2. Applymap: applies the function to every individual cell of the DataFrame.
# NOTE: pandas 2.1+ deprecates applymap in favour of DataFrame.map.
df2.applymap(len)  # result is not displayed: only a cell's last expression is shown
df2.applymap(str.upper)  # returns a new DataFrame; df2 itself is unchanged


Unnamed: 0,email,first,last
0,JERINTHOMAS17@GMAIL.COM,JERIN,THOMAS_UPDATED
1,JOHNSMITH@EMAIL.COM,JOHN_UPDATED,SMITH_LATEST2
2,JOHNDOE@EMAIL.COM,JOHN,DOE


In [250]:
# 3. MAP : Series.map with a dict substitutes each value by its dict entry.
# Values that have no key in the dict become NaN (row 1 in the output).
df2['last'].map({"Thomas_updated": "Thomas", "Doe": "DOE_UDPATED"})

0         Thomas
1            NaN
2    DOE_UDPATED
Name: last, dtype: object

In [251]:
df2

Unnamed: 0,email,first,last
0,JERINTHOMAS17@GMAIL.COM,Jerin,Thomas_updated
1,JOHNSMITH@EMAIL.COM,John_updated,Smith_latest2
2,JOHNDOE@EMAIL.COM,John,Doe


In [252]:
# 4. REPLACE : substitutes only the values listed in the dict and leaves every
# other value untouched (contrast with map, which yields NaN for unmatched values).
# Available on both Series and DataFrame; returns a new object, so df2 is unchanged.
df2['last'].replace({"Thomas_updated": "Thomas", "Doe": "DOE_UDPATED"})

0           Thomas
1    Smith_latest2
2      DOE_UDPATED
Name: last, dtype: object

In [253]:
df2

Unnamed: 0,email,first,last
0,JERINTHOMAS17@GMAIL.COM,Jerin,Thomas_updated
1,JOHNSMITH@EMAIL.COM,John_updated,Smith_latest2
2,JOHNDOE@EMAIL.COM,John,Doe
