In [193]:
import pandas as pd

In [194]:
# Sample records, stored column-wise: each key is a column label and each
# list holds that column's values in row order.
people = dict(
    first=['evan', 'john', 'jason'],
    last=['Das', 'Wachs', 'Li'],
    email=['evandas@gmail.com', 'johnwachs@hotmail.com', 'jasonli@outlook.com'],
    gender=['Male', 'Male', 'Female'],
)
# One DataFrame column per dict key; rows get the default 0, 1, 2 index.
df_people = pd.DataFrame(data=people)
df_people

Unnamed: 0,first,last,email,gender
0,evan,Das,evandas@gmail.com,Male
1,john,Wachs,johnwachs@hotmail.com,Male
2,jason,Li,jasonli@outlook.com,Female


In [195]:
# select a single column by its label
df_people['email']

0        evandas@gmail.com
1    johnwachs@hotmail.com
2      jasonli@outlook.com
Name: email, dtype: object

In [196]:
# indexing with a single column label yields a pandas Series
type(df_people['email'])

pandas.core.series.Series

In [197]:
df_people[['last', 'email']]
# passing a list of column labels inside the brackets selects all of those columns

Unnamed: 0,last,email
0,Das,evandas@gmail.com
1,Wachs,johnwachs@hotmail.com
2,Li,jasonli@outlook.com


In [198]:
# the column labels of the frame
df_people.columns

Index(['first', 'last', 'email', 'gender'], dtype='object')

In [199]:
df_people.iloc[[0, 1], [1, 2]]
# iloc selects by integer position: an int or a list of ints picks the row(s),
# and an optional second int / list of ints picks the column(s)
# with lists, exactly the listed positions are returned
# here: rows at positions [0, 1], restricted to the columns at positions [1, 2]

Unnamed: 0,last,email
0,Das,evandas@gmail.com
1,Wachs,johnwachs@hotmail.com


In [200]:
# loc selects by label: the rows labelled 0 and 1, and the column labelled 'email'
df_people.loc[[0, 1], 'email']

0        evandas@gmail.com
1    johnwachs@hotmail.com
Name: email, dtype: object

In [201]:
df_people.set_index('email', inplace = True)
# use the `email` column as the row index
# by default set_index leaves the original untouched; inplace = True changes df_people itself


In [202]:
df_people.loc['evandas@gmail.com', 'last']
# with `email` as the index, loc can address a row by its email address

'Das'

You can rename the columns of the dataframe simply by assigning a new list of names to `df.columns`

In [203]:
# Assigning a list to `columns` renames the columns by position, so the list
# holds one name per existing column (`email` is the index here, not a column).
df_people.columns = ['first_name', 'last_name', 'Gender']
df_people

Unnamed: 0_level_0,first_name,last_name,Gender
email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
evandas@gmail.com,evan,Das,Male
johnwachs@hotmail.com,john,Wachs,Male
jasonli@outlook.com,jason,Li,Female


You can also use list comprehension to rename all the column names

In [204]:
# Build the upper-cased labels with a list comprehension and assign them back
df_people.columns = [label.upper() for label in df_people.columns]
df_people

Unnamed: 0_level_0,FIRST_NAME,LAST_NAME,GENDER
email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
evandas@gmail.com,evan,Das,Male
johnwachs@hotmail.com,john,Wachs,Male
jasonli@outlook.com,jason,Li,Female


`str.replace` function


In [205]:
# `.str.replace` works on all column labels at once: every '_' becomes a space
df_people.columns = df_people.columns.str.replace('_', ' ')
df_people

Unnamed: 0_level_0,FIRST NAME,LAST NAME,GENDER
email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
evandas@gmail.com,evan,Das,Male
johnwachs@hotmail.com,john,Wachs,Male
jasonli@outlook.com,jason,Li,Female


use `df.rename(columns = {})` function to rename, `inplace = False` by default

In [206]:
# the dict maps old label -> new label; columns that are not listed (GENDER) keep their name
# inplace = True applies the rename to df_people itself
df_people.rename(columns = {
    'FIRST NAME': 'first_name',
    'LAST NAME': 'last_name'
}, inplace = True)

Notice that `iloc` only accepts integers (it asks for integer positions), whereas `loc` takes both row labels (the index) and column labels

In [207]:
# loc with one row label and a list of column labels: assign one new value per listed column
df_people.loc['evandas@gmail.com', ['first_name', 'last_name']] = ['EVAN', 'DAS']
df_people

Unnamed: 0_level_0,first_name,last_name,GENDER
email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
evandas@gmail.com,EVAN,DAS,Male
johnwachs@hotmail.com,john,Wachs,Male
jasonli@outlook.com,jason,Li,Female


change one value

In [208]:
# one row label plus one column label addresses exactly one cell
df_people.loc['johnwachs@hotmail.com', 'first_name'] = 'JOHN'
df_people

Unnamed: 0_level_0,first_name,last_name,GENDER
email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
evandas@gmail.com,EVAN,DAS,Male
johnwachs@hotmail.com,JOHN,Wachs,Male
jasonli@outlook.com,jason,Li,Female


In [209]:
# Move `email` out of the index and back into a regular column; the rows get
# the default integer index (0, 1, 2, ...) again.
# reset_index() returns the new frame, so rebind the name to it. Combining an
# assignment with inplace = True would only bind None, because the in-place
# form returns nothing.
df_people = df_people.reset_index()
df_people


Unnamed: 0,email,first_name,last_name,GENDER
0,evandas@gmail.com,EVAN,DAS,Male
1,johnwachs@hotmail.com,JOHN,Wachs,Male
2,jasonli@outlook.com,jason,Li,Female


In [210]:
# change the case of every email address
# Approach 1: the `.str` accessor (lower-casing here), assigned back to the column
df_people['email'] = df_people['email'].str.lower()
df_people

Unnamed: 0,email,first_name,last_name,GENDER
0,evandas@gmail.com,EVAN,DAS,Male
1,johnwachs@hotmail.com,JOHN,Wachs,Male
2,jasonli@outlook.com,jason,Li,Female


__`Series.apply` passes each value to the function; `DataFrame.apply` passes each column by default__

In [211]:
# Approach 2
# Hand a named function to `apply`
def update_email(email):
    """Return `email` converted to upper case."""
    upper_cased = email.upper()
    return upper_cased
# apply calls update_email on each email in turn; the result is not assigned back
df_people['email'].apply(update_email)

0        EVANDAS@GMAIL.COM
1    JOHNWACHS@HOTMAIL.COM
2      JASONLI@OUTLOOK.COM
Name: email, dtype: object

In [212]:
# Approach 3: same as Approach 2, with an anonymous lambda in place of the named function
df_people['email'].apply(lambda x: x.upper())

0        EVANDAS@GMAIL.COM
1    JOHNWACHS@HOTMAIL.COM
2      JASONLI@OUTLOOK.COM
Name: email, dtype: object

In [216]:
# the apply results of Approaches 2 and 3 were never assigned back, so the stored emails are unchanged
df_people

Unnamed: 0,email,first_name,last_name,GENDER
0,evandas@gmail.com,EVAN,DAS,Male
1,johnwachs@hotmail.com,JOHN,Wachs,Male
2,jasonli@outlook.com,jason,Li,Female


In [217]:
# axis = 'columns' hands each row to len, so every result is the number of columns (4)
df_people.apply(len, axis = 'columns')

0    4
1    4
2    4
dtype: int64

`axis = 'rows'` (i.e. `axis = 0`) is the default, so the function receives each column in turn

In [None]:
# without `axis`, each column is handed to len, so every result is the number of rows (3)
df_people.apply(len)

email         3
first_name    3
last_name     3
GENDER        3
dtype: int64

In [220]:
# apply(len) returned a new object; df_people itself is unchanged
df_people

Unnamed: 0,email,first_name,last_name,GENDER
0,evandas@gmail.com,EVAN,DAS,Male
1,johnwachs@hotmail.com,JOHN,Wachs,Male
2,jasonli@outlook.com,jason,Li,Female


`map()` is similar to a list comprehension: it applies a function to every value in the Series

`lambda x: x.map(len)` == `lambda col: [len(x) for x in col]`

**Note:** here `map(func)` is called on a pandas Series, so to cover a whole DataFrame it is wrapped in `apply`, as in `df.apply(lambda x: x.map(func))`

In [None]:
# two spellings of the same per-value length computation, column by column
# only the result of the last expression is displayed by the notebook
df_people.apply(lambda x: x.map(len)) 
df_people.apply(lambda col: [len(x) for x in col])

Unnamed: 0,email,first_name,last_name,GENDER
0,17,4,3,4
1,21,4,5,4
2,19,5,2,6


`replace`

In [237]:
# the dict maps old value -> new value; entries in the column that are not listed stay as they are
df_people['first_name'] = df_people['first_name'].replace({'EVAN': 'evan'})
df_people

Unnamed: 0,email,first_name,last_name,GENDER
0,evandas@gmail.com,evan,DAS,Male
1,johnwachs@hotmail.com,JOHN,Wachs,Male
2,jasonli@outlook.com,jason,Li,Female
