In [1]:
# Column-oriented sample data: each keyword is a column, each list its values.
people = dict(
    first=["Corey", "Jane", "John"],
    last=["Schafer", "Doe", "Doe"],
    email=["CoreyMSchafer@gmail.com", "JaneDoe@email.com", "JohnDoe@email.com"],
)

In [2]:
import pandas as pd

# Build the frame from the dict of columns (keys become column labels).
df = pd.DataFrame.from_dict(people)
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [3]:
# Single-label bracket access selects one column
df['email']

0    CoreyMSchafer@gmail.com
1          JaneDoe@email.com
2          JohnDoe@email.com
Name: email, dtype: object

In [4]:
# Compare the types of: the whole frame, a single-label selection,
# and a list-of-labels selection.
for selection in (df, df['email'], df[['first', 'last']]):
    print(type(selection))

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.frame.DataFrame'>


In [5]:
# Inspect the column labels of the frame
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [6]:
# iloc = integer-location lookup: row positions first, column positions second.
# df.iloc[0]
row_positions = [0, 1]  # first and second rows
email_position = 2      # third column
df.iloc[row_positions, email_position]

0    CoreyMSchafer@gmail.com
1          JaneDoe@email.com
Name: email, dtype: object

In [7]:
# loc = lookup by index/column labels rather than positions.
# df.loc[0:1]
name_columns = ['first', 'last']
df.loc[0:1, name_columns]  # rows labelled 0 through 1, columns chosen by name

Unnamed: 0,first,last
0,Corey,Schafer
1,Jane,Doe


In [8]:
# df.set_index('email', inplace=True)
# df

In [9]:
# df.loc['CoreyMSchafer@gmail.com']
# The df.loc[0] lookup below doesn't work anymore because the index is no longer ints
# Use iloc if you want int-based search with non-int indexes
# df.loc[0] 

In [10]:
# df.reset_index(inplace=True)
# df

In [11]:
# Boolean mask: True on every row whose last name equals 'Doe'.
filt = df['last'].eq('Doe')
print(type(filt))
filt

<class 'pandas.core.series.Series'>


0    False
1     True
2     True
Name: last, dtype: bool

In [12]:
# Keep only the rows where the mask is True. Equivalent spellings:
# df.loc[filt]
# df[df['last'] == 'Doe']
df[filt]

Unnamed: 0,first,last,email
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [13]:
# Combining masks: & is AND, | is OR, ~ negates.
# Only the final expression of the cell is rendered as output.
is_doe = df['last'] == 'Doe'
is_john = df['first'] == 'John'
df[is_doe & is_john]  # both conditions must hold
df[is_doe | is_john]  # either condition may hold
df[~filt]             # rows where the earlier mask is False

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com


In [14]:
# Relabel every column at once; the list is matched to columns by position.
renamed_labels = ['first_name', 'last_name', 'email']
df.columns = renamed_labels

In [15]:
# Display the frame to check its current column labels
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [16]:
# Upper-case every column label through the Index string accessor.
df.columns = df.columns.str.upper()
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [17]:
# Swap each space character in the column labels for an underscore
# (literal match on ' ' only, not tabs or other whitespace).
df.columns = df.columns.str.replace(' ', '_', regex=False)
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [25]:
# rename takes an old-label -> new-label mapping; labels not listed stay untouched.
lowercase_labels = {
    'FIRST_NAME': 'first',
    'LAST_NAME': 'last',
    'EMAIL': 'email',
}
df.rename(columns=lowercase_labels, inplace=True)
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Shmitty,JohnSmith@email.com


In [23]:
# Ways to write values into the frame:
#   df.loc[row, col] = value  -> a single cell, addressed by labels
#   df.loc[row] = [...]       -> a whole row, values matched to columns by position
#   df.at[row, col] = value   -> a single cell via the single-value accessor
# df.loc[2, 'last'] = 'smith'

# Kept live (not commented out): the outputs recorded further down show
# 'johnsmith@email.com' for row 2, which only exists if this row assignment
# has run. Without it a fresh Restart & Run All cannot reproduce them.
df.loc[2] = ['John', 'Smith', 'JohnSmith@email.com']
df.at[2, 'last'] = 'Shmitty'
df

Unnamed: 0,first,last,EMAIL
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Shmitty,JohnSmith@email.com


In [27]:
# Normalise the addresses to lower case and write them back over the column.
lowercased = df['email'].str.lower()
df['email'] = lowercased
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Shmitty,johnsmith@email.com


# Data Manipulation methods

Apply - perform a function on a Series - returns a Series.  
ApplyMap - apply to every element in a DataFrame - Returns a DataFrame.  
Map - Substitute values in a Series, will mark non-mapped values as 'NaN'.  
Replace - Same as map, but will not change non-mapped values.  

In [31]:
# Series.apply calls the given function once per value of the column.
# Other variants to try:
#   df['email'].apply(len)
#
#   def update_email(email: str):
#       return email.upper()
#   df['email'].apply(update_email)

df['email'].apply(str.upper)

0    COREYMSCHAFER@GMAIL.COM
1          JANEDOE@EMAIL.COM
2        JOHNSMITH@EMAIL.COM
Name: email, dtype: object

In [32]:
# On a DataFrame, apply hands each column (a Series) to the function,
# so len here is the number of rows in every column.
df.apply(len, axis=0)

first    3
last     3
email    3
dtype: int64

In [33]:
# Smallest value of each column (alphabetical order for these string columns).
df.apply(lambda column: column.min())

first                      Corey
last                         Doe
email    coreymschafer@gmail.com
dtype: object

In [34]:
# Element-wise: run len on every individual cell of the frame.
# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map,
# so use whichever spelling the installed pandas provides.
per_cell = df.map if hasattr(df, "map") else df.applymap
per_cell(len)

Unnamed: 0,first,last,email
0,5,7,23
1,4,3,17
2,4,7,19


In [None]:
# Element-wise: lower-case every individual cell of the frame.
# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map,
# so use whichever spelling the installed pandas provides.
per_cell = df.map if hasattr(df, "map") else df.applymap
per_cell(str.lower)

In [36]:
# map substitutes through the dict; values absent from it come back as NaN.
first_name_swaps = {'Corey': 'Chris', 'Jane': 'Mary'}
df['first'].map(first_name_swaps)

0    Chris
1     Mary
2      NaN
Name: first, dtype: object

In [38]:
# replace substitutes like map, but values absent from the dict are kept as-is.
replacements = {'Corey': 'Chris', 'Jane': 'Mary'}
df['first'].replace(replacements)

0    Chris
1     Mary
2     John
Name: first, dtype: object