In [1]:
# Sample records stored column-wise: each key is a column name and each
# list holds that column's values in row order.
people = dict(
    first=['Corey', 'Jane', 'John'],
    last=['Schafer', 'Doe', 'Doe'],
    email=['CoreyMSSchafer@gmail.com', 'janedoe@email.com', 'johndoe@email.com'],
)

In [2]:
import pandas as pd

In [3]:
# Build a DataFrame from the dict: keys become column labels, lists become the column values.
df = pd.DataFrame(data=people)

In [4]:
# Display the frame; the rows carry the default integer index (0, 1, 2).
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSSchafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [5]:
# Bracket access with a single column label returns that column as a Series.
df['email']

0    CoreyMSSchafer@gmail.com
1           janedoe@email.com
2           johndoe@email.com
Name: email, dtype: object

In [6]:
# Use the 'email' column as the row labels instead of the integer index.
# The result is a NEW frame; df itself is left untouched (doesn't carry over to df).
df.set_index(keys='email')

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
CoreyMSSchafer@gmail.com,Corey,Schafer
janedoe@email.com,Jane,Doe
johndoe@email.com,John,Doe


In [7]:
# df still has its integer index: the set_index call in the previous cell
# returned a new frame and its result was not assigned.
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSSchafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [8]:
# Same re-indexing as before, but inplace=True modifies df itself (carries over to df),
# which is why this cell displays nothing.
df.set_index(keys='email', inplace=True)

In [9]:
# The email addresses are now the row labels; only 'first' and 'last' remain as columns.
df

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
CoreyMSSchafer@gmail.com,Corey,Schafer
janedoe@email.com,Jane,Doe
johndoe@email.com,John,Doe


In [10]:
# Label-based lookup: with the email index in place, a row can be fetched by its address.
df.loc['janedoe@email.com', :]

first    Jane
last      Doe
Name: janedoe@email.com, dtype: object

In [11]:
# Position-based lookup: integer positions still work regardless of the index labels.
df.iloc[1, :]

first    Jane
last      Doe
Name: janedoe@email.com, dtype: object

In [12]:
# Restore the default integer index in place; drop=False (the default) keeps
# the old index by moving 'email' back into an ordinary column.
df.reset_index(drop=False, inplace=True)

In [13]:
# 'email' is a regular column again (now first in column order) and the index is 0..2.
df

Unnamed: 0,email,first,last
0,CoreyMSSchafer@gmail.com,Corey,Schafer
1,janedoe@email.com,Jane,Doe
2,johndoe@email.com,John,Doe


In [14]:
# Boolean mask: True for every row whose last name is exactly 'Doe'.
filt = df['last'].eq('Doe')

In [15]:
# Keep only the rows where the mask is True.
df.loc[filt]

Unnamed: 0,email,first,last
1,janedoe@email.com,Jane,Doe
2,johndoe@email.com,John,Doe


In [16]:
# .loc takes (rows, columns): the mask selects the rows, 'email' selects the column.
df.loc[filt, 'email']

1    janedoe@email.com
2    johndoe@email.com
Name: email, dtype: object

In [17]:
# Combine element-wise conditions with the bitwise operators.
filt = df['last'].eq('Doe') | df['last'].eq('Schafer')  # & and | for And and Or

In [18]:
# Every row has one of the two last names, so the whole frame comes back.
df[filt]

Unnamed: 0,email,first,last
0,CoreyMSSchafer@gmail.com,Corey,Schafer
1,janedoe@email.com,Jane,Doe
2,johndoe@email.com,John,Doe


In [19]:
# how to change column names
# Start again from a fresh frame so the columns are back to first / last / email.
df = pd.DataFrame(data=people)
df.columns  # current labels (not shown: only a cell's last expression is displayed)
# very uncommon method: assigning a full list replaces every label at once,
# so the list must name all columns in order.
df.columns = ['first_name', 'last_name', 'email']

In [20]:
# Lower-case every column label via the string accessor of the column Index.
df.columns = df.columns.str.lower()
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSSchafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [21]:
# more selective method for changing column names:
# only the labels listed in the mapping are renamed, the others are left alone.
df.rename(
    columns={'first_name': 'first', 'last_name': 'last'},
    inplace=True,
)

In [22]:
# The columns are named first / last / email again; 'email' was not in the mapping and is unchanged.
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSSchafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [23]:
# changing data
# Each assignment below overwrites part of row 2, so their order matters.
df.loc[2] = ['John', 'Smith', 'JohnSmith@email.com']  # non selective method: replaces the entire row
df.loc[2, ['last', 'email']] = ['Doe', 'JohnDoe@email.com']  # selective method: only the listed columns change
df.loc[2, 'last'] = 'Smith'  # single value: row label 2, column 'last'

In [24]:
# Row 2 ends up with last name 'Smith' (third assignment) and the email set by the second assignment.
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSSchafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,JohnDoe@email.com


In [25]:
# Lower-case every address through the .str accessor and assign the result
# back so that the column in df is replaced.
df['email'] = df['email'].str.lower()
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymsschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [26]:
# On a Series, apply calls the function once per value -- here the length of each address.
df['email'].apply(len)

0    24
1    17
2    17
Name: email, dtype: int64

In [27]:
def update_email(email):
    """Return ``email`` converted to upper case."""
    uppercased = email.upper()
    return uppercased

In [28]:
df['email'] = df['email'].apply(update_email)  # no parentheses: pass the function itself, apply calls it for each value
# can also use lambda function; this one lower-cases the addresses again,
# undoing the upper-casing done on the line above
df['email'] = df['email'].apply(lambda x: x.lower())

df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymsschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [29]:
# Apply len to every individual value of the frame (element-wise, DataFrame only).
# DataFrame.applymap is deprecated since pandas 2.1 (renamed DataFrame.map), so map
# each column's values instead -- this gives the same result on old and new pandas.
df.apply(lambda column: column.map(len))

Unnamed: 0,first,last,email
0,5,7,24
1,4,3,17
2,4,5,17


In [30]:
# map substitutes values through the dict; a value without an entry (here 'John') becomes NaN.
df['first'].map({'Corey': 'Chris', 'Jane': 'Mary'})

0    Chris
1     Mary
2      NaN
Name: first, dtype: object

In [31]:
# replace swaps only the listed values and leaves all others as they are ('John' stays 'John').
df['first'] = df['first'].replace(to_replace={'Corey': 'Chris', 'Jane': 'Mary'})
df

Unnamed: 0,first,last,email
0,Chris,Schafer,coreymsschafer@gmail.com
1,Mary,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [32]:
# Join first and last name with a single space into a new 'full_name' column.
df['full_name'] = df['first'].str.cat(df['last'], sep=' ')
df

Unnamed: 0,first,last,email,full_name
0,Chris,Schafer,coreymsschafer@gmail.com,Chris Schafer
1,Mary,Doe,janedoe@email.com,Mary Doe
2,John,Smith,johndoe@email.com,John Smith


In [33]:
# Remove the two name columns from df itself; 'full_name' still holds both parts.
df.drop(labels=['first', 'last'], axis='columns', inplace=True)
df

Unnamed: 0,email,full_name
0,coreymsschafer@gmail.com,Chris Schafer
1,janedoe@email.com,Mary Doe
2,johndoe@email.com,John Smith


In [36]:
# Split 'full_name' on the space; expand=True returns one column per piece,
# and the two pieces are stored as the new 'first' and 'last' columns.
df[['first', 'last']] = df['full_name'].str.split(pat=' ', expand=True)

In [37]:
# 'first' and 'last' are back, appended after the existing 'email' and 'full_name' columns.
df

Unnamed: 0,email,full_name,first,last
0,coreymsschafer@gmail.com,Chris Schafer,Chris,Schafer
1,janedoe@email.com,Mary Doe,Mary,Doe
2,johndoe@email.com,John Smith,John,Smith


In [38]:
# Add a single row from a dict; columns missing from the dict are left empty (NaN).
# DataFrame.append was removed in pandas 2.0, so wrap the record in a one-row frame
# and use pd.concat. As with append, a new frame is returned and df is unchanged.
pd.concat([df, pd.DataFrame([{'first': 'Tony'}])], ignore_index=True)

Unnamed: 0,email,full_name,first,last
0,coreymsschafer@gmail.com,Chris Schafer,Chris,Schafer
1,janedoe@email.com,Mary Doe,Mary,Doe
2,johndoe@email.com,John Smith,John,Smith
3,,,Tony,


In [39]:
# A second batch of records in the same column-wise layout.
# Note: this rebinds the name `people`, replacing the dict defined at the top of the notebook.
people = dict(
    first=['Tony', 'Steve'],
    last=['Stark', 'Rogers'],
    email=['IronMan@avenge.com', 'cap@avenge.com'],
)

In [40]:
# Second frame built from the new records; df is displayed again for comparison.
df2 = pd.DataFrame(data=people)
df

Unnamed: 0,email,full_name,first,last
0,coreymsschafer@gmail.com,Chris Schafer,Chris,Schafer
1,janedoe@email.com,Mary Doe,Mary,Doe
2,johndoe@email.com,John Smith,John,Smith


In [41]:
# df2 has first / last / email but no 'full_name' column.
df2

Unnamed: 0,first,last,email
0,Tony,Stark,IronMan@avenge.com
1,Steve,Rogers,cap@avenge.com


In [42]:
# Stack df2's rows under df's. Columns are matched by name; df2 has no 'full_name',
# so that column is empty (NaN) for its rows. ignore_index=True renumbers the rows.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement.
# The result is not assigned, so df is unchanged.
pd.concat([df, df2], ignore_index=True)

Unnamed: 0,email,full_name,first,last
0,coreymsschafer@gmail.com,Chris Schafer,Chris,Schafer
1,janedoe@email.com,Mary Doe,Mary,Doe
2,johndoe@email.com,John Smith,John,Smith
3,IronMan@avenge.com,,Tony,Stark
4,cap@avenge.com,,Steve,Rogers


In [43]:
# Keep the combined frame by assigning the result back to df.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df = pd.concat([df, df2], ignore_index=True)
df

Unnamed: 0,email,full_name,first,last
0,coreymsschafer@gmail.com,Chris Schafer,Chris,Schafer
1,janedoe@email.com,Mary Doe,Mary,Doe
2,johndoe@email.com,John Smith,John,Smith
3,IronMan@avenge.com,,Tony,Stark
4,cap@avenge.com,,Steve,Rogers


In [44]:
# Drop the row whose index label is 4. A new frame is returned; df keeps all of its rows.
df.drop(labels=4, axis='index')

Unnamed: 0,email,full_name,first,last
0,coreymsschafer@gmail.com,Chris Schafer,Chris,Schafer
1,janedoe@email.com,Mary Doe,Mary,Doe
2,johndoe@email.com,John Smith,John,Smith
3,IronMan@avenge.com,,Tony,Stark


In [46]:
# Drop every row whose last name is 'Doe': select the matching rows, take their
# index labels and pass those to drop (a new frame is returned, df is unchanged).
#
# more readable to do:
#     filt = df['last'] == 'Doe'
#     df.drop(index=df[filt].index)
#
# The note is kept as comments above the expression: a trailing string literal would
# become the cell's last expression and be displayed instead of the resulting frame.
df.drop(index=df[df['last'] == 'Doe'].index)

Unnamed: 0,email,full_name,first,last
0,coreymsschafer@gmail.com,Chris Schafer,Chris,Schafer
2,johndoe@email.com,John Smith,John,Smith
3,IronMan@avenge.com,,Tony,Stark
4,cap@avenge.com,,Steve,Rogers
