In [1]:
# first table
people = {
    'first': ['Corey','Jane','John'],
    'last': ['Schafer','Doe','Doe'],
    'email':['CoreyMSchafer@gmail.com','JaneDoe@email.com','JohnDoe@email.com']
}

In [2]:
import pandas as pd

In [3]:
df = pd.DataFrame(people)

In [4]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [5]:
# how to add column
# combine first name and last name into a single column

df['first'] + ' ' +  df['last']

0    Corey Schafer
1         Jane Doe
2         John Doe
dtype: object

In [6]:
# add a new column
# use apply for mathematical analysis for adding columns

df['full_name'] = df['first'] + ' ' +  df['last']

In [7]:
df

Unnamed: 0,first,last,email,full_name
0,Corey,Schafer,CoreyMSchafer@gmail.com,Corey Schafer
1,Jane,Doe,JaneDoe@email.com,Jane Doe
2,John,Doe,JohnDoe@email.com,John Doe


In [8]:
# how to remove columns
# this is not permanent, put inplace = True if you want permanent change

df.drop(columns= ['first','last'])

Unnamed: 0,email,full_name
0,CoreyMSchafer@gmail.com,Corey Schafer
1,JaneDoe@email.com,Jane Doe
2,JohnDoe@email.com,John Doe


In [9]:
df

Unnamed: 0,first,last,email,full_name
0,Corey,Schafer,CoreyMSchafer@gmail.com,Corey Schafer
1,Jane,Doe,JaneDoe@email.com,Jane Doe
2,John,Doe,JohnDoe@email.com,John Doe


In [10]:
df.drop(columns= ['first','last'], inplace = True)

In [11]:
# now no longer has first and last column
df

Unnamed: 0,email,full_name
0,CoreyMSchafer@gmail.com,Corey Schafer
1,JaneDoe@email.com,Jane Doe
2,JohnDoe@email.com,John Doe


In [12]:
# if we wanted to reverse and split the full name column
# use split method and split on space

df['full_name'].str.split(' ')

0    [Corey, Schafer]
1         [Jane, Doe]
2         [John, Doe]
Name: full_name, dtype: object

In [13]:
# if we want to assign this to two different columns
# use the expand argument

In [14]:
df['full_name'].str.split(' ', expand = True)

Unnamed: 0,0,1
0,Corey,Schafer
1,Jane,Doe
2,John,Doe


In [15]:
# set or add this to dataframe
# pass in a list

df[['first','last']]= df['full_name'].str.split(' ', expand = True)

In [16]:
# we can see that now we added the new column

df

Unnamed: 0,email,full_name,first,last
0,CoreyMSchafer@gmail.com,Corey Schafer,Corey,Schafer
1,JaneDoe@email.com,Jane Doe,Jane,Doe
2,JohnDoe@email.com,John Doe,John,Doe


In [31]:
# adding a single row of data
# use append method
# if you run this, it will give an error
# because it doesn't have an index

df = df.append({'first':'Tony'}, ignore_index = True)

  df = df.append({'first':'Tony'}, ignore_index = True)


In [32]:
# Tony now appears
df

Unnamed: 0,email,full_name,first,last
0,CoreyMSchafer@gmail.com,Corey Schafer,Corey,Schafer
1,JaneDoe@email.com,Jane Doe,Jane,Doe
2,JohnDoe@email.com,John Doe,John,Doe
3,,,Tony,


In [19]:
# another way to do this
# we will just ignore the index and our dataframe will
# automatically assign a new row to prevent this error
# we can see that Tony is already added

# df.append({'first': 'Tony'}, ignore_index = True)

In [33]:
# append a new dataframe to the existing dataframe

people = {
    'email': ['IronMan@avenge.com','Cap@avenge.com'],
    'full_name': ['Tony Stark','Steve Rogers'],
    'first':['Tony','Steve'],
    'last':['Stark','Rogers']
}

df2 = pd.DataFrame(people)

In [34]:
df2

Unnamed: 0,email,full_name,first,last
0,IronMan@avenge.com,Tony Stark,Tony,Stark
1,Cap@avenge.com,Steve Rogers,Steve,Rogers


In [22]:
# add this to our existing dataframe
# notice this have conflicting indexes (0,1)
# if we got a warning, it is because there is column given that is set in order

# df = pd.concat([df,df2])

In [35]:
df

Unnamed: 0,email,full_name,first,last
0,CoreyMSchafer@gmail.com,Corey Schafer,Corey,Schafer
1,JaneDoe@email.com,Jane Doe,Jane,Doe
2,JohnDoe@email.com,John Doe,John,Doe
3,,,Tony,


In [36]:
# add two dataframes, use concat as append is already deprecated
# put ignore_index = True to prevent error
df = pd.concat([df,df2], ignore_index = True, axis = 0, sort = False)

In [37]:
df

Unnamed: 0,email,full_name,first,last
0,CoreyMSchafer@gmail.com,Corey Schafer,Corey,Schafer
1,JaneDoe@email.com,Jane Doe,Jane,Doe
2,JohnDoe@email.com,John Doe,John,Doe
3,,,Tony,
4,IronMan@avenge.com,Tony Stark,Tony,Stark
5,Cap@avenge.com,Steve Rogers,Steve,Rogers


In [38]:
# drop row / index
# pass in the indexes that we want to drop

df.drop(index = 4)

Unnamed: 0,email,full_name,first,last
0,CoreyMSchafer@gmail.com,Corey Schafer,Corey,Schafer
1,JaneDoe@email.com,Jane Doe,Jane,Doe
2,JohnDoe@email.com,John Doe,John,Doe
3,,,Tony,
5,Cap@avenge.com,Steve Rogers,Steve,Rogers


In [40]:
# if you want to apply to dataframe permanently
# use inplace = True

df.drop(index = 4, inplace = True)

In [41]:
df

Unnamed: 0,email,full_name,first,last
0,CoreyMSchafer@gmail.com,Corey Schafer,Corey,Schafer
1,JaneDoe@email.com,Jane Doe,Jane,Doe
2,JohnDoe@email.com,John Doe,John,Doe
3,,,Tony,
5,Cap@avenge.com,Steve Rogers,Steve,Rogers


In [44]:
# wanted to drop all of the rows where last name is ==
# pass in the indexes of that filter
# put the conditional inside "[df['last']=='Doe']"

df.drop(index = df[df['last']=='Doe'].index)

# can also pull conditional out to make it easy to read

filt = df['last']=='Doe'
df.drop(index = df[filt].index)

Unnamed: 0,email,full_name,first,last
0,CoreyMSchafer@gmail.com,Corey Schafer,Corey,Schafer
3,,,Tony,
5,Cap@avenge.com,Steve Rogers,Steve,Rogers


In [43]:
# https://www.youtube.com/watch?v=WGOEFok1szA

In [45]:
# JUNE 25, 2022 10:29 AM