In [1]:
# Column-oriented sample data: position i in each list describes one person.
# NOTE(review): 'janegmail.com' has no '@' -- possibly a typo; kept as-is
# because the outputs shown further down depend on these exact strings.
people = dict(
    first=['corey', 'jane', 'john'],
    last=['shaefer', 'doe', 'doe'],
    email=['rob@gmail.com', 'janegmail.com', 'john@gmail.com'],
)

In [2]:
import pandas as pd

In [3]:
df = pd.DataFrame(people)

In [4]:
df

Unnamed: 0,first,last,email
0,corey,shaefer,rob@gmail.com
1,jane,doe,janegmail.com
2,john,doe,john@gmail.com


In [5]:
df['email']

0     rob@gmail.com
1     janegmail.com
2    john@gmail.com
Name: email, dtype: object

In [6]:
df.set_index('email', inplace=True)

In [7]:
df

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
rob@gmail.com,corey,shaefer
janegmail.com,jane,doe
john@gmail.com,john,doe


# indexes

In [8]:
df.index

Index(['rob@gmail.com', 'janegmail.com', 'john@gmail.com'], dtype='object', name='email')

In [9]:
df.loc['janegmail.com', 'last'] 

'doe'

In [10]:
# Undo the earlier set_index('email'): 'email' becomes an ordinary column
# again and the default integer index comes back. inplace=True mutates df
# itself instead of returning a new frame.
df.reset_index(inplace=True)
df.head()

Unnamed: 0,email,first,last
0,rob@gmail.com,corey,shaefer
1,janegmail.com,jane,doe
2,john@gmail.com,john,doe


In [11]:
# Boolean mask: True where the last name is 'shaefer' OR the first name is 'john'.
filt = df['last'].eq('shaefer') | df['first'].eq('john')

In [12]:
df.loc[~filt, 'email']

1    janegmail.com
Name: email, dtype: object

In [13]:
df.columns

Index(['email', 'first', 'last'], dtype='object')

In [14]:
df.columns = ['email', 'first_name', 'last_name']
df.columns

Index(['email', 'first_name', 'last_name'], dtype='object')

In [15]:
# Lower-case every column label with the vectorised Index.str accessor
# rather than a Python-level list comprehension over the labels.
df.columns = df.columns.str.lower()
df.columns

Index(['email', 'first_name', 'last_name'], dtype='object')

In [16]:
df

Unnamed: 0,email,first_name,last_name
0,rob@gmail.com,corey,shaefer
1,janegmail.com,jane,doe
2,john@gmail.com,john,doe


In [17]:
df.columns = df.columns.str.replace(' ', '_')
df.columns

Index(['email', 'first_name', 'last_name'], dtype='object')

In [18]:
# Rename just the two name columns back to their short labels; columns not
# listed in the mapping ('email') keep their current name.
df.rename(
    {'first_name': 'first', 'last_name': 'last'},
    axis='columns',
    inplace=True,
)


In [19]:
df

Unnamed: 0,email,first,last
0,rob@gmail.com,corey,shaefer
1,janegmail.com,jane,doe
2,john@gmail.com,john,doe


In [20]:
df.loc[2, 'last'] = 'Havana'

In [21]:
df

Unnamed: 0,email,first,last
0,rob@gmail.com,corey,shaefer
1,janegmail.com,jane,doe
2,john@gmail.com,john,Havana


# filtering & updating rows/columns

In [22]:
filt = (df['email'] == 'john@gmail.com')

In [23]:
df.loc[filt, 'last'] = 'Smith'

In [24]:
df

Unnamed: 0,email,first,last
0,rob@gmail.com,corey,shaefer
1,janegmail.com,jane,doe
2,john@gmail.com,john,Smith


In [25]:
df['email'] = df['email'].str.lower()

In [26]:
df

Unnamed: 0,email,first,last
0,rob@gmail.com,corey,shaefer
1,janegmail.com,jane,doe
2,john@gmail.com,john,Smith


In [27]:
df['email'].apply(len)

0    13
1    13
2    14
Name: email, dtype: int64

In [28]:
def update_email(email):
    """Return ``email`` converted to upper case.

    Intended to be passed to ``Series.apply`` so it is called once per value.
    """
    shouted = email.upper()
    return shouted

df['email'] = df['email'].apply(update_email)

In [29]:
df

Unnamed: 0,email,first,last
0,ROB@GMAIL.COM,corey,shaefer
1,JANEGMAIL.COM,jane,doe
2,JOHN@GMAIL.COM,john,Smith


In [30]:
df['email'] = df['email'].apply(lambda x: x.lower())

In [31]:
df

Unnamed: 0,email,first,last
0,rob@gmail.com,corey,shaefer
1,janegmail.com,jane,doe
2,john@gmail.com,john,Smith


In [32]:
df['email'].apply(len)

0    13
1    13
2    14
Name: email, dtype: int64

In [33]:
df.apply(len)

email    3
first    3
last     3
dtype: int64

In [34]:
df.apply(pd.Series.min)

email    janegmail.com
first            corey
last             Smith
dtype: object

In [35]:
df.apply(lambda x: x.min())

email    janegmail.com
first            corey
last             Smith
dtype: object

In [36]:
# Element-wise: call len() on every individual cell of the frame.
# DataFrame.map is the pandas >= 2.1 name for DataFrame.applymap, which is
# deprecated and emits a FutureWarning.
df.map(len)

Unnamed: 0,email,first,last
0,13,5,7
1,13,4,3
2,14,4,5


In [37]:
# Element-wise lower-casing of every cell; this returns a new frame and
# leaves df itself unchanged. DataFrame.map replaces DataFrame.applymap,
# which is deprecated since pandas 2.1.
df.map(str.lower)

Unnamed: 0,email,first,last
0,rob@gmail.com,corey,shaefer
1,janegmail.com,jane,doe
2,john@gmail.com,john,smith


In [38]:
df

Unnamed: 0,email,first,last
0,rob@gmail.com,corey,shaefer
1,janegmail.com,jane,doe
2,john@gmail.com,john,Smith


In [39]:
# Series.map with a dict returns a new Series; a value that has no key in
# the dict ('john' here) comes back as NaN. Nothing is assigned, so df
# itself is not modified by this cell.
df['first'].map({
    'corey': 'Chris',
    'jane': 'Mary'
})

0    Chris
1     Mary
2      NaN
Name: first, dtype: object

In [40]:
df


Unnamed: 0,email,first,last
0,rob@gmail.com,corey,shaefer
1,janegmail.com,jane,doe
2,john@gmail.com,john,Smith


In [41]:
# replace() swaps only the values listed in the dict and keeps every other
# value ('john') as it is; the result is assigned back to the column.
df['first'] = df['first'].replace({
    'corey': 'Chris',
    'jane': 'Mary'
})

In [42]:
df

Unnamed: 0,email,first,last
0,rob@gmail.com,Chris,shaefer
1,janegmail.com,Mary,doe
2,john@gmail.com,john,Smith


# add/remove rows & columns

In [43]:
df['first'] + ' ' + df['last']

0    Chris shaefer
1         Mary doe
2       john Smith
dtype: object

In [44]:
df['full_name'] = df['first'] + ' ' + df['last']
df

Unnamed: 0,email,first,last,full_name
0,rob@gmail.com,Chris,shaefer,Chris shaefer
1,janegmail.com,Mary,doe,Mary doe
2,john@gmail.com,john,Smith,john Smith


In [45]:
df.drop(columns=['first', 'last'], inplace=True)

In [46]:
df

Unnamed: 0,email,full_name
0,rob@gmail.com,Chris shaefer
1,janegmail.com,Mary doe
2,john@gmail.com,john Smith


In [47]:
df['full_name'].str.split(' ', expand=True)

Unnamed: 0,0,1
0,Chris,shaefer
1,Mary,doe
2,john,Smith


In [48]:
# Split on the first space only (n=1) so the result never has more than two
# columns to assign, even when a name contains additional spaces; everything
# after the first space lands in 'last'.
df[['first', 'last']] = df['full_name'].str.split(' ', n=1, expand=True)

In [49]:
df

Unnamed: 0,email,full_name,first,last
0,rob@gmail.com,Chris shaefer,Chris,shaefer
1,janegmail.com,Mary doe,Mary,doe
2,john@gmail.com,john Smith,john,Smith


In [50]:
# One-row frame that carries only a first name; the columns it does not
# define are left missing once it is stacked under df.
new_row = pd.DataFrame(data={'first': ['Jane']})

# Stack the new row beneath the existing rows and renumber the index.
frames = (df, new_row)
df = pd.concat(frames, ignore_index=True)
del frames

df

Unnamed: 0,email,full_name,first,last
0,rob@gmail.com,Chris shaefer,Chris,shaefer
1,janegmail.com,Mary doe,Mary,doe
2,john@gmail.com,john Smith,john,Smith
3,,,Jane,


In [51]:
# Second batch of sample people (same three columns as the first batch),
# built into its own frame so it can be concatenated onto df.
people = dict(
    first=['Tony', 'Steve'],
    last=['Stark', 'Rogers'],
    email=['Tony@gmail.com', 'Steve@gmail.com'],
)
df2 = pd.DataFrame(data=people)
df2

Unnamed: 0,first,last,email
0,Tony,Stark,Tony@gmail.com
1,Steve,Rogers,Steve@gmail.com


In [52]:
df = pd.concat([df, df2], ignore_index=True)

In [53]:
df

Unnamed: 0,email,full_name,first,last
0,rob@gmail.com,Chris shaefer,Chris,shaefer
1,janegmail.com,Mary doe,Mary,doe
2,john@gmail.com,john Smith,john,Smith
3,,,Jane,
4,Tony@gmail.com,,Tony,Stark
5,Steve@gmail.com,,Steve,Rogers


In [54]:
# Drop the row whose index *label* is 4 (the 'Tony' row). The remaining
# labels are not renumbered, so the index now jumps from 3 to 5.
df.drop(index=4, inplace=True)
df

Unnamed: 0,email,full_name,first,last
0,rob@gmail.com,Chris shaefer,Chris,shaefer
1,janegmail.com,Mary doe,Mary,doe
2,john@gmail.com,john Smith,john,Smith
3,,,Jane,
5,Steve@gmail.com,,Steve,Rogers


In [55]:
# Mask the rows whose last name is 'doe', then drop them by index label.
filt = df['last'].eq('doe')
df.drop(index=df.loc[filt].index, inplace=True)
df

Unnamed: 0,email,full_name,first,last
0,rob@gmail.com,Chris shaefer,Chris,shaefer
2,john@gmail.com,john Smith,john,Smith
3,,,Jane,
5,Steve@gmail.com,,Steve,Rogers


In [56]:
df


Unnamed: 0,email,full_name,first,last
0,rob@gmail.com,Chris shaefer,Chris,shaefer
2,john@gmail.com,john Smith,john,Smith
3,,,Jane,
5,Steve@gmail.com,,Steve,Rogers


# sorting 