In [1]:
import pandas as pd

In [2]:
# sample records stored column-wise: each keyword becomes a column name and
# each list holds that column's values, one entry per person
people = dict(
    first=['Corey', 'Jane', 'John'],
    last=['Schafer', 'Doe', 'Doe'],
    email=['CoreyMSchafer@gmail.com', 'JaneDoe@gmail.com', 'JohnDoe@gmail.com'],
)

In [3]:
# build the DataFrame from the dict: keys become column names, lists become column values
df = pd.DataFrame(data=people)

In [4]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Doe,JohnDoe@gmail.com


In [5]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [6]:
# what happens when we want to change the column names? 'first' >> 'first names' etc

In [7]:
# renaming all of the columns, assigning new column names

In [11]:
# assigning a list to df.columns replaces every column name at once:
# one new name per existing column, given in the current column order
df.columns = ['first_name', 'last_name', 'email']

In [12]:
df.columns

Index(['first_name', 'last_name', 'email'], dtype='object')

In [10]:
# this has effectively changed my column names

In [13]:
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Doe,JohnDoe@gmail.com


In [14]:
# these changes show up in the dataframe as well

In [15]:
# we only use this when we want to change the names of all of the columns in our dataframe
# but most of the time, we only want to change the names of a few select columns

In [17]:
# renaming specific column names -- but first, two ways to transform every column name at once

In [26]:
# lower-case every column name with a list comprehension and assign the result back
# (this touches all columns, not specific ones; the names are already lower-case
# here, so the displayed frame is unchanged)
df.columns = [x.lower() for x in df.columns]
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Doe,JohnDoe@gmail.com


In [27]:
# swap spaces for underscores in every column name using the .str accessor on the
# column Index (no column name here contains a space, so nothing changes)
df.columns = df.columns.str.replace(' ', '_')

In [28]:
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Doe,JohnDoe@gmail.com


In [32]:
# rename() takes an {old_name: new_name} mapping and only touches the columns listed in it
# it returns a new DataFrame rather than changing df, so we assign the result back to df
# to keep the change (same end result as passing the inplace=True argument)
df = df.rename(columns={'first_name': 'first', 'last_name': 'last'})

In [33]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Doe,JohnDoe@gmail.com


In [39]:
df.loc[2] = ['John', 'Smith', 'JohnSmith@gmail.com']
# this is for when we want to update a single existing row: .loc[2] selects the row labelled 2
# and the list supplies one value per column, in column order (first, last, email)

In [40]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Smith,JohnSmith@gmail.com


In [46]:
df.loc[2, ['last', 'email']] = ['Adams', 'JohnAdams@gmail.com']
# this is for when we want to only change one or a few columns in one row:
# the second argument to .loc lists the columns, and the values are matched to them in order

In [47]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Adams,JohnAdams@gmail.com


In [48]:
df.loc[2, 'last'] = 'Jacobs'
# since we are only changing one column, we do not need to pass in a list:
# a single column label and a single value are enough

In [49]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Jacobs,JohnAdams@gmail.com


In [52]:
df.at[2, 'email'] = 'JohnJacobs@gmail.com'
# pandas has another indexer, '.at', that sets a single value by row label and column label,
# which achieves the same thing as a single-value .loc assignment
# you can use whichever you like, but .at may be better for performance

In [53]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Jacobs,JohnJacobs@gmail.com


In [63]:
filt = (df['email'] == 'JaneDoe@gmail.com')
# boolean mask with one True/False per row (True where the email matches exactly)
# for when we want to quickly find rows that contain whatever we are looking to replace

In [67]:
# deliberately wrong, kept to show the warning below: df[filt] is evaluated first and
# ['last'] = ... is then applied to that intermediate result, which pandas warns is a
# copy -- so df itself is not reliably updated by this chained indexing
df[filt]['last'] = 'Smith'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [70]:
# just use .loc or .at when you want to modify anything

In [71]:
# the correct form: one .loc call with the row mask and the column label,
# so the value is written into df itself
df.loc[filt, 'last'] = 'Smith'

In [76]:
df['email']

0    CoreyMSchafer@gmail.com
1          JaneDoe@gmail.com
2       JohnJacobs@gmail.com
Name: email, dtype: object

In [77]:
df['email'] = df['email'].str.lower()
# .str.lower() returns a new lower-cased Series and leaves the column untouched,
# so we assign the result back to df['email'] to keep the change

In [78]:
df['email']

0    coreymschafer@gmail.com
1          janedoe@gmail.com
2       johnjacobs@gmail.com
Name: email, dtype: object

In [79]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Smith,janedoe@gmail.com
2,John,Jacobs,johnjacobs@gmail.com


In [82]:
# apply method
# map method
# applymap method
# replace method

In [83]:
# apply method is used for calling a function on our values, and apply can be used on our data frame
# or a series object. 

In [84]:
# we want to see the length of all the email addresses

In [85]:
# on a Series, apply calls the function once per value: here len() of each email string
df['email'].apply(len)

0    23
1    17
2    20
Name: email, dtype: int64

In [96]:
def update_email(email):
    """Return the given email address converted to upper case."""
    uppercased = email.upper()
    return uppercased

In [97]:
# pass the function itself (no parentheses); apply calls it on every email and
# returns a new Series -- df is not changed by this cell
df['email'].apply(update_email)

0    COREYMSCHAFER@GMAIL.COM
1          JANEDOE@GMAIL.COM
2       JOHNJACOBS@GMAIL.COM
Name: email, dtype: object

In [98]:
# assign the result back to the column to actually store the upper-cased emails in df
df['email'] = df['email'].apply(update_email)

In [99]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,COREYMSCHAFER@GMAIL.COM
1,Jane,Smith,JANEDOE@GMAIL.COM
2,John,Jacobs,JOHNJACOBS@GMAIL.COM


In [100]:
# same pattern with an anonymous function instead of a named one:
# lower-case every email and store the result back in the column
df['email'] = df['email'].apply(lambda x: x.lower())

In [103]:
# length of each email string; changing the case did not change the lengths
df['email'].apply(len)

0    23
1    17
2    20
Name: email, dtype: int64

In [110]:
# on a DataFrame, apply passes a whole Series to the function, not single values
# without an axis argument, each column is passed in turn, so len gives the number of rows per column
df.apply(len)

first    3
last     3
email    3
dtype: int64

In [None]:
# the '3' is indicating that there are three rows for each column (values = Corey, Jane, John)

In [108]:
# with axis='columns' each row is passed to the function instead,
# so len gives the number of values (columns) in every row
df.apply(len, axis = 'columns')

0    3
1    3
2    3
dtype: int64

In [111]:
# the '3' is indicating that there are three values for each row (values = Corey, Schafer, CoreyMSchafer@gmail.com)

In [116]:
# apply hands each column (a Series) to pd.Series.min, giving one minimum value per column
df.apply(pd.Series.min)

first                      Corey
last                      Jacobs
email    coreymschafer@gmail.com
dtype: object

In [114]:
# when we apply the pd.Series.min function to a data frame made up entirely of strings, it returns,
# for each column, the string that sorts first (alphabetical order, with uppercase letters before lowercase)

In [117]:
# same result as passing pd.Series.min: x is a whole column (a Series),
# so Series methods such as .min() are available inside the lambda
df.apply(lambda x: x.min())

first                      Corey
last                      Jacobs
email    coreymschafer@gmail.com
dtype: object

In [118]:
# applymap calls the function on every individual cell of the DataFrame (DataFrame only),
# so this gives the string length of each value
# NOTE(review): newer pandas releases deprecate DataFrame.applymap in favour of
# DataFrame.map -- confirm against the installed pandas version before switching
df.applymap(len)

Unnamed: 0,first,last,email
0,5,7,23
1,4,5,17
2,4,6,20


In [121]:
# lower-case every cell; str.lower only accepts strings, so this relies on
# every value in the frame being a string (the result is displayed, not stored in df)
df.applymap(str.lower)

Unnamed: 0,first,last,email
0,corey,schafer,coreymschafer@gmail.com
1,jane,smith,janedoe@gmail.com
2,john,jacobs,johnjacobs@gmail.com


In [124]:
# map (Series only) substitutes values using the dict; any value without a matching
# key -- 'John' here -- comes back as NaN (the result is displayed, not stored in df)
df['first'].map({'Corey': 'Chris', 'Jane': 'Mary'})

0    Chris
1     Mary
2      NaN
Name: first, dtype: object

In [126]:
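# this looks similar to assigning new values with the .loc and .at indexers, but map turns every
# value that is missing from the dictionary (here 'John') into NaN; replace (next cell) leaves them as-is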

In [128]:
# replace swaps only the values listed in the dict and keeps everything else ('John')
# unchanged; the result is assigned back so the change is stored in df
df['first'] = df['first'].replace({'Corey': 'Chris', 'Jane': 'Mary'})

In [129]:
df

Unnamed: 0,first,last,email
0,Chris,Schafer,coreymschafer@gmail.com
1,Mary,Smith,janedoe@gmail.com
2,John,Jacobs,johnjacobs@gmail.com
