# Updating Values

## Smaller Dataset

In [1]:
# Sample records stored column-wise: each key is a column name and each list
# holds that column's values in row order.
people = {
    "first": ["Corey", "Jane", "John"],
    "last": ["Schafer", "Doe", "Doe"],
    "email": ["Corey@gmail.com", "Jane@gmail.com", "John@gmail.com"],
}

In [2]:
import pandas as pd

In [3]:
# Build the DataFrame from the dict: keys become column labels, lists become column values.
df = pd.DataFrame(people)

In [4]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,Corey@gmail.com
1,Jane,Doe,Jane@gmail.com
2,John,Doe,John@gmail.com


### Updating Columns

In [5]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [6]:
# Assigning a list to .columns relabels every column at once, in positional order.
df.columns = ['first_name', 'last_name', 'email']

In [7]:
df.columns

Index(['first_name', 'last_name', 'email'], dtype='object')

In [8]:
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,Corey@gmail.com
1,Jane,Doe,Jane@gmail.com
2,John,Doe,John@gmail.com


In [9]:
# Rename only the listed columns; columns not named in the mapping (here
# 'email') keep their labels. The returned frame is rebound to df instead of
# using inplace=True, which keeps the call chainable and makes the state
# change explicit in the cell.
df = df.rename(columns={'first_name': 'first', 'last_name': 'last'})

In [10]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,Corey@gmail.com
1,Jane,Doe,Jane@gmail.com
2,John,Doe,John@gmail.com


### Updating Rows

In [11]:
# Overwrite the whole row labelled 2; the list supplies one value per column, in column order.
df.loc[2] = ['John', 'Smith', 'johnsmith@gmail.com']

In [12]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,Corey@gmail.com
1,Jane,Doe,Jane@gmail.com
2,John,Smith,johnsmith@gmail.com


In [13]:
# Update only selected columns of row 2: the column list and the value list are matched by position.
df.loc[2, ['last', 'email']] = ['Doe', 'JohnDoe@gmail.com']

In [14]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,Corey@gmail.com
1,Jane,Doe,Jane@gmail.com
2,John,Doe,JohnDoe@gmail.com


In [15]:
# A single cell is addressed with a (row label, column label) pair.
df.loc[2, 'first'] = 'John'
df

Unnamed: 0,first,last,email
0,Corey,Schafer,Corey@gmail.com
1,Jane,Doe,Jane@gmail.com
2,John,Doe,JohnDoe@gmail.com


In [16]:
# Boolean mask: True for rows whose email equals this address (the comparison is case-sensitive).
filt = (df['email'] == 'JohnDoe@gmail.com')
# Indexing with the mask displays only the matching rows.
df[filt]

Unnamed: 0,first,last,email
2,John,Doe,JohnDoe@gmail.com


In [17]:
# Not the right way: df[filt] is evaluated first and yields a separate filtered
# object, so the ['email'] assignment targets that temporary result and pandas
# emits the SettingWithCopyWarning shown below. Kept to demonstrate the pitfall.

df[filt]['email'] = 'johndoe@gmail.com' 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [18]:
# The right way: address rows and column in a single .loc call so the
# assignment is applied to df itself. The right-hand side is restricted to the
# same rows, so only the matching emails are lower-cased and written back.

df.loc[filt, 'email'] = df.loc[filt, 'email'].str.lower()
df

Unnamed: 0,first,last,email
0,Corey,Schafer,Corey@gmail.com
1,Jane,Doe,Jane@gmail.com
2,John,Doe,johndoe@gmail.com


In [19]:
# Lower-case the whole column through the .str accessor and assign the result back.
df['email'] = df['email'].str.lower()

In [20]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,corey@gmail.com
1,Jane,Doe,jane@gmail.com
2,John,Doe,johndoe@gmail.com


# The four methods:

## apply

## map

## applymap

## replace

## 1. apply()

Works on a Series (element-wise) or on a DataFrame (column- or row-wise).
When called on a Series, the given function is applied to each value.

In [32]:
# Call the built-in len on every value, giving the character count of each email.
df['email'].apply(len)

0    15
1    14
2    17
Name: email, dtype: int64

In [33]:
def toUpperCase(x: str) -> str:
    """Return ``x`` converted to upper case by delegating to ``x.upper()``."""
    return x.upper()

In [34]:
# Replace the column with the result of calling toUpperCase on each value.
df['email'] = df['email'].apply(toUpperCase)

In [35]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,COREY@GMAIL.COM
1,Jane,Doe,JANE@GMAIL.COM
2,John,Doe,JOHNDOE@GMAIL.COM


###### Same using lambda

In [36]:
# Same pattern with an anonymous function; this puts the emails back in lower case.
df['email'] = df['email'].apply(lambda x: x.lower())

In [37]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,corey@gmail.com
1,Jane,Doe,jane@gmail.com
2,John,Doe,johndoe@gmail.com


In [38]:
len(df['email'])  # len() of a Series is its number of elements, i.e. the number of rows

3

## 2. applymap()

Works only on DataFrames.
Applies a function to each individual value in the DataFrame. (In pandas 2.1+ `applymap` is deprecated in favour of `DataFrame.map`.)

In [39]:
# Upper-case every cell of the frame (every value here is a string, so .upper() is valid).
# The result is only displayed; it is not assigned back to df.
df.applymap(lambda x: x.upper())

Unnamed: 0,first,last,email
0,COREY,SCHAFER,COREY@GMAIL.COM
1,JANE,DOE,JANE@GMAIL.COM
2,JOHN,DOE,JOHNDOE@GMAIL.COM


In [40]:
# Lower-case every cell of the frame; again the result is displayed, not stored.
df.applymap(lambda x: x.lower())

Unnamed: 0,first,last,email
0,corey,schafer,corey@gmail.com
1,jane,doe,jane@gmail.com
2,john,doe,johndoe@gmail.com


## 3. map()

Works only on Series. Values that have no entry in the mapping become NaN.

In [45]:
# Dict-based substitution: 'John' has no key in the mapping, so it comes back as NaN.
df['first'].map({'Corey' : 'Chris', 'Jane' : 'Mary'})

0    Chris
1     Mary
2      NaN
Name: first, dtype: object

## 4. replace()

Works on a Series (and also on a DataFrame). Only values that match a key in the mapping are replaced; all other values are left unchanged.

In [46]:
# Same mapping with replace(): the unmatched value 'John' is kept unchanged.
df['first'].replace({'Corey' : 'Chris', 'Jane' : 'Mary'})

0    Chris
1     Mary
2     John
Name: first, dtype: object

In [47]:
# Load the survey results CSV (path is relative to the notebook's working directory).
# Note: this rebinds df, so the small example frame built above is no longer
# reachable through that name.
df = pd.read_csv('data/survey_results_public.csv')

In [51]:
# Rename the 'ConvertedComp' column to 'Salary'; all other columns keep their
# labels. The returned frame is rebound to df instead of using inplace=True,
# which keeps the call chainable and makes the state change explicit.
df = df.rename(columns={'ConvertedComp': 'Salary'})

In [54]:
df.columns

Index(['Respondent', 'MainBranch', 'Hobbyist', 'OpenSourcer', 'OpenSource',
       'Employment', 'Country', 'Student', 'EdLevel', 'UndergradMajor',
       'EduOther', 'OrgSize', 'DevType', 'YearsCode', 'Age1stCode',
       'YearsCodePro', 'CareerSat', 'JobSat', 'MgrIdiot', 'MgrMoney',
       'MgrWant', 'JobSeek', 'LastHireDate', 'LastInt', 'FizzBuzz',
       'JobFactors', 'ResumeUpdate', 'CurrencySymbol', 'CurrencyDesc',
       'CompTotal', 'CompFreq', 'Salary', 'WorkWeekHrs', 'WorkPlan',
       'WorkChallenge', 'WorkRemote', 'WorkLoc', 'ImpSyn', 'CodeRev',
       'CodeRevHrs', 'UnitTests', 'PurchaseHow', 'PurchaseWhat',
       'LanguageWorkedWith', 'LanguageDesireNextYear', 'DatabaseWorkedWith',
       'DatabaseDesireNextYear', 'PlatformWorkedWith',
       'PlatformDesireNextYear', 'WebFrameWorkedWith',
       'WebFrameDesireNextYear', 'MiscTechWorkedWith',
       'MiscTechDesireNextYear', 'DevEnviron', 'OpSys', 'Containers',
       'BlockchainOrg', 'BlockchainIs', 'BetterLife', 'ITpe

In [55]:
df['Hobbyist']

0        Yes
1         No
2        Yes
3         No
4        Yes
5        Yes
6         No
7        Yes
8        Yes
9        Yes
10       Yes
11        No
12       Yes
13       Yes
14       Yes
15       Yes
16       Yes
17       Yes
18       Yes
19        No
20       Yes
21       Yes
22       Yes
23       Yes
24        No
25       Yes
26       Yes
27       Yes
28       Yes
29        No
        ... 
88853    Yes
88854    Yes
88855     No
88856     No
88857     No
88858     No
88859     No
88860    Yes
88861     No
88862    Yes
88863    Yes
88864    Yes
88865    Yes
88866    Yes
88867    Yes
88868     No
88869     No
88870    Yes
88871    Yes
88872    Yes
88873     No
88874     No
88875    Yes
88876     No
88877    Yes
88878    Yes
88879     No
88880     No
88881     No
88882    Yes
Name: Hobbyist, Length: 88883, dtype: object