In [1]:
import pandas as pd

In [2]:
# Source CSV holding the Titanic passenger records
url = 'https://raw.githubusercontent.com/chrisalbon/sim_data/master/titanic.csv'
# Read the remote CSV into a DataFrame
dataframe = pd.read_csv(url)
# Order the rows by ascending Age and display the first two of them
dataframe.sort_values("Age").iloc[:2]


Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
763,"Dean, Miss Elizabeth Gladys (Millvena)",3rd,0.17,female,1,1
751,"Danbom, Master Gilbert Sigvard Emanuel",3rd,0.33,male,0,0


## Replacing Values

In [5]:
# Swap "female" for "Woman" in the Sex column and preview the first five results
dataframe["Sex"].replace(to_replace="female", value="Woman").head()

0    Woman
1    Woman
2     male
3    Woman
4     male
Name: Sex, dtype: object

In [4]:
# Preview the first five rows of the frame (head() defaults to n=5)
dataframe.head()

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",1st,29.0,female,1,1
1,"Allison, Miss Helen Loraine",1st,2.0,female,0,1
2,"Allison, Mr Hudson Joshua Creighton",1st,30.0,male,0,0
3,"Allison, Mrs Hudson JC (Bessie Waldo Daniels)",1st,25.0,female,0,1
4,"Allison, Master Hudson Trevor",1st,0.92,male,1,0


In [6]:
# replace() also works on the whole DataFrame rather than a single column:
# here every cell equal to 1 is shown as "One".
dataframe.replace(to_replace=1, value="One").head()

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",1st,29.0,female,One,One
1,"Allison, Miss Helen Loraine",1st,2.0,female,0,One
2,"Allison, Mr Hudson Joshua Creighton",1st,30.0,male,0,0
3,"Allison, Mrs Hudson JC (Bessie Waldo Daniels)",1st,25.0,female,0,One
4,"Allison, Master Hudson Trevor",1st,0.92,male,One,0


In [7]:
# replace() can also treat the search pattern as a regular expression (regex=True).
dataframe.replace(to_replace=r"1st", value="First", regex=True).head()

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",First,29.0,female,1,1
1,"Allison, Miss Helen Loraine",First,2.0,female,0,1
2,"Allison, Mr Hudson Joshua Creighton",First,30.0,male,0,0
3,"Allison, Mrs Hudson JC (Bessie Waldo Daniels)",First,25.0,female,0,1
4,"Allison, Master Hudson Trevor",First,0.92,male,1,0


## Renaming Columns

In [9]:
# Relabel the PClass column via an {old: new} mapping and preview two rows
dataframe.rename({'PClass': 'Passenger Class'}, axis="columns").head(2)

Unnamed: 0,Name,Passenger Class,Age,Sex,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",1st,29.0,female,1,1
1,"Allison, Miss Helen Loraine",1st,2.0,female,0,1


In [10]:
# Passing a dictionary to the columns parameter is preferred because it can rename multiple columns at once.

import collections

In [11]:
# Start with an empty defaultdict; looking up a missing key stores and returns ''.
column_names: collections.defaultdict = collections.defaultdict(str)

In [12]:
# Register every column name as a key of column_names.
# A new key gets '' (the same value the str default factory would produce);
# a key that already exists keeps its current value.
# setdefault makes the insertion explicit instead of relying on the side
# effect of a bare `column_names[name]` lookup, which reads like a no-op.
for name in dataframe.columns:
    column_names.setdefault(name, "")

In [13]:
# Display the mapping; as the cell's last expression it is rendered as the output.
column_names

defaultdict(str,
            {'Name': '',
             'PClass': '',
             'Age': '',
             'Sex': '',
             'Survived': '',
             'SexCode': ''})