# Column Manipulation of Pandas DataFrames

### 1. Import libraries and dependencies

In [4]:
# Import pandas, pathlib, and numpy libraries
import pandas as pd
import numpy as np
from pathlib import Path

### 2. Create a Path to the File Using Pathlib

In [5]:
# Use the Pathlib libary to set the path to the CSV
data = r'C:\Users\TribThapa\Desktop\Thapa\ResearchFellow\Courses\FinTech_Bootcamp_MonashUni2021\monu-mel-virt-fin-pt-05-2021-u-c\Activities\Week 4\2\02-columns-01\Resources\people.csv'

### 3. Read the CSV into a Pandas DataFrame and Display a Few Rows

In [21]:
# Use the file path to read the CSV into a DataFrame and display a few rows
data_df = pd.read_csv(data) 
data_df.head()

Unnamed: 0,id,first_name,last_name,email,gender,uni_grad,job_title,Income
0,1.0,Keriann,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0
1,2.0,Huntley,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0
2,3.0,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0
3,4.0,Cullie,,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0
4,5.0,Ariel,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0


### 4. View Column Names

In [17]:
# Use the `columns` attribute to output the column names
data_df.columns

Index(['id', 'first_name', 'last_name', 'email', 'gender', 'uni_grad',
       'job_title', 'Income'],
      dtype='object')

### 5. View Column Data Types

In [18]:
# Use the `dtypes` attribute to output the column names and data types
data_df.dtypes

id            float64
first_name     object
last_name      object
email          object
gender         object
uni_grad       object
job_title      object
Income        float64
dtype: object

### 6. Rename Columns Using List

In [20]:
# Set the `columns` attribute to a new list of column names
col_names = ['ID', 'First Name', 'Last Name', 'Email', 'Gender', 'University', 'Job', 'Income']

data_df.columns = [col_names]
data_df

Unnamed: 0,ID,First Name,Last Name,Email,Gender,University,Job,Income
0,1.0,Keriann,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0
1,2.0,Huntley,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0
2,3.0,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0
3,4.0,Cullie,,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0
4,5.0,Ariel,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0
...,...,...,...,...,...,...,...,...
995,996.0,Meta,Crumpton,mcrumptonrn@qq.com,Female,ECAM - Institut Supérieur Industriel,Registered Nurse,57060.0
996,997.0,Gunar,Gilford,ggilfordro@yandex.ru,Male,Smolny University,Marketing Manager,76109.0
997,998.0,Lucretia,Gurling,lgurlingrp@de.vu,Female,Institut Teknologi Telkom,Software Engineer III,92115.0
998,999.0,Andrew,Yang,ayang@codedrills.com,Male,Rutgers University School of Business,Curriculum Engineer,60000.0


### 7. Rename Columns Using Dictionary

In [23]:
# Use the `rename` function and set the `columns` parameter to a dictionary of new column names
data_new_df = data_df.rename(columns={'id': 'ID',
                                      'first_name': 'First Name',
                                      'last_name': 'Last Name',
                                      'email': 'Email address',
                                      'gender': 'Sex',
                                      'uni_grad': 'University',
                                      'job_title': 'Career',
                                      'Income': 'Pay'})

data_new_df.head()

Unnamed: 0,ID,First Name,Last Name,Email address,Sex,University,Career,Pay
0,1.0,Keriann,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0
1,2.0,Huntley,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0
2,3.0,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0
3,4.0,Cullie,,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0
4,5.0,Ariel,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0


### 8. Re-order Columns

In [29]:
# Use a list of re-ordered column names to alter the column order of the original DataFrame
column_list = list(data_df.columns)
column_list

new_df = data_df[data_df.columns[[0, 3, 2, 1, 4, 7, 5]]]
new_df.head()

Unnamed: 0,id,email,last_name,first_name,gender,Income,uni_grad
0,1.0,klenormand0@businessinsider.com,Lenormand,Keriann,Female,58135.0,Aurora University
1,2.0,hrupke1@reuters.com,Rupke,Huntley,Male,96053.0,Osaka University of Economics
2,3.0,gdalgarnowch2@microsoft.com,Dalgarnowch,Gorden,Male,59196.0,Ludong University
3,4.0,cputten3@nymag.com,,Cullie,Male,88493.0,Université des Sciences et de la Technologie d...
4,5.0,astrangman4@bravesites.com,Strangman,Ariel,Female,89073.0,Boise State University


### 9. Create Additional Column

In [30]:
data_df['Income / 100'] = data_df['Income'] / 100
data_df

Unnamed: 0,id,first_name,last_name,email,gender,uni_grad,job_title,Income,Income / 100
0,1.0,Keriann,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0,581.35
1,2.0,Huntley,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0,960.53
2,3.0,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0,591.96
3,4.0,Cullie,,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,884.93
4,5.0,Ariel,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0,890.73
...,...,...,...,...,...,...,...,...,...
995,996.0,Meta,Crumpton,mcrumptonrn@qq.com,Female,ECAM - Institut Supérieur Industriel,Registered Nurse,57060.0,570.60
996,997.0,Gunar,Gilford,ggilfordro@yandex.ru,Male,Smolny University,Marketing Manager,76109.0,761.09
997,998.0,Lucretia,Gurling,lgurlingrp@de.vu,Female,Institut Teknologi Telkom,Software Engineer III,92115.0,921.15
998,999.0,Andrew,Yang,ayang@codedrills.com,Male,Rutgers University School of Business,Curriculum Engineer,60000.0,600.00


In [42]:
# Use the `randint` function to randomly generate an `Age` from 22 to 65 for 1000 rows
import random

age = []

for i in range(len(data_df)):
    i = random.randint(22, 65)
    age.append(i)

print(age)

data_df['Age'] = age

data_df.head()

[32, 48, 54, 23, 48, 24, 30, 29, 51, 27, 62, 40, 55, 58, 43, 30, 42, 23, 35, 44, 41, 49, 56, 55, 30, 60, 29, 33, 34, 48, 51, 57, 28, 24, 43, 37, 63, 49, 59, 30, 51, 37, 41, 58, 39, 48, 50, 49, 26, 46, 61, 44, 23, 36, 40, 47, 36, 45, 48, 35, 31, 59, 31, 35, 25, 61, 52, 56, 60, 54, 24, 54, 33, 56, 52, 58, 41, 48, 45, 38, 25, 58, 23, 23, 25, 65, 28, 50, 49, 52, 42, 36, 57, 31, 28, 34, 27, 64, 45, 56, 26, 46, 64, 59, 37, 56, 41, 53, 33, 54, 42, 29, 57, 31, 50, 61, 46, 41, 48, 23, 60, 28, 41, 55, 32, 34, 35, 55, 47, 36, 60, 45, 33, 59, 46, 23, 55, 53, 36, 36, 63, 61, 58, 63, 52, 63, 32, 45, 61, 25, 58, 28, 61, 38, 28, 57, 51, 26, 44, 27, 41, 59, 65, 64, 48, 52, 25, 26, 55, 42, 36, 26, 22, 45, 33, 56, 33, 58, 45, 27, 62, 37, 49, 56, 46, 47, 30, 38, 26, 59, 51, 63, 47, 32, 45, 63, 41, 29, 63, 45, 35, 37, 38, 31, 45, 37, 40, 49, 53, 22, 45, 52, 38, 40, 48, 28, 53, 38, 45, 23, 63, 41, 57, 24, 57, 61, 26, 51, 55, 36, 29, 56, 43, 63, 58, 23, 51, 55, 64, 23, 57, 22, 57, 40, 57, 25, 43, 33, 34, 56,

Unnamed: 0,id,first_name,last_name,email,gender,uni_grad,job_title,Income,Income / 100,Age
0,1.0,Keriann,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0,581.35,32
1,2.0,Huntley,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0,960.53,48
2,3.0,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0,591.96,54
3,4.0,Cullie,,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,884.93,23
4,5.0,Ariel,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0,890.73,48


In [44]:
data_df['Age_copy'] = age
data_df.head()

Unnamed: 0,id,first_name,last_name,email,gender,uni_grad,job_title,Income,Income / 100,Age,Age_copy
0,1.0,Keriann,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0,581.35,32,32
1,2.0,Huntley,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0,960.53,48,48
2,3.0,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0,591.96,54,54
3,4.0,Cullie,,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,884.93,23,23
4,5.0,Ariel,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0,890.73,48,48


### 10. Delete Columns

In [47]:
# Use the `drop` function to delete the newly created `Age_Copy` column
data_drop_df = data_df.drop(columns=['Age_copy'])
data_drop_df

Unnamed: 0,id,first_name,last_name,email,gender,uni_grad,job_title,Income,Income / 100,Age
0,1.0,Keriann,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0,581.35,32
1,2.0,Huntley,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0,960.53,48
2,3.0,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0,591.96,54
3,4.0,Cullie,,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,884.93,23
4,5.0,Ariel,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0,890.73,48
...,...,...,...,...,...,...,...,...,...,...
995,996.0,Meta,Crumpton,mcrumptonrn@qq.com,Female,ECAM - Institut Supérieur Industriel,Registered Nurse,57060.0,570.60,22
996,997.0,Gunar,Gilford,ggilfordro@yandex.ru,Male,Smolny University,Marketing Manager,76109.0,761.09,34
997,998.0,Lucretia,Gurling,lgurlingrp@de.vu,Female,Institut Teknologi Telkom,Software Engineer III,92115.0,921.15,35
998,999.0,Andrew,Yang,ayang@codedrills.com,Male,Rutgers University School of Business,Curriculum Engineer,60000.0,600.00,58


### 11. Save Modified DataFrame to New CSV

In [48]:
# Save the DataFrame to the `Resources` folder

data_drop_df.to_csv('ThapaTest.csv')
