# Column Manipulation of Pandas DataFrames

### 1. Import libraries and dependencies

In [66]:
# Import the standard-library helpers plus the pandas and numpy libraries
import random
from pathlib import Path

import numpy as np
import pandas as pd

### 2. Create a Path to the File Using Pathlib

In [67]:
# Use the Pathlib library to set the path to the CSV.
# The path is relative rather than a machine-specific absolute one, so the
# notebook is portable. It assumes the notebook is run from a sibling folder
# of `Resources`, which the save step at the end of the notebook already
# relies on.
data = Path('../Resources/people.csv')

### 3. Read the CSV into a Pandas DataFrame and Display a Few Rows

In [68]:
# Load the CSV located at `data` into a DataFrame (default integer index),
# then display the first 10 rows as a quick sanity check of the contents.
data_df = pd.read_csv(data)
data_df.head(10)

Unnamed: 0,id,first_name,last_name,email,gender,uni_grad,job_title,Income
0,1.0,Keriann,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0
1,2.0,Huntley,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0
2,3.0,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0
3,4.0,Cullie,,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0
4,5.0,Ariel,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0
5,6.0,Antonio,Snozzwell,asnozzwell5@mysql.com,Male,Babcock University,Technical Writer,119916.0
6,7.0,Darya,Neathway,dneathway6@seesaa.net,Female,Molloy College,Administrative Officer,77705.0
7,8.0,Helaina,Duding,hduding7@topsy.com,Female,St. Paul University,Staff Scientist,57166.0
8,9.0,Gerhardine,Franzolini,gfranzolini8@msn.com,Female,Fundação Educacional de Ituverava,Environmental Specialist,73051.0
9,10.0,Charo,Traut,ctraut9@oracle.com,Female,Cornell University,Programmer III,90631.0


### 4. View Column Names

In [69]:
# Use the `columns` attribute to output the column names
# (displayed as a pandas `Index` of labels).
data_df.columns

Index(['id', 'first_name', 'last_name', 'email', 'gender', 'uni_grad',
       'job_title', 'Income'],
      dtype='object')

### 5. View Column Data Types

In [70]:
# Use the `dtypes` attribute to output each column name alongside its data type
# (text columns show up as `object`).
data_df.dtypes

id            float64
first_name     object
last_name      object
email          object
gender         object
uni_grad       object
job_title      object
Income        float64
dtype: object

### 6. Rename Columns Using List

In [71]:
# Rename every column in one step by assigning a list of labels to the
# `columns` attribute. Labels are matched to columns purely by position, so
# the list must contain exactly one entry per existing column, in order.
col_names = [
    'Person_ID',
    'First_Name',
    'Last_Name',
    'Email',
    'Gender',
    'University',
    'Occupation',
    'Salary',
]

# This overwrites the labels on `data_df` itself (no new DataFrame is created)
data_df.columns = col_names
data_df.head(10)

Unnamed: 0,Person_ID,First_Name,Last_Name,Email,Gender,University,Occupation,Salary
0,1.0,Keriann,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0
1,2.0,Huntley,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0
2,3.0,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0
3,4.0,Cullie,,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0
4,5.0,Ariel,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0
5,6.0,Antonio,Snozzwell,asnozzwell5@mysql.com,Male,Babcock University,Technical Writer,119916.0
6,7.0,Darya,Neathway,dneathway6@seesaa.net,Female,Molloy College,Administrative Officer,77705.0
7,8.0,Helaina,Duding,hduding7@topsy.com,Female,St. Paul University,Staff Scientist,57166.0
8,9.0,Gerhardine,Franzolini,gfranzolini8@msn.com,Female,Fundação Educacional de Ituverava,Environmental Specialist,73051.0
9,10.0,Charo,Traut,ctraut9@oracle.com,Female,Cornell University,Programmer III,90631.0


In [72]:
# Display `data_df` again to confirm the new column labels were stored on the
# DataFrame itself rather than on a copy.
data_df.head(10)

Unnamed: 0,Person_ID,First_Name,Last_Name,Email,Gender,University,Occupation,Salary
0,1.0,Keriann,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0
1,2.0,Huntley,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0
2,3.0,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0
3,4.0,Cullie,,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0
4,5.0,Ariel,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0
5,6.0,Antonio,Snozzwell,asnozzwell5@mysql.com,Male,Babcock University,Technical Writer,119916.0
6,7.0,Darya,Neathway,dneathway6@seesaa.net,Female,Molloy College,Administrative Officer,77705.0
7,8.0,Helaina,Duding,hduding7@topsy.com,Female,St. Paul University,Staff Scientist,57166.0
8,9.0,Gerhardine,Franzolini,gfranzolini8@msn.com,Female,Fundação Educacional de Ituverava,Environmental Specialist,73051.0
9,10.0,Charo,Traut,ctraut9@oracle.com,Female,Cornell University,Programmer III,90631.0


### 7. Rename Columns Using Dictionary

In [73]:
# Use the `rename` function and set the `columns` parameter to a dictionary of new column names.
#
# `data_df` already carries the new labels (its `columns` attribute was
# overwritten earlier in this notebook), so renaming it with the original
# labels would match nothing and silently do nothing. The mapping is therefore
# applied to a fresh read of the CSV, which still has the original labels.
column_map = {
    'id': 'Person_ID',
    'first_name': 'First_Name',
    'last_name': 'Last_Name',
    'email': 'Email',
    'gender': 'Gender',
    'uni_grad': 'University',
    'job_title': 'Occupation',
    'Income': 'Salary',
}

# `errors='raise'` turns a mapping key that matches no column into a KeyError
# instead of a silent no-op.
df_new = pd.read_csv(data).rename(columns=column_map, errors='raise')
df_new.head(10)

Unnamed: 0,Person_ID,First_Name,Last_Name,Email,Gender,University,Occupation,Salary
0,1.0,Keriann,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0
1,2.0,Huntley,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0
2,3.0,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0
3,4.0,Cullie,,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0
4,5.0,Ariel,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0
5,6.0,Antonio,Snozzwell,asnozzwell5@mysql.com,Male,Babcock University,Technical Writer,119916.0
6,7.0,Darya,Neathway,dneathway6@seesaa.net,Female,Molloy College,Administrative Officer,77705.0
7,8.0,Helaina,Duding,hduding7@topsy.com,Female,St. Paul University,Staff Scientist,57166.0
8,9.0,Gerhardine,Franzolini,gfranzolini8@msn.com,Female,Fundação Educacional de Ituverava,Environmental Specialist,73051.0
9,10.0,Charo,Traut,ctraut9@oracle.com,Female,Cornell University,Programmer III,90631.0


### 8. Re-order Columns

In [74]:
# Use a list of re-ordered column names to alter the column order of the original DataFrame.
# Selecting by name (instead of by integer position) keeps this cell safe to
# re-run: positions shift once the frame has been re-ordered, names do not.
cols = list(data_df.columns)  # snapshot of the column names before re-ordering

# `Gender` is intentionally absent from the list, so it is dropped from the
# result; `Person_ID` moves to the last position.
reordered_cols = [
    'First_Name',
    'Last_Name',
    'Email',
    'University',
    'Occupation',
    'Salary',
    'Person_ID',
]

data_df = data_df[reordered_cols]
data_df.head()

Unnamed: 0,First_Name,Last_Name,Email,University,Occupation,Salary,Person_ID
0,Keriann,Lenormand,klenormand0@businessinsider.com,Aurora University,Nurse Practicioner,58135.0,1.0
1,Huntley,Rupke,hrupke1@reuters.com,Osaka University of Economics,Project Manager,96053.0,2.0
2,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Ludong University,Environmental Tech,59196.0,3.0
3,Cullie,,cputten3@nymag.com,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,4.0
4,Ariel,Strangman,astrangman4@bravesites.com,Boise State University,Project Manager,89073.0,5.0


### 9. Create Additional Column

In [75]:
# Show the current (re-ordered) DataFrame before a new column is added to it.
data_df.head()

Unnamed: 0,First_Name,Last_Name,Email,University,Occupation,Salary,Person_ID
0,Keriann,Lenormand,klenormand0@businessinsider.com,Aurora University,Nurse Practicioner,58135.0,1.0
1,Huntley,Rupke,hrupke1@reuters.com,Osaka University of Economics,Project Manager,96053.0,2.0
2,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Ludong University,Environmental Tech,59196.0,3.0
3,Cullie,,cputten3@nymag.com,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,4.0
4,Ariel,Strangman,astrangman4@bravesites.com,Boise State University,Project Manager,89073.0,5.0


In [76]:
# Use the `randint` function to randomly generate an `Age` from 22 to 65
# (both ends inclusive) for every row of the DataFrame.
AGE_MIN, AGE_MAX = 22, 65

# A dedicated, seeded generator gives the same ages on every
# Restart & Run All without touching the global `random` state.
rng = random.Random(42)

# One draw per row, so the list length always matches the DataFrame
age = [rng.randint(AGE_MIN, AGE_MAX) for _ in range(len(data_df))]

data_df['Age'] = age
data_df.head()

Unnamed: 0,First_Name,Last_Name,Email,University,Occupation,Salary,Person_ID,Age
0,Keriann,Lenormand,klenormand0@businessinsider.com,Aurora University,Nurse Practicioner,58135.0,1.0,63
1,Huntley,Rupke,hrupke1@reuters.com,Osaka University of Economics,Project Manager,96053.0,2.0,27
2,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Ludong University,Environmental Tech,59196.0,3.0,22
3,Cullie,,cputten3@nymag.com,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,4.0,49
4,Ariel,Strangman,astrangman4@bravesites.com,Boise State University,Project Manager,89073.0,5.0,32


### 10. Delete Columns

In [77]:
# Create a throwaway `Age_Copy` column holding the same values as `age`.
# It exists only so that the next cell has a column to delete with `drop`.
data_df['Age_Copy'] = age
data_df.head()

Unnamed: 0,First_Name,Last_Name,Email,University,Occupation,Salary,Person_ID,Age,Age_Copy
0,Keriann,Lenormand,klenormand0@businessinsider.com,Aurora University,Nurse Practicioner,58135.0,1.0,63,63
1,Huntley,Rupke,hrupke1@reuters.com,Osaka University of Economics,Project Manager,96053.0,2.0,27,27
2,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Ludong University,Environmental Tech,59196.0,3.0,22,22
3,Cullie,,cputten3@nymag.com,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,4.0,49,49
4,Ariel,Strangman,astrangman4@bravesites.com,Boise State University,Project Manager,89073.0,5.0,32,32


In [81]:
# Use the `drop` function to delete the `Age_Copy` column.
# The `columns=` keyword replaces the positional axis argument (`drop(label, 1)`),
# which newer pandas versions reject. `drop` returns a new DataFrame, so
# `data_df` itself still contains `Age_Copy`.
data_df_new = data_df.drop(columns='Age_Copy')
data_df_new.head()

Unnamed: 0,First_Name,Last_Name,Email,University,Occupation,Salary,Person_ID,Age
0,Keriann,Lenormand,klenormand0@businessinsider.com,Aurora University,Nurse Practicioner,58135.0,1.0,63
1,Huntley,Rupke,hrupke1@reuters.com,Osaka University of Economics,Project Manager,96053.0,2.0,27
2,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Ludong University,Environmental Tech,59196.0,3.0,22
3,Cullie,,cputten3@nymag.com,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,4.0,49
4,Ariel,Strangman,astrangman4@bravesites.com,Boise State University,Project Manager,89073.0,5.0,32


### 11. Save Modified DataFrame to New CSV

In [82]:
# Save the DataFrame to the `Resources` folder.
# The path is built with pathlib so the separators are correct on every
# operating system; a literal backslash path only resolves on Windows.
OutDir = Path('..') / 'Resources' / 'Thapa_edited.csv'

# NOTE: the row index is written as the first (unnamed) column because
# `to_csv` defaults to `index=True`.
data_df_new.to_csv(OutDir, sep=',')