In [1]:
import pandas as pd
from pathlib import Path

# Column Manipulation

In [2]:
# Relative path to the customer CSV that the next cell loads.
csvpath = Path("../Resources/customers.csv")

In [3]:
# Load the customer CSV into a DataFrame and preview the first five rows.
# NOTE(review): the Zip column is parsed as int64 (see the .dtypes output at the
# end of the notebook), so a ZIP code with a leading zero would lose it (row 1
# shows 1922). If the file stores five-digit ZIP codes, consider passing
# dtype={"Zip": str} to read_csv -- confirm against the CSV first.
customer_dataframe = pd.read_csv(csvpath)
customer_dataframe.head()

Unnamed: 0,FullName,Email,Address,Zip,CreditCard,Balance
0,Altha Frederick,unhideable1966@gmail.com,67 John Maher Extension,31353,2524 2317 2139 4751,21511
1,Nickolas Harvey,allgood1803@outlook.com,1200 Madera Plaza,1922,4756 0997 9568 1329,13850
2,Jesusita Kinney,satsumas1954@yahoo.com,943 Gibb Highway,41535,3717 863466 48574,21254
3,Mose Gordon,antifowl1875@gmail.com,1073 Fell Trace,16098,5413 1700 6989 2835,5221
4,Cesar Valentine,acetaminol1979@yahoo.com,805 Marshall Promenade,99895,5173 4883 9215 4743,8300


In [4]:
# List the column labels of the DataFrame as loaded from the CSV.
customer_dataframe.columns

Index(['FullName', 'Email', 'Address', 'Zip', 'CreditCard', 'Balance'], dtype='object')

In [6]:
# Rename every column at once by assigning a list of new names to the
# DataFrame's .columns property. The names are matched to the existing columns
# by position, so the list needs one name per column, in the same order.

new_columns = ['Full Name', 'Email', 'Address', 'Zip Code', 'Payment Info', 'Current Balance']
customer_dataframe.columns = new_columns
customer_dataframe.head()

Unnamed: 0,Full Name,Email,Address,Zip Code,Payment Info,Current Balance
0,Altha Frederick,unhideable1966@gmail.com,67 John Maher Extension,31353,2524 2317 2139 4751,21511
1,Nickolas Harvey,allgood1803@outlook.com,1200 Madera Plaza,1922,4756 0997 9568 1329,13850
2,Jesusita Kinney,satsumas1954@yahoo.com,943 Gibb Highway,41535,3717 863466 48574,21254
3,Mose Gordon,antifowl1875@gmail.com,1073 Fell Trace,16098,5413 1700 6989 2835,5221
4,Cesar Valentine,acetaminol1979@yahoo.com,805 Marshall Promenade,99895,5173 4883 9215 4743,8300


In [9]:
# Rename selected columns with the DataFrame's .rename() method.
# .rename() accepts columns=<dict>; each key is an existing column name and each
# value is the new name for that column:
#   new_dataframe = old_dataframe.rename(columns={'old name': 'new name'})
#
# .rename() returns a new DataFrame and leaves the original unchanged, so the
# result is stored under a separate variable name here.

customer_dataframe_renamed = customer_dataframe.rename(columns={
    'Email': 'Email Address',
    'Payment Info': 'Pay Info',
    'Current Balance': 'Overall Balance'
})  

In [10]:
# Display the renamed copy: Email, Payment Info and Current Balance carry their new labels.
customer_dataframe_renamed

Unnamed: 0,Full Name,Email Address,Address,Zip Code,Pay Info,Overall Balance
0,Altha Frederick,unhideable1966@gmail.com,67 John Maher Extension,31353,2524 2317 2139 4751,21511
1,Nickolas Harvey,allgood1803@outlook.com,1200 Madera Plaza,1922,4756 0997 9568 1329,13850
2,Jesusita Kinney,satsumas1954@yahoo.com,943 Gibb Highway,41535,3717 863466 48574,21254
3,Mose Gordon,antifowl1875@gmail.com,1073 Fell Trace,16098,5413 1700 6989 2835,5221
4,Cesar Valentine,acetaminol1979@yahoo.com,805 Marshall Promenade,99895,5173 4883 9215 4743,8300
...,...,...,...,...,...,...
95,Crystle Larson,plantula1818@live.com,1317 Cesar Chavez On Brae,74413,3474 949615 85541,5091
96,Jetta Davenport,doolittle1818@yandex.com,618 Williams Terrace,73152,4711 5533 4972 2249,16509
97,Dallas Johnston,mosso1961@live.com,324 Tenny Line,88215,3794 280688 77410,24196
98,Roberto Daugherty,ovenful1914@yandex.com,1058 Marengo Manor,33903,4972 5355 6633 9108,18828


In [11]:
# The original DataFrame still exists and still has its previous column names,
# because .rename() returned a new DataFrame instead of modifying this one.

customer_dataframe

Unnamed: 0,Full Name,Email,Address,Zip Code,Payment Info,Current Balance
0,Altha Frederick,unhideable1966@gmail.com,67 John Maher Extension,31353,2524 2317 2139 4751,21511
1,Nickolas Harvey,allgood1803@outlook.com,1200 Madera Plaza,1922,4756 0997 9568 1329,13850
2,Jesusita Kinney,satsumas1954@yahoo.com,943 Gibb Highway,41535,3717 863466 48574,21254
3,Mose Gordon,antifowl1875@gmail.com,1073 Fell Trace,16098,5413 1700 6989 2835,5221
4,Cesar Valentine,acetaminol1979@yahoo.com,805 Marshall Promenade,99895,5173 4883 9215 4743,8300
...,...,...,...,...,...,...
95,Crystle Larson,plantula1818@live.com,1317 Cesar Chavez On Brae,74413,3474 949615 85541,5091
96,Jetta Davenport,doolittle1818@yandex.com,618 Williams Terrace,73152,4711 5533 4972 2249,16509
97,Dallas Johnston,mosso1961@live.com,324 Tenny Line,88215,3794 280688 77410,24196
98,Roberto Daugherty,ovenful1914@yandex.com,1058 Marengo Manor,33903,4972 5355 6633 9108,18828


In [12]:
# Indexing a DataFrame with [] selects columns, much like indexing a list.
# Passing a list of column names returns a DataFrame with just those columns.
customer_dataframe[['Full Name', 'Email']]     # displays only the Full Name and Email columns

Unnamed: 0,Full Name,Email
0,Altha Frederick,unhideable1966@gmail.com
1,Nickolas Harvey,allgood1803@outlook.com
2,Jesusita Kinney,satsumas1954@yahoo.com
3,Mose Gordon,antifowl1875@gmail.com
4,Cesar Valentine,acetaminol1979@yahoo.com
...,...,...
95,Crystle Larson,plantula1818@live.com
96,Jetta Davenport,doolittle1818@yandex.com
97,Dallas Johnston,mosso1961@live.com
98,Roberto Daugherty,ovenful1914@yandex.com


In [18]:
# A one-element list still returns a DataFrame: a single column with its header.
customer_dataframe[['Full Name']]

Unnamed: 0,Full Name
0,Altha Frederick
1,Nickolas Harvey
2,Jesusita Kinney
3,Mose Gordon
4,Cesar Valentine
...,...
95,Crystle Larson
96,Jetta Davenport
97,Dallas Johnston
98,Roberto Daugherty


In [22]:
# Indexing with a single column name (no inner list) returns a pandas Series
# rather than a DataFrame. It is displayed as index/value pairs without a column
# header, followed by the Series name, length and dtype.

customer_dataframe['Full Name']

0       Altha Frederick
1       Nickolas Harvey
2       Jesusita Kinney
3           Mose Gordon
4       Cesar Valentine
            ...        
95       Crystle Larson
96      Jetta Davenport
97      Dallas Johnston
98    Roberto Daugherty
99         Abel Walters
Name: Full Name, Length: 100, dtype: object

In [15]:
customer_dataframe[['Email', 'Zip Code', 'Full Name']]   # columns are returned in the order in which their names are listed.

Unnamed: 0,Email,Zip Code,Full Name
0,unhideable1966@gmail.com,31353,Altha Frederick
1,allgood1803@outlook.com,1922,Nickolas Harvey
2,satsumas1954@yahoo.com,41535,Jesusita Kinney
3,antifowl1875@gmail.com,16098,Mose Gordon
4,acetaminol1979@yahoo.com,99895,Cesar Valentine
...,...,...,...
95,plantula1818@live.com,74413,Crystle Larson
96,doolittle1818@yandex.com,73152,Jetta Davenport
97,mosso1961@live.com,88215,Dallas Johnston
98,ovenful1914@yandex.com,33903,Roberto Daugherty


In [16]:
customer_dataframe.head()   # the column order of the DataFrame itself is not changed, though.

Unnamed: 0,Full Name,Email,Address,Zip Code,Payment Info,Current Balance
0,Altha Frederick,unhideable1966@gmail.com,67 John Maher Extension,31353,2524 2317 2139 4751,21511
1,Nickolas Harvey,allgood1803@outlook.com,1200 Madera Plaza,1922,4756 0997 9568 1329,13850
2,Jesusita Kinney,satsumas1954@yahoo.com,943 Gibb Highway,41535,3717 863466 48574,21254
3,Mose Gordon,antifowl1875@gmail.com,1073 Fell Trace,16098,5413 1700 6989 2835,5221
4,Cesar Valentine,acetaminol1979@yahoo.com,805 Marshall Promenade,99895,5173 4883 9215 4743,8300


In [23]:
# Display the DataFrame; the column selections above did not modify it.
customer_dataframe

Unnamed: 0,Full Name,Email,Address,Zip Code,Payment Info,Current Balance
0,Altha Frederick,unhideable1966@gmail.com,67 John Maher Extension,31353,2524 2317 2139 4751,21511
1,Nickolas Harvey,allgood1803@outlook.com,1200 Madera Plaza,1922,4756 0997 9568 1329,13850
2,Jesusita Kinney,satsumas1954@yahoo.com,943 Gibb Highway,41535,3717 863466 48574,21254
3,Mose Gordon,antifowl1875@gmail.com,1073 Fell Trace,16098,5413 1700 6989 2835,5221
4,Cesar Valentine,acetaminol1979@yahoo.com,805 Marshall Promenade,99895,5173 4883 9215 4743,8300
...,...,...,...,...,...,...
95,Crystle Larson,plantula1818@live.com,1317 Cesar Chavez On Brae,74413,3474 949615 85541,5091
96,Jetta Davenport,doolittle1818@yandex.com,618 Williams Terrace,73152,4711 5533 4972 2249,16509
97,Dallas Johnston,mosso1961@live.com,324 Tenny Line,88215,3794 280688 77410,24196
98,Roberto Daugherty,ovenful1914@yandex.com,1058 Marengo Manor,33903,4972 5355 6633 9108,18828


In [24]:
# Adding a column.
#
# With a plain list, list[index] = value assigns to an existing position, and a
# new item has to be added with list.append(item).
#
# With a DataFrame, assigning to a column label that does not exist yet creates
# that column. The new column here is the current balance divided by 1000.
customer_dataframe['Balance (1K)'] = customer_dataframe['Current Balance'].div(1000)
customer_dataframe

Unnamed: 0,Full Name,Email,Address,Zip Code,Payment Info,Current Balance,Balance (1K)
0,Altha Frederick,unhideable1966@gmail.com,67 John Maher Extension,31353,2524 2317 2139 4751,21511,21.511
1,Nickolas Harvey,allgood1803@outlook.com,1200 Madera Plaza,1922,4756 0997 9568 1329,13850,13.850
2,Jesusita Kinney,satsumas1954@yahoo.com,943 Gibb Highway,41535,3717 863466 48574,21254,21.254
3,Mose Gordon,antifowl1875@gmail.com,1073 Fell Trace,16098,5413 1700 6989 2835,5221,5.221
4,Cesar Valentine,acetaminol1979@yahoo.com,805 Marshall Promenade,99895,5173 4883 9215 4743,8300,8.300
...,...,...,...,...,...,...,...
95,Crystle Larson,plantula1818@live.com,1317 Cesar Chavez On Brae,74413,3474 949615 85541,5091,5.091
96,Jetta Davenport,doolittle1818@yandex.com,618 Williams Terrace,73152,4711 5533 4972 2249,16509,16.509
97,Dallas Johnston,mosso1961@live.com,324 Tenny Line,88215,3794 280688 77410,24196,24.196
98,Roberto Daugherty,ovenful1914@yandex.com,1058 Marengo Manor,33903,4972 5355 6633 9108,18828,18.828


In [25]:
customer_dataframe['The Number 1'] = 1    # creates a new column called 'The Number 1' and assigns the value 1 to every row.
customer_dataframe

Unnamed: 0,Full Name,Email,Address,Zip Code,Payment Info,Current Balance,Balance (1K),The Number 1
0,Altha Frederick,unhideable1966@gmail.com,67 John Maher Extension,31353,2524 2317 2139 4751,21511,21.511,1
1,Nickolas Harvey,allgood1803@outlook.com,1200 Madera Plaza,1922,4756 0997 9568 1329,13850,13.850,1
2,Jesusita Kinney,satsumas1954@yahoo.com,943 Gibb Highway,41535,3717 863466 48574,21254,21.254,1
3,Mose Gordon,antifowl1875@gmail.com,1073 Fell Trace,16098,5413 1700 6989 2835,5221,5.221,1
4,Cesar Valentine,acetaminol1979@yahoo.com,805 Marshall Promenade,99895,5173 4883 9215 4743,8300,8.300,1
...,...,...,...,...,...,...,...,...
95,Crystle Larson,plantula1818@live.com,1317 Cesar Chavez On Brae,74413,3474 949615 85541,5091,5.091,1
96,Jetta Davenport,doolittle1818@yandex.com,618 Williams Terrace,73152,4711 5533 4972 2249,16509,16.509,1
97,Dallas Johnston,mosso1961@live.com,324 Tenny Line,88215,3794 280688 77410,24196,24.196,1
98,Roberto Daugherty,ovenful1914@yandex.com,1058 Marengo Manor,33903,4972 5355 6633 9108,18828,18.828,1


In [26]:
# Add an ID column holding a unique integer for each row.
# The range is sized from the DataFrame itself rather than a hard-coded 100, so
# the assignment keeps working if the CSV gains or loses rows (a range whose
# length differs from the number of rows raises ValueError).

customer_dataframe['ID'] = range(len(customer_dataframe))     # IDs begin at 0
customer_dataframe

Unnamed: 0,Full Name,Email,Address,Zip Code,Payment Info,Current Balance,Balance (1K),The Number 1,ID
0,Altha Frederick,unhideable1966@gmail.com,67 John Maher Extension,31353,2524 2317 2139 4751,21511,21.511,1,0
1,Nickolas Harvey,allgood1803@outlook.com,1200 Madera Plaza,1922,4756 0997 9568 1329,13850,13.850,1,1
2,Jesusita Kinney,satsumas1954@yahoo.com,943 Gibb Highway,41535,3717 863466 48574,21254,21.254,1,2
3,Mose Gordon,antifowl1875@gmail.com,1073 Fell Trace,16098,5413 1700 6989 2835,5221,5.221,1,3
4,Cesar Valentine,acetaminol1979@yahoo.com,805 Marshall Promenade,99895,5173 4883 9215 4743,8300,8.300,1,4
...,...,...,...,...,...,...,...,...,...
95,Crystle Larson,plantula1818@live.com,1317 Cesar Chavez On Brae,74413,3474 949615 85541,5091,5.091,1,95
96,Jetta Davenport,doolittle1818@yandex.com,618 Williams Terrace,73152,4711 5533 4972 2249,16509,16.509,1,96
97,Dallas Johnston,mosso1961@live.com,324 Tenny Line,88215,3794 280688 77410,24196,24.196,1,97
98,Roberto Daugherty,ovenful1914@yandex.com,1058 Marengo Manor,33903,4972 5355 6633 9108,18828,18.828,1,98


In [27]:
# Split Full Name into separate columns.
#
# .str is the accessor that exposes string methods on a column.
# .split(' ') splits each value on spaces.
# expand=True returns the pieces as separate columns of a new DataFrame
# (two here: columns 0 and 1) instead of a single column of lists.
customer_dataframe['Full Name'].str.split(' ', expand=True)       # the result is not assigned, so customer_dataframe is not changed; this is only a preview.

Unnamed: 0,0,1
0,Altha,Frederick
1,Nickolas,Harvey
2,Jesusita,Kinney
3,Mose,Gordon
4,Cesar,Valentine
...,...,...
95,Crystle,Larson
96,Jetta,Davenport
97,Dallas,Johnston
98,Roberto,Daugherty


In [28]:
# Confirm that the split preview above did not add any columns to the DataFrame.
customer_dataframe

Unnamed: 0,Full Name,Email,Address,Zip Code,Payment Info,Current Balance,Balance (1K),The Number 1,ID
0,Altha Frederick,unhideable1966@gmail.com,67 John Maher Extension,31353,2524 2317 2139 4751,21511,21.511,1,0
1,Nickolas Harvey,allgood1803@outlook.com,1200 Madera Plaza,1922,4756 0997 9568 1329,13850,13.850,1,1
2,Jesusita Kinney,satsumas1954@yahoo.com,943 Gibb Highway,41535,3717 863466 48574,21254,21.254,1,2
3,Mose Gordon,antifowl1875@gmail.com,1073 Fell Trace,16098,5413 1700 6989 2835,5221,5.221,1,3
4,Cesar Valentine,acetaminol1979@yahoo.com,805 Marshall Promenade,99895,5173 4883 9215 4743,8300,8.300,1,4
...,...,...,...,...,...,...,...,...,...
95,Crystle Larson,plantula1818@live.com,1317 Cesar Chavez On Brae,74413,3474 949615 85541,5091,5.091,1,95
96,Jetta Davenport,doolittle1818@yandex.com,618 Williams Terrace,73152,4711 5533 4972 2249,16509,16.509,1,96
97,Dallas Johnston,mosso1961@live.com,324 Tenny Line,88215,3794 280688 77410,24196,24.196,1,97
98,Roberto Daugherty,ovenful1914@yandex.com,1058 Marengo Manor,33903,4972 5355 6633 9108,18828,18.828,1,98


In [29]:
# .shape
# Returns a tuple with the number of rows and columns in the DataFrame: (# rows, # columns)

customer_dataframe.shape

(100, 9)

In [30]:
# Show the data type of each column.
customer_dataframe.dtypes

Full Name           object
Email               object
Address             object
Zip Code             int64
Payment Info        object
Current Balance      int64
Balance (1K)       float64
The Number 1         int64
ID                   int32
dtype: object