In [1]:
# Import libraries
import pandas as pd
import numpy as np

### Load Excel File

In [2]:
filename = 'data/car_financing.xlsx'
df = pd.read_excel(filename)

## Renaming and Deleting Columns
It is often the case where you change your column names or remove unnecessary columns.

### Rename columns

Here are two popular ways to rename dataframe columns.
1. <b>dictionary substitution</b>: very useful if you only want to rename a few of the columns.
2. <b>list replacement</b>: requires a full list of names (in my experience, this is more error prone).

In [3]:
# DataFrame before renaming columns
df.head()

Unnamed: 0,Month,Starting Balance,Repayment,Interest Paid,Principal Paid,New Balance,term,interest_rate,car_type
0,1,34689.96,687.23,202.93,484.3,34205.66,60,0.0702,Toyota Sienna
1,2,34205.66,687.23,200.1,487.13,33718.53,60,0.0702,Toyota Sienna
2,3,33718.53,687.23,197.25,489.98,33228.55,60,0.0702,Toyota Sienna
3,4,33228.55,687.23,194.38,492.85,32735.7,60,0.0702,Toyota Sienna
4,5,32735.7,687.23,191.5,495.73,32239.97,60,0.0702,Toyota Sienna


In [4]:
# This wont work as there is a space in the column name
# I want to fix that:
df['Principal Paid']

0      484.30
1      487.13
2      489.98
3      492.85
4      495.73
        ...  
403    786.47
404    788.37
405    790.27
406    792.18
407    794.10
Name: Principal Paid, Length: 408, dtype: float64

In [22]:
# Approach 1 dictionary substitution using rename method
df = df.rename(columns={'Starting Balance': 'starting_balance',
                        'Interest Paid': 'interest_paid', 
                        'Principal Paid': 'principal_paid',
                        'New Balance': 'new_balance'})

In [23]:
# DataFrame after renaming columns
df.head()

Unnamed: 0,Month,starting_balance,Repayment,interest_paid,principal_paid,new_balance,term,interest_rate,car_type
0,1,34689.96,687.23,202.93,484.3,34205.66,60,0.0702,Toyota Sienna
1,2,34205.66,687.23,200.1,487.13,33718.53,60,0.0702,Toyota Sienna
2,3,33718.53,687.23,197.25,489.98,33228.55,60,0.0702,Toyota Sienna
3,4,33228.55,687.23,194.38,492.85,32735.7,60,0.0702,Toyota Sienna
4,5,32735.7,687.23,191.5,495.73,32239.97,60,0.0702,Toyota Sienna


In [24]:
# Approach 2 list replacement
# Only changing Month -> month, but we need to list the rest of the columns
df.columns = ['month',
              'starting_balance',
              'Repayment',
              'interest_paid',
              'principal_paid',
              'new_balance',
              'term',
              'interest_rate',
              'car_type']

In [25]:
df.head()

Unnamed: 0,month,starting_balance,Repayment,interest_paid,principal_paid,new_balance,term,interest_rate,car_type
0,1,34689.96,687.23,202.93,484.3,34205.66,60,0.0702,Toyota Sienna
1,2,34205.66,687.23,200.1,487.13,33718.53,60,0.0702,Toyota Sienna
2,3,33718.53,687.23,197.25,489.98,33228.55,60,0.0702,Toyota Sienna
3,4,33228.55,687.23,194.38,492.85,32735.7,60,0.0702,Toyota Sienna
4,5,32735.7,687.23,191.5,495.73,32239.97,60,0.0702,Toyota Sienna


### Deleting Columns

In [26]:
# Approach 1
# This approach allows you to drop multiple columns at a time 
df = df.drop(columns=['term'])

In [27]:
df.head()

Unnamed: 0,month,starting_balance,Repayment,interest_paid,principal_paid,new_balance,interest_rate,car_type
0,1,34689.96,687.23,202.93,484.3,34205.66,0.0702,Toyota Sienna
1,2,34205.66,687.23,200.1,487.13,33718.53,0.0702,Toyota Sienna
2,3,33718.53,687.23,197.25,489.98,33228.55,0.0702,Toyota Sienna
3,4,33228.55,687.23,194.38,492.85,32735.7,0.0702,Toyota Sienna
4,5,32735.7,687.23,191.5,495.73,32239.97,0.0702,Toyota Sienna


In [28]:
# Approach 2 use the del command
del df['Repayment']

In [5]:
df.head()

Unnamed: 0,Month,Starting Balance,Repayment,Interest Paid,Principal Paid,New Balance,term,interest_rate,car_type
0,1,34689.96,687.23,202.93,484.3,34205.66,60,0.0702,Toyota Sienna
1,2,34205.66,687.23,200.1,487.13,33718.53,60,0.0702,Toyota Sienna
2,3,33718.53,687.23,197.25,489.98,33228.55,60,0.0702,Toyota Sienna
3,4,33228.55,687.23,194.38,492.85,32735.7,60,0.0702,Toyota Sienna
4,5,32735.7,687.23,191.5,495.73,32239.97,60,0.0702,Toyota Sienna


In [7]:
#practice:
import pandas as pd

car_data_df = pd.read_excel('data/car_financing.xlsx')
car_data_df.head()

Unnamed: 0,Month,Starting Balance,Repayment,Interest Paid,Principal Paid,New Balance,term,interest_rate,car_type
0,1,34689.96,687.23,202.93,484.3,34205.66,60,0.0702,Toyota Sienna
1,2,34205.66,687.23,200.1,487.13,33718.53,60,0.0702,Toyota Sienna
2,3,33718.53,687.23,197.25,489.98,33228.55,60,0.0702,Toyota Sienna
3,4,33228.55,687.23,194.38,492.85,32735.7,60,0.0702,Toyota Sienna
4,5,32735.7,687.23,191.5,495.73,32239.97,60,0.0702,Toyota Sienna


In [9]:
car_data_df.drop(columns=['term'], inplace = True)

In [12]:
car_data_df.rename( columns = {'Month'  : 'month',               
'Starting Balance' : 'starting_balance' ,  
'Repayment' : 'repayment'  ,        
'Interest Paid'  : 'interest_paid'  ,   
'Principal Paid' : 'principal_paid' ,    
'New Balance'   : 'new_balance'  ,    
'interest_rate'  : 'interest_rate'} )

Unnamed: 0,month,starting_balance,repayment,interest_paid,principal_paid,new_balance,interest_rate,car_type
0,1,34689.96,687.23,202.93,484.30,34205.66,0.0702,Toyota Sienna
1,2,34205.66,687.23,200.10,487.13,33718.53,0.0702,Toyota Sienna
2,3,33718.53,687.23,197.25,489.98,33228.55,0.0702,Toyota Sienna
3,4,33228.55,687.23,194.38,492.85,32735.70,0.0702,Toyota Sienna
4,5,32735.70,687.23,191.50,495.73,32239.97,0.0702,Toyota Sienna
...,...,...,...,...,...,...,...,...
403,56,3951.11,796.01,9.54,786.47,3164.64,0.0290,VW Golf R
404,57,3164.64,796.01,7.64,788.37,2376.27,0.0290,VW Golf R
405,58,2376.27,796.01,5.74,790.27,1586.00,0.0290,VW Golf R
406,59,1586.00,796.01,3.83,792.18,793.82,0.0290,VW Golf R
