# Column Names

> Attribute names should be meaningful so that visualization is intuitive.
> <br>This notebook defines a function that sets descriptive column names and appends it to the preprocess module.

In [17]:
# One run setup

### UCI repo _____________________________________________________________________________________
from ucimlrepo import fetch_ucirepo                     # Fetch datasets from the UCIML Repository

# Data Science ___________________________________________________________________________________
import numpy as np
import pandas as pd

### Fetching dataframes __________________________________________________________________________
# Fetch once and reuse the result; a second fetch_ucirepo call would download the same dataset again.
german_credit = fetch_ucirepo(id=144)                   # All the data and metadata
# Work on a copy so that renaming df's columns later leaves german_credit's own frame untouched.
df = german_credit.data.original.copy()                 # The actual data

In [18]:
# Column labels as delivered by the repository, shown next to their dtypes
df.dtypes.rename('Data Types').to_frame()

Unnamed: 0,Data Types
Attribute1,object
Attribute2,int64
Attribute3,object
Attribute4,object
Attribute5,int64
Attribute6,object
Attribute7,object
Attribute8,int64
Attribute9,object
Attribute10,object


In [19]:
# Swap the generic AttributeN labels for descriptive ones.
# The assignment is positional: one label per column, in the frame's existing column order.
descriptive_names = [
    'Checking Acc.',
    'Duration (mo)',
    'Credit History',
    'Purpose',
    'Credit Amount',
    'Savings Acc.',
    'Employment Since',
    'Installment/Income',
    'Marital Status',
    'Other Debtors',
    'Residence Since',
    'Property',
    'Age',
    'Other Installment',
    'Housing',
    'Existing Credits',
    'Job',
    'Dependents',
    'Telephone',
    'Foreigner',
    'Target',
]
df.columns = descriptive_names

# Show the renamed columns with their dtypes
df.dtypes.rename('Data Types').to_frame()

Unnamed: 0,Data Types
Checking Acc.,object
Duration (mo),int64
Credit History,object
Purpose,object
Credit Amount,int64
Savings Acc.,object
Employment Since,object
Installment/Income,int64
Marital Status,object
Other Debtors,object


In [16]:
# Append a new function to preprocess (safe to re-run)
#
# The source text below is written verbatim to the module. The \""" sequences
# are escaped so they do not terminate this outer triple-quoted string; they
# end up as plain triple quotes (the function's docstring) in the file.
SET_COLUMN_NAMES_SOURCE = """
def set_column_names(df):
    \"""
    Sets descriptive names for every column in the German Credit
    @df: the actual dataframe to rename columns
    \"""    
    df.columns = [
        'Checking Acc.', 
        'Duration (mo)',
        'Credit History',
        'Purpose',
        'Credit Amount',
        'Savings Acc.',
        'Employment Since',
        'Installment/Income',
        'Marital Status',
        'Other Debtors',
        'Residence Since',
        'Property',
        'Age',
        'Other Installment',
        'Housing',
        'Existing Credits',
        'Job',
        'Dependents',
        'Telephone',
        'Foreigner',
        'Target'
    ]
"""

# 'a+' still creates the file if it is missing and always writes at the end,
# but also allows reading, so the existing content can be inspected first.
with open('german-credit/utils/preprocess.py', 'a+', encoding='utf-8') as f:
    f.seek(0)                      # append mode opens positioned at the end
    existing_source = f.read()
    # Without this guard every re-run of the cell would append another copy
    # of the same definition to the module.
    if 'def set_column_names(' not in existing_source:
        f.write(SET_COLUMN_NAMES_SOURCE)
