In [1]:
import os 
import pandas as pd

# Filesystem locations, all anchored at the kernel's current working directory.
ROOTDIR = os.getcwd()
DATADIR = os.path.join(ROOTDIR, 'data')

# Path of the raw CSV file, which sits directly inside the data directory.
RAWDATA = os.path.join(ROOTDIR, 'data', 'raw.csv')

In [14]:
# Columns kept from the cleaned raw data, as (cleaned raw name, short name)
# pairs. The keys are used to select columns and the whole mapping is used
# to rename them; insertion order is preserved.
new_names = dict([
    ('transaction_id', 'id'),
    ('date', 'date'),
    ('name', 'name'),
    ('category', 'cat'),
    ('amount', 'amount'),
    ('notes_and_#tags', 'note'),
    ('description', 'desc'),
])
       
def clean_names(df):
    """Return `df` with lowercased column labels and spaces turned into underscores.

    The input frame is left untouched: the cleaned labels are attached to a
    new frame, so re-running a cell that calls this function on the same
    input gives the same result.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels are strings.

    Returns
    -------
    pandas.DataFrame
        New frame with the same data and the cleaned column labels.
    """
    cleaned = (
        df.columns
        .str.lower()
        # Literal replacement; avoids any regex interpretation of the pattern.
        .str.replace(' ', '_', regex=False)
    )
    return df.set_axis(cleaned, axis=1)


def clean_str(df):
    """Return a copy of `df` with every object-dtype column lowercased.

    Only columns selected by ``select_dtypes('object')`` are touched; all
    other columns are carried over unchanged. Missing values stay missing.
    The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame

    Returns
    -------
    pandas.DataFrame
        New frame with lowercased object columns.
    """
    # Work on a copy so the caller's frame keeps its original casing.
    out = df.copy()
    strs = out.select_dtypes('object')
    out[strs.columns] = strs.apply(lambda col: col.str.lower())
    return out
    

def select_cols(df, names=None):
    """Return only the columns of `df` named by the keys of a mapping.

    Parameters
    ----------
    df : pandas.DataFrame
    names : mapping or iterable of column labels, optional
        Columns to keep, in this order. Defaults to the module-level
        `new_names` mapping.

    Returns
    -------
    pandas.DataFrame
        Frame restricted to the requested columns.

    Raises
    ------
    KeyError
        If a requested column is not present in `df`.
    """
    if names is None:
        names = new_names
    # Index with a list of labels: passing the dict itself as an indexer is
    # rejected by pandas 2.0 and later.
    return df[list(names)]


def rename_cols(df, names=None):
    """Return `df` with its columns renamed according to a mapping.

    Parameters
    ----------
    df : pandas.DataFrame
    names : mapping, optional
        Maps current column labels to new ones. Defaults to the module-level
        `new_names` mapping. Labels absent from the mapping are kept as they are.

    Returns
    -------
    pandas.DataFrame
        New frame with renamed columns; `df` itself is not modified.
    """
    if names is None:
        names = new_names
    return df.rename(columns=names)

def order_cols(df):
    """Return the seven analysis columns of `df` in a fixed order.

    Columns not listed here are dropped; a missing one raises KeyError.
    """
    ordered = ['date', 'desc', 'cat', 'amount', 'name', 'note', 'id']
    return df[ordered]
    
# Load the raw CSV and run it through the cleaning steps defined above, in order.
df = (
    # parse_dates given as a dict: the columns at positions 1 and 2 are parsed
    # together as one date column named 'date'.
    # NOTE(review): dict-valued parse_dates is reported as deprecated in recent
    # pandas releases — confirm against the installed version.
    pd.read_csv(RAWDATA, parse_dates={'date': [1, 2]})
    .pipe(clean_names)   # lowercase column labels, spaces -> underscores
    .pipe(clean_str)     # lowercase the values of object-dtype columns
    .pipe(select_cols)   # keep only the columns that are keys of new_names
    .pipe(rename_cols)   # apply the new_names mapping to the column labels
    .pipe(order_cols)    # put the columns into the fixed analysis order
)
# Preview the first rows of the cleaned frame.
df.head()

Unnamed: 0,desc,cat,amount,name,note,id
0,fgtofg,,200.0,gunzinger,fgtofg,tx_00009cegw7obs9iqehznxb
1,p9811069,,-200.0,fabian gunzinger,p9811069,tx_00009ceh7gjc93f2z8ej7z
2,waitrose london gbr,groceries,-2.99,waitrose & partners,,tx_00009ctawup6crg1smr9ul
3,tesco stores 5103 hammersmith gbr,groceries,-5.05,tesco,,tx_00009ctafeypofhtytdfab
4,tfl travel charge tfl.gov.uk/cp gbr,transport,-12.4,transport for london,"travel charge for monday, 19 nov",tx_00009cujx410rx7cf2wo0v


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2418 entries, 0 to 2417
Data columns (total 16 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   transaction_id   2418 non-null   object 
 1   date             2418 non-null   object 
 2   time             2418 non-null   object 
 3   type             2418 non-null   object 
 4   name             2405 non-null   object 
 5   emoji            1326 non-null   object 
 6   category         1529 non-null   object 
 7   amount           2418 non-null   float64
 8   currency         2418 non-null   object 
 9   local_amount     2418 non-null   float64
 10  local_currency   2418 non-null   object 
 11  notes_and_#tags  584 non-null    object 
 12  address          1061 non-null   object 
 13  receipt          0 non-null      float64
 14  description      2321 non-null   object 
 15  category_split   0 non-null      float64
dtypes: float64(4), object(12)
memory usage: 302.4+ KB
