In [None]:
import pandas as pd
import csv
import config

In [None]:
# Empty accumulator frame; the column labels below define the normalized
# layout that each imported file is concatenated into.
pureDF = pd.DataFrame(
    columns=[
        "Date",
        "Amount",
        "Description",
        "Additional",
        "Category",
    ]
)

In [None]:
# Load every file listed in config.IMPORT_FILES, normalize it, and append the
# result to pureDF. The layout of a file is recognized by its column count:
#   * 13 columns - the first row holds the column names; rows are filtered to
#     config.CARD_MEMBER_FILTER and the sign of Amount is flipped.
#   * 5 columns  - no header row; columns are named by position.
# Any other column count raises ValueError.
loaded_frames = []             # normalized frames, concatenated once after the loop
running_total = len(pureDF)    # record count reported after each file

for f in config.IMPORT_FILES:
    print ("Loading File: ", f)
    df = pd.read_csv(f, header=None)

    if df.columns.size == 13:
        # The file was read without a header, so promote row 0 to column names.
        new_header = df.iloc[0]
        df = df[1:]
        df.columns = new_header

        # Filter rows. The explicit copy makes the column assignments below
        # write to an independent frame instead of a slice of the raw data
        # (avoids SettingWithCopyWarning).
        df = df[df["Card Member"] == config.CARD_MEMBER_FILTER].copy()

        # Transform data types (every value is a string here because the
        # header row was read as data).
        df['Date'] = pd.to_datetime(df['Date'])
        df['Amount'] = pd.to_numeric(df['Amount']) * -1.0
        df['Additional'] = df["Extended Details"]

        # Remove unused columns in a single call.
        df = df.drop(
            columns=[
                "Address",
                "City/State",
                "Country",
                "Zip Code",
                "Card Member",
                "Account #",
                "Reference",
                "Extended Details",
                "Appears On Your Statement As",
            ]
        )

    elif df.columns.size == 5:
        # Headerless layout: name the positional columns, then discard the
        # one that is not used.
        df = df.rename(columns={0: 'Date', 1: 'Amount', 2: "Unused", 3: "Additional", 4: 'Description'})
        df['Date'] = pd.to_datetime(df['Date'])
        df = df.drop(columns="Unused")

    else:
        # Must be a real exception instance: raising a plain string fails with
        # "TypeError: exceptions must derive from BaseException".
        raise ValueError("Unknown Data Type!  Columns=" + str(df.columns.size))

    loaded_frames.append(df)
    running_total += len(df)
    print("Added", str(len(df)), "records to data set, resulting in a new total of", str(running_total))
    print()

# Single concatenation instead of re-copying the growing frame per file.
pureDF = pd.concat([pureDF, *loaded_frames], axis=0)

In [None]:
# Sanity check of the combined data: column labels and (rows, columns) on
# separate lines, followed by the first rows as the cell's rich output.
print(pureDF.columns, pureDF.shape, sep="\n")
pureDF.head()

In [None]:
# Persist the combined records to the path configured in config.OUTPUT_FILE.
pureDF.to_csv(
    config.OUTPUT_FILE,
    quoting=csv.QUOTE_ALL,  # quote every field in the output
    index=False,            # leave the DataFrame index out of the file
)
print("Successfully saved output to:", config.OUTPUT_FILE)