Define a function that can either request the expense inputs manually or create a dummy dataset from scratch

In [259]:
import random
from datetime import datetime, timedelta
import numpy as np

#seed Python's built-in random module so the synthetic dataset is reproducible
#(every random draw in input_expenses goes through this module)
random.seed(34)

#define the function
def input_expenses(demo=True):
    '''
    Collect expenses, either typed in by the user or generated synthetically.

    Parameters
    ----------
    demo : bool, default True
        When True, build a synthetic dataset of 20 random expenses, some of
        them deliberately incomplete. When False, prompt on standard input
        for a single expense.

    Returns
    -------
    list of dict
        One dict per expense with the keys "date", "category", "amount_usd"
        and "description". Missing values are represented by np.nan.
    '''
    #explicit comparison kept on purpose: values such as None still select
    #the demo dataset, exactly as before
    if demo == False:  # noqa: E712
        return [_read_manual_expense()]
    return _build_demo_expenses()


def _blank_to_nan(text):
    '''Return the stripped text, or np.nan when the answer was left blank.'''
    cleaned = text.strip()
    return cleaned if cleaned else np.nan


def _read_manual_expense():
    '''Prompt for one expense on standard input and return it as a record dict.'''
    exp_date = input("Enter the date of the expense in the format YYYY-MM-DD")
    exp_category = input("Enter the category of the expense")
    exp_amount = input("Enter the amount of the expense")
    exp_description = input("Enter the description of the expense")

    #store the amount as a number; an answer that cannot be parsed is treated
    #as missing so it is handled like any other incomplete entry
    try:
        amount_usd = float(exp_amount)
    except ValueError:
        amount_usd = np.nan

    #same keys as the synthetic records so both sources can share a DataFrame
    return {
        "date": _blank_to_nan(exp_date),
        "category": _blank_to_nan(exp_category),
        "amount_usd": amount_usd,
        "description": _blank_to_nan(exp_description),
    }


def _build_demo_expenses(size=20):
    '''Build `size` synthetic expense records with dates between 2020 and 2024.'''
    start_date = datetime(2020, 1, 1)
    span_days = (datetime(2024, 12, 31) - start_date).days

    #inclusive (low, high) amount range per category
    #NOTE: the spelling "Entertaiment" is kept as-is because it is emitted data
    amount_ranges = {
        "Groceries": (5, 200),
        "Clothing": (5, 100),
        "Entertaiment": (5, 20),
    }
    #description prefix and the options appended to it, per category
    description_parts = {
        "Groceries": ("doing groceries in ", ["Aldi", "Lidl", "Tesco", "Dunnes"]),
        "Clothing": ("buying clothes in ", ["Penneys", "H&M", "Zara", "Dunnes"]),
        "Entertaiment": (
            "going to the cinema to watch ",
            ["The Avengers", "Star Wars", "The Lord of the Rings"],
        ),
    }

    #the draw order (all dates, all categories, all amounts, all descriptions)
    #is kept so that a given seed keeps producing the same dataset
    example_dates = [
        (start_date + timedelta(days=random.randint(0, span_days))).strftime("%Y-%m-%d")
        for _ in range(size)
    ]

    #np.nan among the choices simulates expenses with missing data
    example_categories = [
        random.choice(["Groceries", "Clothing", "Entertaiment", np.nan])
        for _ in range(size)
    ]

    #a missing category yields a missing amount and a missing description
    example_amounts = [
        random.randint(*amount_ranges[category]) if category in amount_ranges else np.nan
        for category in example_categories
    ]
    example_descriptions = [
        description_parts[category][0] + random.choice(description_parts[category][1])
        if category in description_parts
        else np.nan
        for category in example_categories
    ]

    return [
        {
            "date": date,
            "category": category,
            "amount_usd": amount_usd,
            "description": description,
        }
        for date, category, amount_usd, description in zip(
            example_dates, example_categories, example_amounts, example_descriptions
        )
    ]

Define a function to visualize the expenses, excluding incomplete entries

In [260]:
import pandas as pd
#list_expenses=input_expenses(True)
def visual_exp(list_expenses):
    """Return the expenses as a pandas DataFrame containing only complete rows.

    `list_expenses` is a list of dicts, one dict per expense. Every row that
    has a missing value in any column is removed from the result.
    """
    #build the table and discard incomplete rows in a single chained step
    return pd.DataFrame(list_expenses).dropna()

In [261]:
#number of expense records currently held in list_expenses
#NOTE(review): relies on list_expenses left over from an earlier run of the cell that assigns it - confirm the run order
len(list_expenses)

20

In [264]:
#generate the synthetic expenses, then display them as a table without incomplete rows
list_expenses = input_expenses(demo=True)
visual_exp(list_expenses = list_expenses)

Unnamed: 0,date,category,amount_usd,description
0,2022-11-30,Groceries,87.0,doing groceries in Aldi
1,2021-09-18,Clothing,44.0,buying clothes in H&M
2,2024-11-28,Clothing,26.0,buying clothes in Dunnes
3,2020-12-04,Entertaiment,17.0,going to the cinema to watch Star Wars
4,2024-08-31,Groceries,134.0,doing groceries in Aldi
5,2022-11-25,Entertaiment,19.0,going to the cinema to watch The Lord of the R...
6,2022-07-17,Groceries,14.0,doing groceries in Dunnes
7,2022-08-06,Groceries,7.0,doing groceries in Tesco
11,2023-04-02,Entertaiment,7.0,going to the cinema to watch The Lord of the R...
12,2023-11-28,Clothing,99.0,buying clothes in H&M
