In [1]:
# Config
# Expense categories that get a year-end projection, in processing order.
CATEGORY_LIST = [
    # Housing and day-to-day living
    "Rent & Utilities",
    "Groceries",
    "Shopping",
    # Leisure
    "Travel",
    "Food & Drink",
    "Entertainment",
    # Larger purchases and work
    "Furniture & Appliances",
    "Work & Equipment",
    # Recurring charges and everything else
    "Service Bills & Fees",
    "Transport",
    "Family & Personal",
    "Other",
]

# Transaction type used to filter the query and to label the projection rows.
TYPE = "Expense"

In [2]:
import os
import psycopg2 as ps
import pandas as pd
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
load_dotenv()

# Open the PostgreSQL connection shared by the rest of the notebook.
# Each connect() keyword is read from the environment variable paired with it;
# a missing variable raises KeyError before any connection attempt is made.
conn = ps.connect(
    **{
        keyword: os.environ[variable]
        for keyword, variable in [
            ("host", "db_host"),
            ("port", "db_port"),
            ("dbname", "db_name"),
            ("user", "db_user"),
            ("password", "db_password"),
        ]
    }
)

In [3]:
# SQL query to pandas dataframe
def postgresql_to_dataframe(conn, select_query, column_names):
    """
    Run a SELECT query and return its result set as a pandas DataFrame.

    Parameters
    ----------
    conn : DB-API connection
        Open connection used to create the cursor (psycopg2 in this notebook).
    select_query : str
        SQL text passed unchanged to ``cursor.execute``.
    column_names : list of str
        Labels for the DataFrame columns, in the order the query selects them.

    Returns
    -------
    pandas.DataFrame or int
        One row per fetched tuple on success. If executing the query raises,
        the error is printed, the transaction is rolled back and ``1`` is
        returned, so callers should check the result type before using it.
    """
    cursor = conn.cursor()
    try:
        try:
            cursor.execute(select_query)
        except Exception as error:
            print("Error: %s" % error)
            # A failed statement leaves the PostgreSQL transaction aborted; without
            # a rollback every later query on this shared connection fails as well.
            try:
                conn.rollback()
            except Exception as rollback_error:
                # Keep the best-effort contract: report and still return the sentinel.
                print("Error: %s" % rollback_error)
            return 1

        # fetchall() returns the rows as a list of tuples.
        rows = cursor.fetchall()
    finally:
        # Release the cursor on every path, including a failing fetchall().
        cursor.close()

    return pd.DataFrame(rows, columns=column_names)

In [4]:
# Return insert sql script as string
import re

def _sql_literal(value):
    """Render a single cell value as a SQL literal for a VALUES row."""
    import numbers  # function-scope import keeps this cell self-contained

    # Missing values (None, NaN, NaT, pd.NA) become SQL null.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return 'null'
    # Numbers (int, float, bool, Decimal) are written unquoted.
    if isinstance(value, numbers.Number):
        return str(value)
    # Anything else is written as a quoted string; embedded single quotes are
    # doubled so the literal cannot terminate early.
    return "'" + str(value).replace("'", "''") + "'"


def get_insert_query_from_df(df, dest_table):
    """
    Build a multi-row ``INSERT INTO ... VALUES ...;`` statement from a DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Source rows; the column labels are used as the target column names.
    dest_table : str
        Name of the table to insert into (inserted into the SQL text as-is).

    Returns
    -------
    str
        The SQL script, one parenthesised row per DataFrame row. An empty
        DataFrame yields a statement with no rows after ``VALUES``.
    """
    header = '\n    INSERT INTO {dest_table} (\n        '.format(dest_table=dest_table)

    # One column name per line, aligned under the opening parenthesis.
    columns_string = ',\n        '.join(str(column) for column in df.columns)

    # Format every cell individually instead of post-processing str(row): a blanket
    # 'nan' -> 'null' substitution also rewrites text such as 'Maintenance', and the
    # repr of a one-element tuple carries a trailing comma that is invalid SQL.
    value_rows = [
        '(' + ', '.join(_sql_literal(value) for value in row) + ')'
        for row in df.itertuples(index=False, name=None)
    ]

    return header + columns_string + ')\n     VALUES\n' + ',\n'.join(value_rows) + ';'

In [5]:
# Build the year-end projection dataframe for one category from this year's transactions
def create_dataframe_for_category(category):
    """
    Build the year-end expense projection for one category.

    Queries the running total of this year's transactions of type ``TYPE`` for
    ``category``, then scales the latest running total linearly from the days
    elapsed (start of year to the last entry) to the full year.

    Parameters
    ----------
    category : str
        Category name to filter transactions on.

    Returns
    -------
    pandas.DataFrame
        Two rows with columns ``date``, ``type``, ``category`` and ``amount``:
        amount 0 at January 1st and the projected amount at December 31st.
        The projection is 0.0 when the category has no transactions this year.

    Raises
    ------
    RuntimeError
        If the query helper reports a failure instead of returning a DataFrame.
    """
    from datetime import date

    column_names = ["date", "total"]
    # Double any single quote so a category name cannot break out of the literal.
    category_literal = category.replace("'", "''")
    # NOTE(review): GROUP BY amount, date merges transactions that share both the
    # amount and the date before the running sum - confirm this is intended.
    query = (
        "SELECT date, SUM(abs(amount)) OVER (ORDER BY date) as total"
        " FROM transactions"
        " WHERE type IN ('" + TYPE + "')"
        " AND date_part('year', date) = date_part('year', now())"
        " AND category IN ('" + category_literal + "')"
        " GROUP BY amount, date order BY date"
    )
    df = postgresql_to_dataframe(conn, query, column_names)
    if not isinstance(df, pd.DataFrame):
        # The helper returns a non-DataFrame sentinel when the query fails.
        raise RuntimeError("Could not load transactions for category '%s'" % category)

    today = date.today()
    start_of_year_date = date(today.year, 1, 1)
    end_of_year_date = date(today.year, 12, 31)

    if df.empty:
        # No transactions yet this year: nothing to extrapolate.
        year_end_amount = 0.0
    else:
        last_row = df.tail(1)
        total = last_row['total']

        # Days between the start of the year and the last recorded entry. Clamp to
        # at least one day so an entry dated January 1st does not divide by zero.
        day_delta = (last_row['date'] - start_of_year_date).dt.days.clip(lower=1)

        # Scale the running total to the whole year.
        day_in_year = (end_of_year_date - start_of_year_date).days
        year_end_amount = round((total / day_delta) * day_in_year, 2).values[0]

    # Two points per category: zero at the start of the year, projection at the end.
    return pd.DataFrame({
        "date": [start_of_year_date.isoformat(), end_of_year_date.isoformat()],
        "type": [TYPE, TYPE],
        "category": [category, category],
        "amount": [0, year_end_amount]
    })

In [6]:
# Create dataframe for projections
# Build one frame per category and combine them in a single concat; growing a
# DataFrame inside the loop re-copies every earlier row on each iteration.
projection_frames = [create_dataframe_for_category(category) for category in CATEGORY_LIST]
df_projections = pd.concat(projection_frames) if projection_frames else pd.DataFrame()

insert_sql = get_insert_query_from_df(df_projections, 'projections')

# The context manager closes the file even if the write raises.
with open('./projections_insert.sql', 'w') as sql_file:
    sql_file.write(insert_sql)