In [29]:
import pandas as pd
from sqlalchemy import create_engine
import numpy as np

In [30]:
# Assuming you have a SQLAlchemy engine
# Replace 'sqlite:///your_database.db' with your actual database connection string
engine = create_engine('sqlite:///data/database.db')

In [31]:
# Step 1: Load the tables
definition_df = pd.read_sql_table("E01_CashFlowDefinition", engine)
definition_accounts_df = pd.read_sql_table('E01_CashFlowDefinitionAccounts', engine)
transactions_df = pd.read_sql_query('SELECT * FROM D12_CF_BankReceipts_Aggregated WHERE strftime("%Y", d_date) = "2024"', engine)

In [32]:
definition_df.head()

Unnamed: 0,id,key,text
0,1,k,saimnieciskās darbības ieņēmumi
1,2,l,citi ieņēmumi
2,3,m,maksājumi par izejvielām
3,4,n,PVN maksājumi


In [33]:
definition_accounts_df.head()

Unnamed: 0,id,definition_id,operator,entry_type,account
0,1,1,+,CR,6110
1,8,2,+,CR,6550
2,10,3,-,DR,7110
3,11,3,-,DR,7210
4,12,4,-,DR,5721


In [34]:
transactions_df.head()

Unnamed: 0,d_id,d_type,d_date,d_customer,gl_account,gl_entry_type,gl_amount,d_currency,gl_amount_LC
0,1700,1,2024-01-03 00:00:00.000000,67,2310,DR,0.0,EUR,0.0
1,1700,1,2024-01-03 00:00:00.000000,67,2620,DR,6736.19,EUR,6736.19
2,1700,1,2024-01-03 00:00:00.000000,67,5721,CR,1169.09,EUR,1169.09
3,1700,1,2024-01-03 00:00:00.000000,67,6110,CR,2376.03,EUR,2376.03
4,1700,1,2024-01-03 00:00:00.000000,67,6550,CR,3191.07,EUR,3191.07


In [35]:
# Step 2: Perform the LEFT JOIN on entry_type and account
# This will join all rows in definition with matching rows in transactions
definition_df = pd.merge(
    definition_df.rename(columns={"id":"definition_id"}),
    definition_accounts_df,
    on="definition_id")



In [36]:
definition_df.drop(columns=["id"], inplace=True)

In [37]:
definition_df.head()

Unnamed: 0,definition_id,key,text,operator,entry_type,account
0,1,k,saimnieciskās darbības ieņēmumi,+,CR,6110
1,2,l,citi ieņēmumi,+,CR,6550
2,3,m,maksājumi par izejvielām,-,DR,7110
3,3,m,maksājumi par izejvielām,-,DR,7210
4,4,n,PVN maksājumi,-,DR,5721


In [38]:
# This will join all rows in definition with matching rows in transactions

merged_df = definition_df.merge(
    transactions_df,
    left_on=['entry_type', 'account'],        # Columns in definition_df
    right_on=['gl_entry_type', 'gl_account'], # Corresponding columns in transactions_df
    how='left'
)

In [39]:
merged_df.head()


Unnamed: 0,definition_id,key,text,operator,entry_type,account,d_id,d_type,d_date,d_customer,gl_account,gl_entry_type,gl_amount,d_currency,gl_amount_LC
0,1,k,saimnieciskās darbības ieņēmumi,+,CR,6110,1700.0,1.0,2024-01-03 00:00:00.000000,67.0,6110,CR,2376.03,EUR,2376.03
1,1,k,saimnieciskās darbības ieņēmumi,+,CR,6110,2030.0,1.0,2024-01-03 00:00:00.000000,43.0,6110,CR,6870.59,EUR,6870.59
2,1,k,saimnieciskās darbības ieņēmumi,+,CR,6110,2031.0,1.0,2024-01-04 00:00:00.000000,70.0,6110,CR,14.6,EUR,14.6
3,1,k,saimnieciskās darbības ieņēmumi,+,CR,6110,1590.0,1.0,2024-01-05 00:00:00.000000,93.0,6110,CR,5952.3,EUR,5952.3
4,1,k,saimnieciskās darbības ieņēmumi,+,CR,6110,2333.0,1.0,2024-01-05 00:00:00.000000,67.0,6110,CR,1731.35,EUR,1731.35


In [9]:
# Step 3: Apply the operator to the amount
# Use np.where to adjust the amount based on the operator column
merged_df['adjusted_amount'] = np.where(
    merged_df['operator'] == '+', 
    merged_df['gl_amount_LC'], 
    -merged_df['gl_amount_LC']
)

In [10]:
merged_df.head()

Unnamed: 0,id,definition_id,operator,entry_type,account,d_id,d_type,d_date,d_customer,gl_account,gl_entry_type,gl_amount,d_currency,gl_amount_LC,adjusted_amount
0,1,1,+,CR,6110,1700.0,1.0,2024-01-03 00:00:00.000000,67.0,6110,CR,2376.03,EUR,2376.03,2376.03
1,1,1,+,CR,6110,2030.0,1.0,2024-01-03 00:00:00.000000,43.0,6110,CR,6870.59,EUR,6870.59,6870.59
2,1,1,+,CR,6110,2031.0,1.0,2024-01-04 00:00:00.000000,70.0,6110,CR,14.6,EUR,14.6,14.6
3,1,1,+,CR,6110,1590.0,1.0,2024-01-05 00:00:00.000000,93.0,6110,CR,5952.3,EUR,5952.3,5952.3
4,1,1,+,CR,6110,2333.0,1.0,2024-01-05 00:00:00.000000,67.0,6110,CR,1731.35,EUR,1731.35,1731.35


In [11]:
# Step 4: Set up weekly periods for the transaction dates
# Convert `date` column to datetime if not already done
merged_df['d_date'] = pd.to_datetime(merged_df['d_date'])

In [12]:
merged_df.head()

Unnamed: 0,id,definition_id,operator,entry_type,account,d_id,d_type,d_date,d_customer,gl_account,gl_entry_type,gl_amount,d_currency,gl_amount_LC,adjusted_amount
0,1,1,+,CR,6110,1700.0,1.0,2024-01-03,67.0,6110,CR,2376.03,EUR,2376.03,2376.03
1,1,1,+,CR,6110,2030.0,1.0,2024-01-03,43.0,6110,CR,6870.59,EUR,6870.59,6870.59
2,1,1,+,CR,6110,2031.0,1.0,2024-01-04,70.0,6110,CR,14.6,EUR,14.6,14.6
3,1,1,+,CR,6110,1590.0,1.0,2024-01-05,93.0,6110,CR,5952.3,EUR,5952.3,5952.3
4,1,1,+,CR,6110,2333.0,1.0,2024-01-05,67.0,6110,CR,1731.35,EUR,1731.35,1731.35


In [13]:
# Calculate the week ending dates for each transaction
merged_df['week_end'] = merged_df['d_date'] + pd.offsets.Week(weekday=6)  # Week ends on Sunday
merged_df.head()

Unnamed: 0,id,definition_id,operator,entry_type,account,d_id,d_type,d_date,d_customer,gl_account,gl_entry_type,gl_amount,d_currency,gl_amount_LC,adjusted_amount,week_end
0,1,1,+,CR,6110,1700.0,1.0,2024-01-03,67.0,6110,CR,2376.03,EUR,2376.03,2376.03,2024-01-07
1,1,1,+,CR,6110,2030.0,1.0,2024-01-03,43.0,6110,CR,6870.59,EUR,6870.59,6870.59,2024-01-07
2,1,1,+,CR,6110,2031.0,1.0,2024-01-04,70.0,6110,CR,14.6,EUR,14.6,14.6,2024-01-07
3,1,1,+,CR,6110,1590.0,1.0,2024-01-05,93.0,6110,CR,5952.3,EUR,5952.3,5952.3,2024-01-07
4,1,1,+,CR,6110,2333.0,1.0,2024-01-05,67.0,6110,CR,1731.35,EUR,1731.35,1731.35,2024-01-07


In [14]:
# Step 5: Aggregate by week and row_id
# Group by row_id and week_end, summing the adjusted amounts
aggregated_df = (
    merged_df.groupby(['definition_id', 'week_end'])['adjusted_amount']
    .sum()
    .reset_index()
)
aggregated_df.head()

Unnamed: 0,definition_id,week_end,adjusted_amount
0,1,2024-01-07,16944.87
1,1,2024-01-14,11614.31
2,1,2024-01-21,7718.96
3,1,2024-01-28,10022.3
4,1,2024-02-11,214.36


In [15]:
# Step 6: Create a pivot table
# Pivot with row_id as rows, week_end as columns, and adjusted_amount as values
pivot_df = aggregated_df.pivot_table(
    index='definition_id',
    columns='week_end',
    values='adjusted_amount',
    fill_value=0
)
pivot_df.head()

week_end,2024-01-07,2024-01-14,2024-01-21,2024-01-28,2024-02-11,2024-02-18,2024-02-25,2024-03-03,2024-03-10,2024-03-24,...,2024-10-27,2024-11-03,2024-11-10,2024-11-17,2024-11-24,2024-12-01,2024-12-08,2024-12-15,2024-12-22,2024-12-29
definition_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,16944.87,11614.31,7718.96,10022.3,214.36,3284.94,15948.17,2978.2,10457.39,2768.05,...,7852.09,0.0,1460.69,78.79,5598.72,5956.23,0.0,6189.78,3156.65,0.0
2,10861.16,9284.56,9524.83,12576.13,606.21,16868.43,12804.67,3503.0,12920.2,4719.53,...,9041.21,5330.33,4055.51,8314.51,2755.92,2204.96,1403.99,7556.09,9379.13,13197.29
4,5839.27,4388.76,3621.2,4745.66,172.32,4232.22,6038.09,1361.06,4909.3,1572.4,...,3547.6,1119.37,1158.4,1762.6,1754.47,1713.85,294.84,2886.63,2632.52,2771.44


In [16]:
# Step 7: Ensure every week in 2024 is represented as a column
# Define all weeks in 2024 to create a complete set of columns
all_weeks = pd.date_range(start="2024-01-01", end="2024-12-31", freq='W-SUN')

In [17]:
# Reindex pivot table with all weeks in 2024 as columns
pivot_df = pivot_df.reindex(columns=all_weeks, fill_value=0)
pivot_df.head()

Unnamed: 0_level_0,2024-01-07,2024-01-14,2024-01-21,2024-01-28,2024-02-04,2024-02-11,2024-02-18,2024-02-25,2024-03-03,2024-03-10,...,2024-10-27,2024-11-03,2024-11-10,2024-11-17,2024-11-24,2024-12-01,2024-12-08,2024-12-15,2024-12-22,2024-12-29
definition_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,16944.87,11614.31,7718.96,10022.3,0,214.36,3284.94,15948.17,2978.2,10457.39,...,7852.09,0.0,1460.69,78.79,5598.72,5956.23,0.0,6189.78,3156.65,0.0
2,10861.16,9284.56,9524.83,12576.13,0,606.21,16868.43,12804.67,3503.0,12920.2,...,9041.21,5330.33,4055.51,8314.51,2755.92,2204.96,1403.99,7556.09,9379.13,13197.29
4,5839.27,4388.76,3621.2,4745.66,0,172.32,4232.22,6038.09,1361.06,4909.3,...,3547.6,1119.37,1158.4,1762.6,1754.47,1713.85,294.84,2886.63,2632.52,2771.44


In [18]:
# Optional: Format column headers to display dates as strings (YYYY-MM-DD)
# Ensure columns are in datetime format before formatting
pivot_df.columns = pd.to_datetime(pivot_df.columns, errors='coerce')

# Format column headers to show only the date part
pivot_df.columns = [col.strftime('%Y-%m-%d') if not pd.isnull(col) else col for col in pivot_df.columns]
pivot_df.head()

Unnamed: 0_level_0,2024-01-07,2024-01-14,2024-01-21,2024-01-28,2024-02-04,2024-02-11,2024-02-18,2024-02-25,2024-03-03,2024-03-10,...,2024-10-27,2024-11-03,2024-11-10,2024-11-17,2024-11-24,2024-12-01,2024-12-08,2024-12-15,2024-12-22,2024-12-29
definition_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,16944.87,11614.31,7718.96,10022.3,0,214.36,3284.94,15948.17,2978.2,10457.39,...,7852.09,0.0,1460.69,78.79,5598.72,5956.23,0.0,6189.78,3156.65,0.0
2,10861.16,9284.56,9524.83,12576.13,0,606.21,16868.43,12804.67,3503.0,12920.2,...,9041.21,5330.33,4055.51,8314.51,2755.92,2204.96,1403.99,7556.09,9379.13,13197.29
4,5839.27,4388.76,3621.2,4745.66,0,172.32,4232.22,6038.09,1361.06,4909.3,...,3547.6,1119.37,1158.4,1762.6,1754.47,1713.85,294.84,2886.63,2632.52,2771.44


In [23]:
a = pd.to_datetime("2024-11-17")
a

Timestamp('2024-11-17 00:00:00')

In [24]:
b = a + pd.offsets.Week(n=0, weekday=6)
b

Timestamp('2024-11-17 00:00:00')

In [30]:
c = pivot_df.iat[0, 4]
type(c)

numpy.int64

In [28]:
type(pivot_df.columns[1])

str