In [None]:
import os
import pandas as pd

# Folder holding the expense workbooks: a directory named Data at the project root
base_path = '../Data'

# pandas display options: no row limit (output scrolls), at most 15 columns,
# and a 400-character wide print area
pd.set_option("display.max_rows", None)
pd.set_option('display.max_columns', 15)
pd.set_option('display.width', 400)

# Converter applied to each spreadsheet column while it is read; the keys also
# serve as the list of columns that are loaded from the sheet
column_converters = {
    'Date': pd.to_datetime,
    'Type': str,
    'Company': str,
    **dict.fromkeys(('Cost', 'Venmo +', 'Venmo -', 'Pay', 'Net'), float),
}

# Calendar month abbreviations in order, one quarter per line
months = [
    'Jan', 'Feb', 'Mar',
    'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep',
    'Oct', 'Nov', 'Dec',
]

# Row labels (values of the Type column) left out of the yearly summaries
ignored_pivot_categories = ['Pay']

## Specifying Column Data Types
There are two ways to give the columns of a data frame specific data types: cast each column explicitly, or let pandas infer them.

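### Setting explicitly
Cast each column with `astype`. This only works for mapping values that are real dtypes (such as `str` or `float`); a callable like `pd.to_datetime` is not a dtype and has to be applied to the column instead.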
```python
for column, dtype in column_converters.items():
    if column in df.columns:
        df[column] = df[column].astype(dtype)
```

### Setting by inference
```python
# Infer data types
df_cleaned = df_cleaned.infer_objects(copy=False)
```

In [None]:
# Load every expense workbook found under base_path and consolidate the rows
# into one data frame named `data` (an empty frame when nothing is found).
loaded_frames = []

for root, dirs, files in os.walk(base_path):
    # os.walk yields entries in arbitrary order; sorting makes the row order of
    # the consolidated frame reproducible between runs
    dirs.sort()
    for file in sorted(files):
        # Excel keeps a hidden '~$<name>' lock file next to an open workbook; it
        # matches the name pattern below but is not a readable spreadsheet
        if file.startswith('~$'):
            continue
        if 'Excel' in root and '_Expenses.xlsx' in file:
            file_path = os.path.join(root, file)
            # Read only the 'Expenses' sheet, restricted to the configured columns
            df_raw = pd.read_excel(
                file_path,
                sheet_name='Expenses',
                converters=column_converters,
                usecols=lambda col: col in column_converters,
            )
            # Discard rows lacking a Type or Company, then infer column dtypes
            df_cleaned = (
                df_raw.dropna(subset=['Type', 'Company'])
                      .infer_objects(copy=False)
            )
            # Replace empty cells with 0 everywhere except Date: a 0 there would
            # later be parsed as 1970-01-01 instead of staying "missing"
            df_cleaned = df_cleaned.fillna(
                {column: 0 for column in df_cleaned.columns if column != 'Date'}
            )

            loaded_frames.append(df_cleaned)
            print(file_path)

# Concatenate once at the end instead of re-copying a growing frame per file
data = pd.concat(loaded_frames, ignore_index=True) if loaded_frames else pd.DataFrame()

In [None]:
# Dump the whole consolidated frame as plain text
print(str(data))

In [None]:
# Build a pivot table with one row per category (Type) and one column per
# (Year, Month); each cell is the summed Net value of that category for the month.

# .assign returns a new frame, so `data` itself is never modified and this cell
# can be re-run safely. Rows whose Date cannot be determined are left out so the
# Year column stays integer-typed.
expenses_dated = (
    data.assign(Date=pd.to_datetime(data['Date']))
        .dropna(subset=['Date'])
        .assign(
            Year=lambda frame: frame['Date'].dt.year,
            # Month number (1-12) indexes straight into `months`, giving an
            # ordered categorical ("Jan" < "Feb" < ...) that does not depend on
            # the locale the way strftime month names do
            Month=lambda frame: pd.Categorical.from_codes(
                frame['Date'].dt.month.to_numpy() - 1,
                categories=months,
                ordered=True,
            ),
        )
)

# Group by Type, Year, and Month and sum the Net column
pivot_df = (
    expenses_dated.groupby(['Type', 'Year', 'Month'], observed=True)['Net']
                  .sum()
                  .reset_index()
)
print(pivot_df)

# Create Pivot Table; every (Type, Year, Month) occurs once in pivot_df, so the
# explicit 'sum' only documents the intent, and absent combinations become 0
pivot_table = pivot_df.pivot_table(
    index='Type',
    columns=['Year', 'Month'],
    values='Net',
    aggfunc='sum',
    fill_value=0,
    observed=True,
)

In [None]:
# Print one pivot table per year.
# unique() returns years in order of first appearance in the Type-sorted
# pivot_df, so sort them to get chronological output.
for year in sorted(pivot_df['Year'].unique()):
    # Select the month columns of this year (drops the Year column level)
    year_df = pivot_table.xs(year, level=0, axis=1)
    print(f"\nPivot Table for {year}:\n", year_df)

In [None]:
# Print one pivot table per year, extended with a 'Total' column (sum of each
# category row) and a 'Monthly Total' row (sum of each column).

# unique() returns years in order of first appearance; sort for chronological output
for year in sorted(pivot_df['Year'].unique()):
    # Select the month columns of this year (drops the Year column level)
    pivot_table_slice = pivot_table.xs(year, level=0, axis=1)
    # Work on a copy so the edits below never touch pivot_table
    year_pivot_table = pivot_table_slice.copy()
    # Drop the ignored categories; errors='ignore' keeps this from raising a
    # KeyError when one of them has no rows in the data
    year_pivot_table = year_pivot_table.drop(index=ignored_pivot_categories, errors='ignore')

    # Add a column that sums each row
    year_pivot_table.loc[:, 'Total'] = year_pivot_table.sum(axis=1)

    # Add a row at the bottom that sums each column (months and 'Total')
    monthly_sum = year_pivot_table.sum(axis=0).to_frame().T
    monthly_sum.index = ['Monthly Total']
    year_pivot_table = pd.concat([year_pivot_table, monthly_sum])

    # Keep categories whose total is non-zero, and always keep 'Monthly Total'
    keep_row = (year_pivot_table['Total'] != 0) | (year_pivot_table.index == 'Monthly Total')
    year_pivot_table = year_pivot_table[keep_row]

    print(f"\nPivot Table for {year}:\n", year_pivot_table)