In [3]:
import pandas as pd

def aggregate_expenditures_by_program(file_path):
    # Load the dataset
    df = pd.read_excel(file_path, sheet_name='Open Data')
    
    # Filter out only expenditure data and aggregate by program
    # Modify the column names if they are different in your files
    expenditure_data = df[df['Expense/Revenue'] == 'Expenses']
    aggregated_data = expenditure_data.groupby('Program').sum()
    
    return aggregated_data

# File paths for each year
file_paths = {
    2023: 'Resources/approved-operating-budget-summary-2023.xlsx',
    2022: 'Resources/approved-operating-budget-summary-2022.xlsx',
    2021: 'Resources/approved-operating-budget-summary-2021.xlsx',
    2020: 'Resources/approved-operating-budget-summary-2020.xlsx',
    2019: 'Resources/approved-operating-budget-summary-2019.xlsx',
  
}

# Aggregating expenditure data by program from each file
all_years_data = []
for year, file_path in file_paths.items():
    year_data = aggregate_expenditures_by_program(file_path)
    # You may want to add the year as a column or an index in year_data before appending
    year_data['Year'] = year  # Add year as a column
    all_years_data.append(year_data)

# Combine data from all years
combined_data = pd.concat(all_years_data)

# Displaying the combined summary
print(combined_data)


                                                                                    Service  \
Program                                                                                       
311 Toronto                               311 Development311 Development311 Development3...   
Arena Boards of Management                Community Ice & Facility BookingCommunity Ice ...   
Association of Community Centres          Social Economic & Neighbourhood DevelopmentSoc...   
Auditor General's Office                  Auditor General's OfficeAuditor General's Offi...   
Capital & Corporate Financing             Corporate AccountsCorporate AccountsCorporate ...   
...                                                                                     ...   
Toronto Transit Commission - Wheel Trans  Toronto Transit Commission - Wheel-Trans Servi...   
Toronto Water                             Stormwater ManagementStormwater ManagementStor...   
Toronto Zoo                               Zoo Cons

In [5]:
import pandas as pd

# Load one of the Excel files
file_paths = {
    2023: 'Resources/approved-operating-budget-summary-2023.xlsx',
    2022: 'Resources/approved-operating-budget-summary-2022.xlsx',
    2021: 'Resources/approved-operating-budget-summary-2021.xlsx',
    2020: 'Resources/approved-operating-budget-summary-2020.xlsx',
    2019: 'Resources/approved-operating-budget-summary-2019.xlsx',
  
}
df = pd.read_excel(file_path, sheet_name='Open Data')

# Print the first few rows and column names
print(df.head())
print(df.columns.tolist())


                     Program                       Service  \
0  Affordable Housing Office  Housing Improvement Programs   
1  Affordable Housing Office  Housing Improvement Programs   
2  Affordable Housing Office  Housing Improvement Programs   
3  Affordable Housing Office  Housing Improvement Programs   
4  Affordable Housing Office  Housing Improvement Programs   

                       Activity Expense/Revenue          Category Name  \
0  Housing Improvement Programs        Expenses  Salaries And Benefits   
1  Housing Improvement Programs        Expenses  Salaries And Benefits   
2  Housing Improvement Programs        Expenses  Salaries And Benefits   
3  Housing Improvement Programs        Expenses  Salaries And Benefits   
4  Housing Improvement Programs        Expenses  Salaries And Benefits   

  Sub-Category Name      Commitment item      2019  
0        Allowances  Benefits To Be Dist   1666.06  
1          Benefits    Comprehensive Med  29344.32  
2          Benefits    

In [7]:
import pandas as pd

def load_and_pivot_budget(file_path, year):
    df = pd.read_excel(file_path, sheet_name='Open Data')
    # Filter and aggregate the expenditure data by program
    # Assuming 'Expense/Revenue' indicates the type and 'Financial_Data' is the budget value
    expense_data = df[df['Expense/Revenue'] == 'Expenses']
    aggregated_data = expense_data.groupby('Program')['Expense/Revenue'].sum().reset_index()
    # Pivot this data with the year as a new column
    aggregated_data['Year'] = year
    pivoted_data = aggregated_data.pivot(index='Program', columns='Year', values='Expense/Revenue')
    return pivoted_data

# File paths for each year
file_paths = {
    2023: 'Resources/approved-operating-budget-summary-2023.xlsx',
    2022: 'Resources/approved-operating-budget-summary-2022.xlsx',
    2021: 'Resources/approved-operating-budget-summary-2021.xlsx',
    2020: 'Resources/approved-operating-budget-summary-2020.xlsx',
    2019: 'Resources/approved-operating-budget-summary-2019.xlsx',
  
}

# Load, pivot, and aggregate data from all files
all_years_data = [load_and_pivot_budget(path, year) for year, path in file_paths.items()]

# Combine pivoted data
combined_data = pd.concat(all_years_data, axis=1)

# Fill NaN values with 0 if necessary
combined_data = combined_data.fillna(0)

# Display the combined summary
print(combined_data)


Year                                                                                        2023  \
Program                                                                                            
311 Toronto                                    ExpensesExpensesExpensesExpensesExpensesExpens...   
Arena Boards of Management                     ExpensesExpensesExpensesExpensesExpensesExpens...   
Association of Community Centres               ExpensesExpensesExpensesExpensesExpensesExpens...   
Auditor General's Office                       ExpensesExpensesExpensesExpensesExpensesExpens...   
Capital & Corporate Financing                                   ExpensesExpensesExpensesExpenses   
...                                                                                          ...   
Affordable Housing Office                                                                      0   
Facilities, Real Estate, Environment & Energy                                                  0   


In [9]:
import pandas as pd

def load_and_pivot_budget(file_path, year):
    df = pd.read_excel(file_path, sheet_name='Open Data')
    
    # The column name for financial data is the year, based on the provided image
    financial_column = str(2019,2020,2021,2022,2023)
    
    # Filter and aggregate the expenditure data by program
    expense_data = df[df['Expense/Revenue'] == 'Expenses']
    aggregated_data = expense_data.groupby('Program')[financial_column].sum().reset_index()
    
    # Pivot this data with the year as a new column
    pivoted_data = aggregated_data.pivot(index='Program', columns='Program', values=financial_column)
    pivoted_data.columns = [f'Expenses_{2019,2020,2021,2023}']  # Rename the column to reflect the year
    return pivoted_data

# File paths for each year, adjust the paths as necessary
file_paths = {
    2023: 'Resources/approved-operating-budget-summary-2023.xlsx',
    2022: 'Resources/approved-operating-budget-summary-2022.xlsx',
    2021: 'Resources/approved-operating-budget-summary-2021.xlsx',
    2020: 'Resources/approved-operating-budget-summary-2020.xlsx',
    2019: 'Resources/approved-operating-budget-summary-2019.xlsx',
  
}

# Load, pivot, and aggregate data from all files
all_years_data = []
for year, file_path in file_paths.items():
    year_data = load_and_pivot_budget(file_path, year)
    all_years_data.append(year_data)

# Combine pivoted data from all years
combined_data = pd.concat(all_years_data, axis=1)

# Display the combined summary
print(combined_data)


TypeError: str() takes at most 3 arguments (5 given)

In [10]:
import pandas as pd

# Define file paths
file_paths = {
    2023: 'Resources/approved-operating-budget-summary-2023.xlsx',
    2022: 'Resources/approved-operating-budget-summary-2022.xlsx',
    2021: 'Resources/approved-operating-budget-summary-2021.xlsx',
    2020: 'Resources/approved-operating-budget-summary-2020.xlsx',
    2019: 'Resources/approved-operating-budget-summary-2019.xlsx',
  
}

# Initialize an empty DataFrame to hold the aggregated expenses by program
total_expenses_by_program = pd.DataFrame()

# Process each file
for file_path in file_paths:
    # Load the data
    df = pd.read_excel(file_path, sheet_name='Open Data')
    # Assuming the column for financial data is the last one
    financial_column = df.columns[-1]
    # Filter for expenses and aggregate by program
    expenses_by_program = df[df['Expense/Revenue'] == 'Expenses'].groupby('Program')[financial_column].sum()
    # Combine the data
    total_expenses_by_program = total_expenses_by_program.add(expenses_by_program, fill_value=0)

# After processing all files, identify the program with the highest and lowest expenses
highest_expense_program = total_expenses_by_program.idxmax()
lowest_expense_program = total_expenses_by_program.idxmin()

highest_expense_value = total_expenses_by_program.max()
lowest_expense_value = total_expenses_by_program.min()

print(f"Program with the highest expenses: {highest_expense_program} - Value: {highest_expense_value}")
print(f"Program with the lowest expenses: {lowest_expense_program} - Value: {lowest_expense_value}")


ValueError: Invalid file path or buffer object type: <class 'int'>

In [20]:
import pandas as pd

# Function to load data and aggregate expenditures by program
def load_and_aggregate_data(file_path):
    # Loading the Excel file
    df = pd.read_excel(file_path, sheet_name='Open Data')
    # Selecting rows where 'Expense/Revenue' column has 'Expense' and then aggregating the expenses by 'Program'
    expenses = df[df['Expense/Revenue'] == 'Expense'].groupby('Program').agg(Total_Expenses=pd.NamedAgg(column=str(2023), aggfunc='sum'))
    return expenses

# File paths for each year
file_paths = {
    2023: 'Resources/approved-operating-budget-summary-2023.xlsx',
    2022: 'Resources/approved-operating-budget-summary-2022.xlsx',
    2021: 'Resources/approved-operating-budget-summary-2021.xlsx',
    2020: 'Resources/approved-operating-budget-summary-2020.xlsx',
    2019: 'Resources/approved-operating-budget-summary-2019.xlsx',
}

# Initialize an empty DataFrame to hold the combined data
combined_expenses = pd.DataFrame()

# Iterate over the file paths and aggregate data
for year, path in file_paths.items():
    yearly_data = load_and_aggregate_data(path)
    # Combine the yearly data with the existing DataFrame
    combined_expenses = combined_expenses.add(yearly_data, fill_value=0)

# Identifying the programs with highest and lowest expenses
highest_expense_program = combined_expenses['Total_Expenses'].idxmax()
lowest_expense_program = combined_expenses['Total_Expenses'].idxmin()

highest_expense_value = combined_expenses['Total_Expenses'].max()
lowest_expense_value = combined_expenses['Total_Expenses'].min()

print(f"Program with the highest expenses: {highest_expense_program} - Value: {highest_expense_value}")
print(f"Program with the lowest expenses: {lowest_expense_program} - Value: {lowest_expense_value}")


KeyError: "Column(s) ['2023'] do not exist"