In [1]:
import pandas as pd

def aggregate_expenditures_by_program(file_path):
    # Load the dataset
    df = pd.read_excel(file_path, sheet_name='Open Data')
    
    # Filter out only expenditure data and aggregate by program
    # Modify the column names if they are different in your files
    expenditure_data = df[df['Expense/Revenue'] == 'Expenses']
    aggregated_data = expenditure_data.groupby('Program').sum()
    
    return aggregated_data

# File paths for each year
file_paths = {
    2023: 'Resources/approved-operating-budget-summary-2023.xlsx',
    2022: 'Resources/approved-operating-budget-summary-2022.xlsx',
    2021: 'Resources/approved-operating-budget-summary-2021.xlsx',
    2020: 'Resources/approved-operating-budget-summary-2020.xlsx',
    2019: 'Resources/approved-operating-budget-summary-2019.xlsx',
  
}

# Aggregating expenditure data by program from each file
all_years_data = []
for year, file_path in file_paths.items():
    year_data = aggregate_expenditures_by_program(file_path)
    # You may want to add the year as a column or an index in year_data before appending
    year_data['Year'] = year  # Add year as a column
    all_years_data.append(year_data)

# Combine data from all years
combined_data = pd.concat(all_years_data)

# Displaying the combined summary
print(combined_data)


                                                                                    Service  \
Program                                                                                       
311 Toronto                               311 Development311 Development311 Development3...   
Arena Boards of Management                Community Ice & Facility BookingCommunity Ice ...   
Association of Community Centres          Social Economic & Neighbourhood DevelopmentSoc...   
Auditor General's Office                  Auditor General's OfficeAuditor General's Offi...   
Capital & Corporate Financing             Corporate AccountsCorporate AccountsCorporate ...   
...                                                                                     ...   
Toronto Transit Commission - Wheel Trans  Toronto Transit Commission - Wheel-Trans Servi...   
Toronto Water                             Stormwater ManagementStormwater ManagementStor...   
Toronto Zoo                               Zoo Cons

In [2]:
import pandas as pd

# Load one of the Excel files
file_paths = {
    2023: 'Resources/approved-operating-budget-summary-2023.xlsx',
    2022: 'Resources/approved-operating-budget-summary-2022.xlsx',
    2021: 'Resources/approved-operating-budget-summary-2021.xlsx',
    2020: 'Resources/approved-operating-budget-summary-2020.xlsx',
    2019: 'Resources/approved-operating-budget-summary-2019.xlsx',
  
}
df = pd.read_excel(file_path, sheet_name='Open Data')

# Print the first few rows and column names
print(df.head())
print(df.columns.tolist())


                     Program                       Service  \
0  Affordable Housing Office  Housing Improvement Programs   
1  Affordable Housing Office  Housing Improvement Programs   
2  Affordable Housing Office  Housing Improvement Programs   
3  Affordable Housing Office  Housing Improvement Programs   
4  Affordable Housing Office  Housing Improvement Programs   

                       Activity Expense/Revenue          Category Name  \
0  Housing Improvement Programs        Expenses  Salaries And Benefits   
1  Housing Improvement Programs        Expenses  Salaries And Benefits   
2  Housing Improvement Programs        Expenses  Salaries And Benefits   
3  Housing Improvement Programs        Expenses  Salaries And Benefits   
4  Housing Improvement Programs        Expenses  Salaries And Benefits   

  Sub-Category Name      Commitment item      2019  
0        Allowances  Benefits To Be Dist   1666.06  
1          Benefits    Comprehensive Med  29344.32  
2          Benefits    

In [3]:
import pandas as pd

def load_and_pivot_budget(file_path, year):
    df = pd.read_excel(file_path, sheet_name='Open Data')
    # Filter and aggregate the expenditure data by program
    # Assuming 'Expense/Revenue' indicates the type and 'Financial_Data' is the budget value
    expense_data = df[df['Expense/Revenue'] == 'Expenses']
    aggregated_data = expense_data.groupby('Program')['Expense/Revenue'].sum().reset_index()
    # Pivot this data with the year as a new column
    aggregated_data['Year'] = year
    pivoted_data = aggregated_data.pivot(index='Program', columns='Year', values='Expense/Revenue')
    return pivoted_data

# File paths for each year
file_paths = {
    2023: 'Resources/approved-operating-budget-summary-2023.xlsx',
    2022: 'Resources/approved-operating-budget-summary-2022.xlsx',
    2021: 'Resources/approved-operating-budget-summary-2021.xlsx',
    2020: 'Resources/approved-operating-budget-summary-2020.xlsx',
    2019: 'Resources/approved-operating-budget-summary-2019.xlsx',
  
}

# Load, pivot, and aggregate data from all files
all_years_data = [load_and_pivot_budget(path, year) for year, path in file_paths.items()]

# Combine pivoted data
combined_data = pd.concat(all_years_data, axis=1)

# Fill NaN values with 0 if necessary
combined_data = combined_data.fillna(0)

# Display the combined summary
print(combined_data)


Year                                                                                        2023  \
Program                                                                                            
311 Toronto                                    ExpensesExpensesExpensesExpensesExpensesExpens...   
Arena Boards of Management                     ExpensesExpensesExpensesExpensesExpensesExpens...   
Association of Community Centres               ExpensesExpensesExpensesExpensesExpensesExpens...   
Auditor General's Office                       ExpensesExpensesExpensesExpensesExpensesExpens...   
Capital & Corporate Financing                                   ExpensesExpensesExpensesExpenses   
...                                                                                          ...   
Affordable Housing Office                                                                      0   
Facilities, Real Estate, Environment & Energy                                                  0   


In [7]:
import pandas as pd

# Load the dataset
file_path = 'Resources/approved-operating-budget-summary-2023.xlsx'
budget_data = pd.read_excel(file_path, 'Open Data')  # Replace 'Open Data' with the correct sheet name if different

# Filtering only expenses
expenses_data = budget_data[budget_data['Expense/Revenue'] == 'Expenses']

# Grouping by Program and summing up the expenses
total_expenses_by_program = expenses_data.groupby('Program')[2023].sum().sort_values()

# Identifying the programs with the highest and lowest expenses
lowest_expense_program = total_expenses_by_program.idxmin()
highest_expense_program = total_expenses_by_program.idxmax()
lowest_expense_amount = total_expenses_by_program.min()
highest_expense_amount = total_expenses_by_program.max()

print("Lowest Expense Program:", lowest_expense_program, "with an expense of", lowest_expense_amount)
print("Highest Expense Program:", highest_expense_program, "with an expense of", highest_expense_amount)


Lowest Expense Program: Integrity Commissioner's Office with an expense of 759596.32

Highest Expense Program: Toronto Transit Commission - Conventional with an expense of 2237543963.48


In [5]:
###Lowest Program expenditure over 5 years

In [8]:
import pandas as pd

# Initialize lists to store results
lowest_expense_programs = []
highest_expense_programs = []

# Iterate over the files for each year
for year in range(2019, 2024):
    file_path = f'Resources/approved-operating-budget-summary-{year}.xlsx'
    budget_data = pd.read_excel(file_path, 'Open Data')  # Replace 'Open Data' with the correct sheet name if different

    # Filtering only expenses
    expenses_data = budget_data[budget_data['Expense/Revenue'] == 'Expenses']

    # Grouping by Program and summing up the expenses
    total_expenses_by_program = expenses_data.groupby('Program')[year].sum().sort_values()

    # Identifying the programs with the highest and lowest expenses
    lowest_expense_program = total_expenses_by_program.idxmin()
    highest_expense_program = total_expenses_by_program.idxmax()
    lowest_expense_amount = total_expenses_by_program.min()
    highest_expense_amount = total_expenses_by_program.max()

    # Append results to the lists
    lowest_expense_programs.append((year, lowest_expense_program, lowest_expense_amount))
    highest_expense_programs.append((year, highest_expense_program, highest_expense_amount))

# Print results
for year, program, amount in lowest_expense_programs:
    print(f"{year} - Lowest Expense Program: {program} with an expense of {amount}")

for year, program, amount in highest_expense_programs:
    print(f"{year} - Highest Expense Program: {program} with an expense of {amount}")


2019 - Lowest Expense Program: Integrity Commissioner's Office with an expense of 575725.34
2020 - Lowest Expense Program: Integrity Commissioner's Office with an expense of 762406.78
2021 - Lowest Expense Program: Integrity Commissioner's Office with an expense of 741313.24
2022 - Lowest Expense Program: Integrity Commissioner's Office with an expense of 761587.2
2023 - Lowest Expense Program: Integrity Commissioner's Office with an expense of 759596.32
2019 - Highest Expense Program: Toronto Transit Commission - Conventional with an expense of 1910540399.22
2020 - Highest Expense Program: Toronto Transit Commission - Conventional with an expense of 1987170798.92
2021 - Highest Expense Program: Toronto Transit Commission - Conventional with an expense of 2034438000.0
2022 - Highest Expense Program: Toronto Transit Commission - Conventional with an expense of 2096541463.5900002
2023 - Highest Expense Program: Toronto Transit Commission - Conventional with an expense of 2237543963.48


In [20]:
import pandas as pd

# Initialize variables to store the lowest and highest expense program details
lowest_expense_program, lowest_expense_amount = None, float('inf')
highest_expense_program, highest_expense_amount = None, -float('inf')

# Iterate over the files for each year
for year in range(2019, 2024):
    file_path = f'Resources/approved-operating-budget-summary-{year}.xlsx'
    budget_data = pd.read_excel(file_path, 'Open Data')

    # Filtering only expenses
    expenses_data = budget_data[budget_data['Expense/Revenue'] == 'Expenses']

    # Grouping by Program and summing up the expenses
    total_expenses_by_program = expenses_data.groupby('Program')[float(year)].sum()

    # Update the accumulated expenses and check for lowest/highest
    for program, expense in total_expenses_by_program.items():
        # Update lowest expense program
        if expense < lowest_expense_amount:
            lowest_expense_program, lowest_expense_amount = program, expense

        # Update highest expense program
        if expense > highest_expense_amount:
            highest_expense_program, highest_expense_amount = program, expense

# Print results
print("Overall Lowest Expense Program:", lowest_expense_program, "with an expense of", lowest_expense_amount)
print("Overall Highest Expense Program:", highest_expense_program, "with an expense of", highest_expense_amount)


Overall Lowest Expense Program: Integrity Commissioner's Office with an expense of 575725.34
Overall Highest Expense Program: Toronto Transit Commission - Conventional with an expense of 2237543963.48


In [24]:
import pandas as pd

# Initialize variables to store the lowest and highest revenue program details
lowest_revenue_program, lowest_revenue_amount = None, float('inf')
highest_revenue_program, highest_revenue_amount = None, -float('inf')

# Flag to check if revenue data exists
revenue_data_found = False

# Iterate over the files for each year
for year in range(2019, 2024):
    file_path = f'Resources/approved-operating-budget-summary-{year}.xlsx'
    budget_data = pd.read_excel(file_path, 'Open Data')

    # Filtering only revenues
    revenue_data = budget_data[budget_data['Expense/Revenue'] == 'Revenue']

    # Check if there is any revenue data
    if not revenue_data.empty:
        revenue_data_found = True
        # Grouping by Program and summing up the revenues
        total_revenues_by_program = revenue_data.groupby('Program')[float(year)].sum()

        # Update the accumulated revenues and check for lowest/highest
        for program, revenue in total_revenues_by_program.items():
            # Update lowest revenue program
            if revenue < lowest_revenue_amount:
                lowest_revenue_program, lowest_revenue_amount = program, revenue

            # Update highest revenue program
            if revenue > highest_revenue_amount:
                highest_revenue_program, highest_revenue_amount = program, revenue

# Print results
if revenue_data_found:
    print("Overall Lowest Revenue Program:", lowest_revenue_program, "with a revenue of", lowest_revenue_amount)
    print("Overall Highest Revenue Program:", highest_revenue_program, "with a revenue of", highest_revenue_amount)
else:
    print("No revenue data found in the provided files.")


No revenue data found in the provided files.
