In [1]:
import pandas as pd
from pathlib import Path
import openpyxl
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Locations of the approved operating budget workbooks, one per year.
budget_path_2019 = Path("Resources/approved-operating-budget-summary-2019.xlsx")
budget_path_2020 = Path("Resources/approved-operating-budget-summary-2020.xlsx")
budget_path_2021 = Path("Resources/approved-operating-budget-summary-2021.xlsx")
budget_path_2022 = Path("Resources/approved-operating-budget-summary-2022.xlsx")
budget_path_2023 = Path("Resources/approved-operating-budget-summary-2023.xlsx")

# Read the 'Open Data' worksheet of every workbook, oldest year first.
df_list = [
    pd.read_excel(workbook_path, sheet_name='Open Data')
    for workbook_path in (
        budget_path_2019,
        budget_path_2020,
        budget_path_2021,
        budget_path_2022,
        budget_path_2023,
    )
]

# Expose each year's frame under its own name; these are the same objects held in df_list.
budget2019_df, budget2020_df, budget2021_df, budget2022_df, budget2023_df = df_list

In [3]:
#Renaming the data worksheets so that every workbook uses the same sheet name ('Open Data')
#The rename is a one-time, permanent change to the file on disk:
#  the old worksheet title for workbook 2019 was '2019'
#  the old worksheet title for workbook 2021 was '2021'
#The guard makes this cell safe to re-run: once the sheet has been renamed nothing is
#selected, changed or written back. On an untouched copy of the workbook this cell has to
#run before the data is read with sheet_name='Open Data'.
workbook2019 = openpyxl.load_workbook(budget_path_2019) #opening the 2019 approved budget summary workbook
try:
    if '2019' in workbook2019.sheetnames: #only rename while the old title is still present
        rename_2019_sheet = workbook2019['2019'] #selecting the excel sheet to rename (by its OLD title)
        rename_2019_sheet.title = 'Open Data' #changing the title
        workbook2019.save(budget_path_2019) #saving the modified file using the original file path
finally:
    workbook2019.close() #closing the workbook even if the rename or save fails

In [4]:
# One-time rename of the 2021 data worksheet from '2021' to 'Open Data' so that it matches
# the sheet name used by the other workbooks. Safe to re-run: nothing is changed or saved
# once the old title is gone.
workbook2021 = openpyxl.load_workbook(budget_path_2021) # opening the 2021 approved budget summary workbook
try:
    sheet_names = workbook2021.sheetnames # Get the sheet names of the workbook
    if '2021' in sheet_names: # the sheet still carries its old title
        rename_2021_sheet = workbook2021['2021']  # Selecting the Excel sheet to rename (by its OLD title)
        rename_2021_sheet.title = 'Open Data'  # Changing the title
        workbook2021.save(budget_path_2021) # Saving the modified workbook back to the same path
finally:
    workbook2021.close() # Closing the workbook even if the rename or save fails

In [10]:
# De-duplicate each year's budget on its amount column and keep every result, keyed by year.
#
# The amount column is labelled with the bare year (e.g. 2019) until the rename cell has run
# and 'Budgeted amount <year>' afterwards; both labels are accepted so this cell works when
# the notebook is executed top to bottom as well as after the rename.
#
# NOTE(review): de-duplicating on the amount alone also removes distinct line items that
# merely share a value, so the source frames are left untouched and the results are kept
# separately in `deduplicated_by_year`.
deduplicated_by_year = {}
for budget_year, yearly_budget in enumerate(df_list, start=2019):
    renamed_label = f'Budgeted amount {budget_year}'
    amount_column = renamed_label if renamed_label in yearly_budget.columns else budget_year
    deduplicated_by_year[budget_year] = yearly_budget.drop_duplicates(subset=amount_column)

# Show the most recent year's result as the cell output.
deduplicated_by_year[2023]

Unnamed: 0,Program,Service,Activity,Expense/Revenue,Category Name,Sub-Category Name,Commitment item,Budgeted amount 2023
0,311 Toronto,311 Development,311 PMO,Expenses,Benefits,Benefits,Canada Pension Plan,6.329340e+04
1,311 Toronto,311 Development,311 PMO,Expenses,Service And Rent,Telecommnuncation,Cellular Telephones,2.830000e+03
2,311 Toronto,311 Development,311 PMO,Expenses,Benefits,Benefits,Comprehensive Medical,7.259532e+04
3,311 Toronto,311 Development,311 PMO,Expenses,Benefits,Benefits,Dental Plan,3.780096e+04
4,311 Toronto,311 Development,311 PMO,Expenses,Benefits,Benefits,Employment Insurance,2.299296e+04
...,...,...,...,...,...,...,...,...
20114,Yonge-Dundas Square,Public Square And Event Venue,Not assigned,Expenses,Service And Rent,Services,Other Expenses,7.145000e+05
20115,Yonge-Dundas Square,Public Square And Event Venue,Not assigned,Expenses,Service And Rent,Services,Other Professional And Technical Service,6.600000e+04
20123,Yonge-Dundas Square,Public Square And Event Venue,Not assigned,Revenues,Contribution From Reserves/Reserve Funds,Contribution From Reserves/Reserve Funds,Contributions From Reserves,-3.351900e+04
20124,Yonge-Dundas Square,Public Square And Event Venue,Not assigned,Revenues,User Fees & Donations,User Fees,"Fees, Service Charges",-8.362500e+05


In [5]:
# Missing-value tally per column for every budget year.
# df_list is ordered oldest first, so position 0 corresponds to 2019.
null_counts_per_year = {
    2019 + position: yearly_budget.isnull().sum()
    for position, yearly_budget in enumerate(df_list)
}

# Report: a heading, the per-column counts, then a blank separator line for each year.
for budget_year, missing_per_column in null_counts_per_year.items():
    print(f"Null counts for {budget_year}:", missing_per_column, "", sep="\n")

Null counts for 2019:
Program              0
Service              0
Activity             0
Expense/Revenue      0
Category Name        0
Sub-Category Name    0
Commitment item      0
2019                 0
dtype: int64

Null counts for 2020:
Program              0
Service              0
Activity             0
Expense/Revenue      0
Category Name        0
Sub-Category Name    0
Commitment item      0
2020                 0
dtype: int64

Null counts for 2021:
Program              0
Service              0
Activity             0
Expense/Revenue      0
Category Name        0
Sub-Category Name    0
Commitment item      0
2021                 0
dtype: int64

Null counts for 2022:
Program              0
Service              0
Activity             0
Expense/Revenue      0
Category Name        0
Sub-Category Name    0
Commitment item      0
2022                 0
dtype: int64

Null counts for 2023:
Program              0
Service              0
Activity             0
Expense/Revenue      0
Catego

In [6]:
# Give each year's amount column a descriptive label: the column keyed by the bare year
# (2019, 2020, ...) becomes 'Budgeted amount <year>'. df_list is ordered oldest first.
# The rename is done in place so the individually named frames, which are the same objects
# as the entries of df_list, pick up the new label too.
for budget_year, yearly_budget in enumerate(df_list, start=2019):
    yearly_budget.rename(
        columns={budget_year: f'Budgeted amount {budget_year}'},
        inplace=True,
    )

In [7]:
def format_sci_notation(x):
    """Render a number in scientific notation with six decimal places, e.g. '6.329340e+04'."""
    return '{:.6e}'.format(float(x))


# Keep the budget figures numeric. Writing the formatted text back into the columns would
# round every amount to seven significant digits and turn the columns into strings, which
# breaks sums, comparisons and plots further on.
for df in df_list:
    for column in df.columns:
        # str() guards against non-string labels (e.g. a bare year before the rename cell ran).
        if 'Budgeted amount' in str(column):
            df[column] = df[column].astype(float)

# Scientific notation is applied where it belongs: when floats are displayed.
pd.set_option('display.float_format', format_sci_notation)

In [8]:
# Expense-only view of each year's budget: rows whose 'Expense/Revenue' flag is 'Expenses'.
(
    budget2019_df_exp,
    budget2020_df_exp,
    budget2021_df_exp,
    budget2022_df_exp,
    budget2023_df_exp,
) = (
    yearly_budget[yearly_budget['Expense/Revenue'].eq('Expenses')]
    for yearly_budget in (
        budget2019_df,
        budget2020_df,
        budget2021_df,
        budget2022_df,
        budget2023_df,
    )
)

In [9]:
# Revenue-only view of each year's budget: rows whose 'Expense/Revenue' flag is 'Revenues'.
(
    budget2019_df_rev,
    budget2020_df_rev,
    budget2021_df_rev,
    budget2022_df_rev,
    budget2023_df_rev,
) = (
    yearly_budget[yearly_budget['Expense/Revenue'].eq('Revenues')]
    for yearly_budget in (
        budget2019_df,
        budget2020_df,
        budget2021_df,
        budget2022_df,
        budget2023_df,
    )
)