In [3]:
import pandas as pd

# Load the dataset
df = pd.read_csv("Final_Cleaned_Main_Dataset.csv")

# Split the combined period label (e.g. "Q2 2021") into quarter digit and year
# in a single pass. Named groups give the extracted columns explicit names, and
# `\s+` tolerates more than one whitespace character between the two parts.
period_parts = df["Quarter"].str.extract(r"Q(?P<Quarter>\d)\s+(?P<Year>\d{4})")

# str.extract leaves NaN for labels that do not match the pattern. Fail here
# with a readable message instead of an opaque integer-cast error further down.
unparsed = period_parts.isna().any(axis=1)
if unparsed.any():
    bad_labels = df.loc[unparsed, "Quarter"].unique().tolist()
    raise ValueError(
        f"Could not parse {int(unparsed.sum())} 'Quarter' label(s); examples: {bad_labels[:5]}"
    )

# Store Quarter (Q1 → 1, Q2 → 2, etc.) and Year as integers
df["Quarter"] = period_parts["Quarter"].astype(int)
df["Year"] = period_parts["Year"].astype(int)

# Ensure correct column order
df = df[["Province", "Year", "Quarter", "Excellent_Very_Good", "Good", "Fair_Poor"]]

# Save the updated dataset
df.to_csv("Final_Cleaned_Main_Dataset_Updated.csv", index=False)

# Check the first few rows
print(df.head())


                    Province  Year  Quarter  Excellent_Very_Good  Good  \
0  Newfoundland and Labrador  2021        2                 46.6  37.2   
1       Prince Edward Island  2021        2                 48.7  33.1   
2                Nova Scotia  2021        2                 47.0  29.3   
3              New Brunswick  2021        2                 46.9  37.8   
4                     Quebec  2021        2                 53.0  34.3   

   Fair_Poor  
0       16.2  
1       18.1  
2       23.7  
3       15.2  
4       12.7  


In [7]:
import pandas as pd

# Load the cleaned dataset
df = pd.read_csv("Final_Cleaned_Main_Dataset_Updated.csv")

# Select whatever rows exist for Q1 2023. An empty result means that quarter
# is absent for every province in the file.
is_q1_2023 = df["Year"].eq(2023) & df["Quarter"].eq(1)
missing_q1_2023 = df.loc[is_q1_2023]
print(missing_q1_2023)


Empty DataFrame
Columns: [Province, Year, Quarter, Excellent_Very_Good, Good, Fair_Poor]
Index: []


In [12]:
import pandas as pd

# Load the cleaned dataset
df = pd.read_csv("Final_Cleaned_Main_Dataset_Updated.csv")

# Ensure "Year" and "Quarter" are numeric
df["Year"] = df["Year"].astype(int)
df["Quarter"] = df["Quarter"].astype(int)

# The three mental health categories that get a midpoint estimate
value_cols = ["Excellent_Very_Good", "Good", "Fair_Poor"]

# Find Q4 2022 and Q2 2023 values per province
q4_2022 = df[(df["Year"] == 2022) & (df["Quarter"] == 4)]
q2_2023 = df[(df["Year"] == 2023) & (df["Quarter"] == 2)]

# Provinces that already have a Q1 2023 row must not receive a second one
provinces_with_q1_2023 = df.loc[(df["Year"] == 2023) & (df["Quarter"] == 1), "Province"]

# Merge both datasets on "Province" to align Q4 2022 and Q2 2023.
# The default inner join means a province missing either quarter gets no
# estimate. validate="one_to_one" raises if a province appears more than once
# in either quarter, which would otherwise multiply into several estimate rows.
df_q1_2023 = q4_2022.merge(
    q2_2023,
    on="Province",
    suffixes=("_Q4_2022", "_Q2_2023"),
    validate="one_to_one",
)
df_q1_2023 = df_q1_2023[~df_q1_2023["Province"].isin(provinces_with_q1_2023)]

# Apply Midpoint Estimation for all three mental health categories and round to 1 decimal place
midpoints = {
    col: ((df_q1_2023[f"{col}_Q4_2022"] + df_q1_2023[f"{col}_Q2_2023"]) / 2).round(1)
    for col in value_cols
}

# Keep only Province plus the estimates, and explicitly set Year and Quarter
# for Q1 2023. .assign builds a new frame, so nothing is written onto a slice
# of the merged frame.
df_q1_2023 = df_q1_2023[["Province"]].assign(**midpoints, Year=2023, Quarter=1)

# Append the estimated Q1 2023 values to the dataset (columns align by name)
df = pd.concat([df, df_q1_2023], ignore_index=True)

# Save the updated dataset with a correct name
df.to_csv("Final_Cleaned_Main_Dataset_Midpoint_Estimation.csv", index=False)

print("Q1 2023 estimated using Midpoint Estimation (Q4 2022 & Q2 2023 averages) and rounded to 1 decimal place.")
print("Dataset saved as: Final_Cleaned_Main_Dataset_Midpoint_Estimation.csv")


Q1 2023 estimated using Midpoint Estimation (Q4 2022 & Q2 2023 averages) and rounded to 1 decimal place.
Dataset saved as: Final_Cleaned_Main_Dataset_Midpoint_Estimation.csv
