In [3]:
import pandas as pd
import os
from datetime import datetime
from pathlib import Path
import numpy as np

In [4]:
def get_all_weather_params(weather_params):
    base_path = 'data/final-weather'
    for year_folder in os.listdir(base_path):
        year_path = os.path.join(base_path, year_folder)

        for week_num in range(1, 53):
            week_path = os.path.join(year_path, f'week_{week_num}')

            if not os.path.exists(week_path):
                print(f"Path {week_path} does not exist")
                continue

            file_list = Path(week_path).glob('*')

            for fine_area in file_list:
                fine_area_df = pd.read_csv(fine_area, skipinitialspace=True)  # skipinitialspace to remove extra spaces in column names
                weather_params.update(fine_area_df.columns)

In [5]:
weather_params = set()
get_all_weather_params(weather_params)
print(weather_params)
print(len(weather_params))

Path data/final-weather/quarterly/week_1 does not exist
Path data/final-weather/quarterly/week_2 does not exist
Path data/final-weather/quarterly/week_3 does not exist
Path data/final-weather/quarterly/week_4 does not exist
Path data/final-weather/quarterly/week_5 does not exist
Path data/final-weather/quarterly/week_6 does not exist
Path data/final-weather/quarterly/week_7 does not exist
Path data/final-weather/quarterly/week_8 does not exist
Path data/final-weather/quarterly/week_9 does not exist
Path data/final-weather/quarterly/week_10 does not exist
Path data/final-weather/quarterly/week_11 does not exist
Path data/final-weather/quarterly/week_12 does not exist
Path data/final-weather/quarterly/week_13 does not exist
Path data/final-weather/quarterly/week_14 does not exist
Path data/final-weather/quarterly/week_15 does not exist
Path data/final-weather/quarterly/week_16 does not exist
Path data/final-weather/quarterly/week_17 does not exist
Path data/final-weather/quarterly/week_1

In [6]:
def create_weather_df(long_splits, lat_splits, weather_params, weather_params_to_remove, all_week_dfs, remove_param_list):
    num_fine_areas = long_splits * lat_splits
    
    # First average and group all the values and figure out which columns don't meet the threshold

    base_path = 'data/final-weather'
    for year_num in range(2017, 2023):
        year_path = os.path.join(base_path, str(year_num))
        if not os.path.exists(year_path):
            print(f"Path {year_path} does not exist")
            continue

        for week_num in range(1, 53):
            week_path = os.path.join(year_path, f'week_{week_num}')
            if not os.path.exists(week_path):
                print(f"Path {week_path} does not exist")
                continue

            # create the 36 * 67 weekly dataframe
            week_df = pd.DataFrame(columns=weather_params)
            for i in range(1, num_fine_areas + 1):
                fine_area_path = week_path + f'/fine_area_{i}_weather.csv'
                print("Reading file: ", fine_area_path)

                # average the fine areas and add the averaged row to the dataframe
                if os.path.exists(fine_area_path):
                    fine_area_df = pd.read_csv(fine_area_path, skipinitialspace=True)
                    # Convert columns to numeric, handling errors with coerce to replace invalid values with NaN
                    fine_area_df = fine_area_df.apply(pd.to_numeric, errors='coerce')
                    # Calculate average of each column
                    averaged_row = fine_area_df.mean(axis=0).to_frame().T
                    averaged_row = averaged_row.reindex(columns=weather_params, fill_value=np.nan)

                    week_df = pd.concat([week_df, averaged_row], ignore_index=True)

                # add an empty row to the dataframe
                else:
                    empty_row = pd.DataFrame(columns=weather_params)
                    new_row = [float('nan')] * len(empty_row.columns)
                    empty_row.loc[len(empty_row)] = new_row
                    week_df = pd.concat([week_df, empty_row], ignore_index=True)

            # remove columns that don't meet the threshold
            threshold = 0.7
            nan_threshold = len(week_df) * threshold
            columns_to_drop = week_df.columns[week_df.isna().sum() > nan_threshold].tolist()
            remove_param_list.append(columns_to_drop)
            weather_params_to_remove.update(columns_to_drop)

            # add the week_df to the list of all week dataframes
            all_week_dfs.append(week_df)


In [7]:
all_week_dfs = []

long_splits = 6
lat_splits = 6
weather_params_to_remove = set()
remove_param_list = []
create_weather_df(long_splits, lat_splits, list(weather_params), weather_params_to_remove, all_week_dfs, remove_param_list)

Reading file:  data/final-weather/2017/week_1/fine_area_1_weather.csv
Reading file:  data/final-weather/2017/week_1/fine_area_2_weather.csv
Reading file:  data/final-weather/2017/week_1/fine_area_3_weather.csv
Reading file:  data/final-weather/2017/week_1/fine_area_4_weather.csv
Reading file:  data/final-weather/2017/week_1/fine_area_5_weather.csv
Reading file:  data/final-weather/2017/week_1/fine_area_6_weather.csv
Reading file:  data/final-weather/2017/week_1/fine_area_7_weather.csv
Reading file:  data/final-weather/2017/week_1/fine_area_8_weather.csv
Reading file:  data/final-weather/2017/week_1/fine_area_9_weather.csv
Reading file:  data/final-weather/2017/week_1/fine_area_10_weather.csv
Reading file:  data/final-weather/2017/week_1/fine_area_11_weather.csv
Reading file:  data/final-weather/2017/week_1/fine_area_12_weather.csv
Reading file:  data/final-weather/2017/week_1/fine_area_13_weather.csv
Reading file:  data/final-weather/2017/week_1/fine_area_14_weather.csv
Reading file:  

In [8]:
len(all_week_dfs)

312

In [9]:
print(weather_params_to_remove)
print(len(weather_params_to_remove))

{'max_wnd_spd_10m_pst1hr', 'ACTUAL_WIND_SPEED', 'rnfl_amt_pst1hr', 'HEIGHT_OF_SNOW', 'air_temp_2', 'SWE', 'rel_hum', 'air_temp', 'WSPD_SCLR', 'wind_gust_speed', 'mslp', 'MEASURED_WIND_DIRECTION1', 'MEASURED_WIND_SPEED1', 'snw_dpth', 'ONE_DAY_SNOW', 'Snow_WE', 'precipitation', 'dwpt_temp', 'ACTUAL_WIND_DIRECTION', 'air_temp_1', 'avg_wnd_spd_10m_pst10mts', 'cum_pcpn_amt', 'time', 'MIN_TEMP', 'avg_air_temp_pst1hr', 'mean_sea_level', 'HUMIDITY', 'MAXIMUM_AIR_TEMPERATURE', 'CURRENT_AIR_TEMPERATURE1', 'MAXIMUM_MEASURED_WIND_SPEED1', 'SNOW_ON_THE_GROUND', 'WDIR_VECT', 'air_temperature', 'PRECIPITATION_NEW', 'pcpn_amt_pst1hr', 'avg_rel_hum_pst1hr', 'temperature', 'WIND_DIRECTION_STD_DEVIATION1', 'TEMP_MEAN', 'stn_pres', 'min_air_temp_snc_last_reset', 'ONE_DAY_PRECIPITATION', 'ATMOSPHERIC_PRESSURE', 'avg_wnd_dir_10m_pst10mts', 'RELATIVE_HUMIDITY1', 'air_temperature_yesterday_low', 'AirTemp', 'relative_humidity', 'CURRENT_AIR_TEMPERATURE2', 'DEW_POINT', 'air_temperature_yesterday_high', 'dew_poi

In [10]:
opposite_result = weather_params.difference(weather_params_to_remove)
print(opposite_result)

{'week_of_year'}


In [11]:
new_weather_params_to_remove = set()

for df in all_week_dfs:
    threshold = 0.9
    nan_threshold = len(df) * threshold
    columns_to_drop = df.columns[df.isna().sum() > nan_threshold].tolist()
    new_weather_params_to_remove.update(columns_to_drop)

In [12]:
print(len(new_weather_params_to_remove))

63


In [13]:
print(len(new_weather_params_to_remove))
opposite_result = weather_params.difference(new_weather_params_to_remove)
print(opposite_result)

63
{'MAX_TEMP', 'week_of_year', 'pcpn_amt_pst1hr', 'MIN_TEMP'}


# Group by a year

In [44]:
dfs = all_week_dfs

# Define the number of dataframes in each group
num_dataframes_in_group = 52

# Calculate the number of groups
num_groups = len(dfs) // num_dataframes_in_group

# Create an empty list to store the new dataframes for each group
grouped_dfs = []

# Loop through each group
for group_num in range(num_groups):
    # Extract the dataframes for the current group
    current_group = dfs[group_num * num_dataframes_in_group : (group_num + 1) * num_dataframes_in_group]

    # Create a new dataframe for the current group with the same dimensions
    new_df = pd.DataFrame(index=current_group[0].index, columns=current_group[0].columns)

    # Calculate the average for each cell in the new dataframe
    for col in new_df.columns:
        for row in new_df.index:
            values = [df.loc[row, col] for df in current_group if pd.notna(df.loc[row, col])]
            if values:  # Check if values is not empty
                avg_value = np.mean(values)
                new_df.loc[row, col] = avg_value

    # Append the new dataframe to the list
    grouped_dfs.append(new_df)

# grouped_dfs now contains the new dataframes for each group
# You can access them like grouped_dfs[0], grouped_dfs[1], etc.

In [74]:
new_weather_params_to_remove = set()

i = 2016

for df in grouped_dfs:
    # df.to_csv(f'grouped_weather_year_{i}.csv', index=False)
    # i += 1
    threshold = 0.5
    nan_threshold = len(df) * threshold
    columns_to_drop = df.columns[df.isna().sum() > nan_threshold].tolist()
    new_weather_params_to_remove.update(columns_to_drop)

In [75]:
print(len(new_weather_params_to_remove))
opposite_result = weather_params.difference(new_weather_params_to_remove)
print(opposite_result)

61
{'wind_direction', 'relative_humidity', 'week_of_year', 'precipitation', 'temperature', 'wind_speed'}


# Group by months

In [110]:
dfs = all_week_dfs

# Define the number of dataframes in each group
num_dataframes_in_group = 4

# Calculate the number of groups
num_groups = len(dfs) // num_dataframes_in_group

# Create an empty list to store the new dataframes for each group
grouped_dfs = []

# Loop through each group
for group_num in range(num_groups):
    # Extract the dataframes for the current group
    current_group = dfs[group_num * num_dataframes_in_group : (group_num + 1) * num_dataframes_in_group]

    # Create a new dataframe for the current group with the same dimensions
    new_df = pd.DataFrame(index=current_group[0].index, columns=current_group[0].columns)

    # Calculate the average for each cell in the new dataframe
    for col in new_df.columns:
        for row in new_df.index:
            values = [df.loc[row, col] for df in current_group if pd.notna(df.loc[row, col])]
            if values:  # Check if values is not empty
                avg_value = np.mean(values)
                new_df.loc[row, col] = avg_value

    # Append the new dataframe to the list
    grouped_dfs.append(new_df)

# grouped_dfs now contains the new dataframes for each group
# You can access them like grouped_dfs[0], grouped_dfs[1], etc.

In [111]:
len(grouped_dfs)

78

In [112]:
new_weather_params_to_remove = set()

for df in grouped_dfs:
    threshold = 0.95
    nan_threshold = len(df) * threshold
    columns_to_drop = df.columns[df.isna().sum() > nan_threshold].tolist()
    new_weather_params_to_remove.update(columns_to_drop)

In [113]:
print(len(new_weather_params_to_remove))
opposite_result = weather_params.difference(new_weather_params_to_remove)
print(opposite_result)

50
{'wind_direction', 'MIN_TEMP', 'MAX_TEMP', 'relative_humidity', 'mean_sea_level', 'ONE_DAY_PRECIPITATION', 'wind_speed', 'dew_point', 'wind_gust_speed', 'week_of_year', 'air_temperature', 'pcpn_amt_pst1hr', 'snw_dpth', 'snw_dpth_wtr_equiv', 'tendency_amount', 'total_precipitation', 'air_temp_1'}


In [32]:
columns_to_drop = list(new_weather_params_to_remove)
columns_to_drop.append('week_of_year')
group = 1
for df in grouped_dfs:
    df.drop(columns=columns_to_drop, inplace=True)
    df.to_csv(f'data/final-weather/monthly/group_{group}.csv', index=False)
    group += 1

## Monthly Time Interpolation

In [114]:
year_grouped_months = []
month_grouped = []

i = 0
for df in grouped_dfs:
    month_grouped.append(df)
    i += 1
    if i % 12 == 0:
        year_grouped_months.append(month_grouped)
        month_grouped = []

In [115]:
year_interp_range = 1
count = 0

for year_i in range(len(year_grouped_months)):
    month_dfs = year_grouped_months[year_i]

    for month_i in range(len(month_dfs)):
        month_df = month_dfs[month_i]

        for row in month_df.index:
            for col in month_df.columns:
                
                if pd.isna(month_df.loc[row, col]):
                    interp_vals = []

                    for interp_step in range (1, year_interp_range + 1):
                        # check previous year
                        if year_i - interp_step >= 0: 
                            if year_grouped_months[year_i - interp_step][month_i].loc[row, col]:
                                df = year_grouped_months[year_i - interp_step][month_i]
                                if not pd.isna(df.loc[row, col]):
                                    val = year_grouped_months[year_i - interp_step][month_i].loc[row, col]
                                    interp_vals.append(val)

                        # check next year
                        if year_i + interp_step < len(year_grouped_months):
                            if year_grouped_months[year_i + interp_step][month_i].loc[row, col]:
                                df = year_grouped_months[year_i + interp_step][month_i]
                                if not pd.isna(df.loc[row, col]):
                                    val = year_grouped_months[year_i + interp_step][month_i].loc[row, col]
                                    interp_vals.append(val)

                    if len(interp_vals) <= 0:
                        print("interp step not big enough")
                        count += 1
                        # break
                    else: 
                        avg_interp_val = np.mean(interp_vals)
                        month_df.loc[row, col] = avg_interp_val

interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
interp step not big enough
i

In [116]:
grouped_dfs = []

print(len(year_grouped_months))
print(len(year_grouped_months[0]))

for year_i in range(len(year_grouped_months)):
    month_dfs = year_grouped_months[year_i]

    for month_i in range(len(month_dfs)):
        month_df = month_dfs[month_i]
        grouped_dfs.append(month_df)

6
12
   max_wnd_spd_10m_pst1hr ACTUAL_WIND_SPEED rnfl_amt_pst1hr HEIGHT_OF_SNOW  \
0                     NaN               NaN             NaN            NaN   
1                     NaN               NaN             NaN            NaN   
2                     NaN               NaN             NaN            NaN   
3                     NaN               NaN             NaN            NaN   
4                     NaN               NaN             NaN            NaN   
5                     NaN               NaN             NaN            NaN   
6                     NaN               NaN             NaN      92.484168   
7                     NaN               NaN             NaN            NaN   
8                     NaN               NaN             NaN            NaN   
9                     NaN               NaN             NaN      52.874068   
10                    NaN               NaN             NaN            NaN   
11                    NaN               NaN             NaN

In [117]:
len(grouped_dfs)

72

In [120]:
new_weather_params_to_remove = set()

for df in grouped_dfs:
    threshold = 0.7
    nan_threshold = len(df) * threshold
    columns_to_drop = df.columns[df.isna().sum() > nan_threshold].tolist()
    new_weather_params_to_remove.update(columns_to_drop)

In [121]:
print(len(grouped_dfs[0].columns))
print(len(new_weather_params_to_remove))
opposite_result = weather_params.difference(new_weather_params_to_remove)
print(len(opposite_result))
print(opposite_result)

67
61
6
{'wind_direction', 'relative_humidity', 'week_of_year', 'precipitation', 'temperature', 'wind_speed'}


In [122]:
columns_to_drop = list(new_weather_params_to_remove)
columns_to_drop.append('week_of_year')
group = 1
for df in grouped_dfs:
    df.drop(columns=columns_to_drop, inplace=True)
    df.to_csv(f'data/final-weather/monthly-interpolated/group_{group}.csv', index=False)
    group += 1

# Group quarterly

In [123]:
dfs = all_week_dfs

# Define the number of dataframes in each group
num_dataframes_in_group = 13

# Calculate the number of groups
num_groups = len(dfs) // num_dataframes_in_group

# Create an empty list to store the new dataframes for each group
grouped_dfs = []

# Loop through each group
for group_num in range(num_groups):
    # Extract the dataframes for the current group
    current_group = dfs[group_num * num_dataframes_in_group : (group_num + 1) * num_dataframes_in_group]

    # Create a new dataframe for the current group with the same dimensions
    new_df = pd.DataFrame(index=current_group[0].index, columns=current_group[0].columns)

    # Calculate the average for each cell in the new dataframe
    for col in new_df.columns:
        for row in new_df.index:
            values = [df.loc[row, col] for df in current_group if pd.notna(df.loc[row, col])]
            if values:  # Check if values is not empty
                avg_value = np.mean(values)
                new_df.loc[row, col] = avg_value

    # Append the new dataframe to the list
    grouped_dfs.append(new_df)

# grouped_dfs now contains the new dataframes for each group
# You can access them like grouped_dfs[0], grouped_dfs[1], etc.

In [124]:
len(grouped_dfs)

24

In [125]:
new_weather_params_to_remove = set()

for df in grouped_dfs:
    threshold = 0.53
    nan_threshold = len(df) * threshold
    columns_to_drop = df.columns[df.isna().sum() > nan_threshold].tolist()
    new_weather_params_to_remove.update(columns_to_drop)

In [126]:
print(len(new_weather_params_to_remove))
opposite_result = weather_params.difference(new_weather_params_to_remove)
print(len(opposite_result) - 1)
print(opposite_result)

61
5
{'wind_direction', 'relative_humidity', 'week_of_year', 'precipitation', 'temperature', 'wind_speed'}


In [127]:
columns_to_drop = list(new_weather_params_to_remove)
columns_to_drop.append('week_of_year')
group = 1
for df in grouped_dfs:
    df.drop(columns=columns_to_drop, inplace=True)
    # df.to_csv(f'data/final-weather/quarterly/group_{group}.csv', index=False)
    group += 1

## Spatial Interpolation

In [131]:
matrix = grouped_dfs[0]
matrix

Unnamed: 0,precipitation,temperature,relative_humidity,wind_direction,wind_speed
0,0.104312,-5.187596,80.7569,166.195039,10.271269
1,,,73.507728,220.699558,8.26137
2,0.038324,-0.631112,72.622744,163.276263,5.044251
3,,,73.507728,220.699558,8.26137
4,,,,,
5,0.102203,-3.425849,89.578898,122.653162,5.765309
6,0.05568,-6.419953,82.566546,180.141024,7.713385
7,,,,,
8,0.045604,-1.295421,75.382784,172.912088,5.65609
9,0.066257,-5.250741,78.78956,183.75089,5.40746


In [132]:
def index_to_coordinates(index, num_cols):
    # Convert flattened index to row, column coordinates
    row = index // num_cols
    col = index % num_cols
    return row, col

def coordinates_to_index(row, col, num_cols):
    # Convert row, column coordinates to flattened index
    return row * num_cols + col

def calculate_average(matrix, new_matrix, index, col_index, num_cols, depth):
    i, j = index_to_coordinates(index, num_cols)

    # Check if max depth is exceeded
    if (
        max(0, i - depth) == 0
        and max(0, j - depth) == 0
        and min(len(matrix) // num_cols, i + depth + 1) == len(matrix) // num_cols
        and min(num_cols, j + depth + 1) == num_cols
    ):
        print(f"max depth exceeded for index {index}")
        return

    neighbors_sum = 0
    count = 0

    # Iterate over the neighboring cells
    for x in range(max(0, i - depth), min(len(matrix) // num_cols, i + depth + 1)):
        for y in range(max(0, j - depth), min(num_cols, j + depth + 1)):
            if x == i and y == j:
                continue
            neighbor_index = coordinates_to_index(x, y, num_cols)

            if pd.notna(matrix.iloc[neighbor_index][col_index]):
                neighbors_sum += matrix.iloc[neighbor_index][col_index]
                count += 1

    # Calculate the average
    if count > 0:
        new_matrix.iloc[index, col_index] = neighbors_sum / count
    else:
        calculate_average(matrix, new_matrix, index, col_index, num_cols, depth + 1)  # If there are no neighbors, keep the current value

    return

def update_matrix(matrix, num_cols):
    new_matrix = matrix.copy()

    # Iterate over each cell in the matrix
    for i in range(len(matrix)):
        for j in range(len(matrix.columns)):
            if pd.isna(matrix.iloc[i, j]):
                # Calculate the average for the current cell
                calculate_average(matrix, new_matrix, i, j, num_cols, 1)

    return new_matrix


num_cols = 6

updated_matrix = update_matrix(matrix, num_cols)

print(matrix)
print(updated_matrix)

   precipitation temperature relative_humidity wind_direction wind_speed
0       0.104312   -5.187596           80.7569     166.195039  10.271269
1            NaN         NaN         73.507728     220.699558    8.26137
2       0.038324   -0.631112         72.622744     163.276263   5.044251
3            NaN         NaN         73.507728     220.699558    8.26137
4            NaN         NaN               NaN            NaN        NaN
5       0.102203   -3.425849         89.578898     122.653162   5.765309
6        0.05568   -6.419953         82.566546     180.141024   7.713385
7            NaN         NaN               NaN            NaN        NaN
8       0.045604   -1.295421         75.382784     172.912088    5.65609
9       0.066257   -5.250741          78.78956      183.75089    5.40746
10      0.000283   -5.456357          82.92904     148.495511   3.730852
11      0.102203   -3.425849         89.578898     122.653162   5.765309
12           NaN         NaN               NaN     

In [134]:
def index_to_coordinates(index, num_cols):
    # Convert flattened index to row, column coordinates
    row = index // num_cols
    col = index % num_cols
    return row, col

def coordinates_to_index(row, col, num_cols):
    # Convert row, column coordinates to flattened index
    return row * num_cols + col

def calculate_average(matrix, new_matrix, index, col_index, num_cols, depth):
    i, j = index_to_coordinates(index, num_cols)

    # Check if max depth is exceeded
    if (
        max(0, i - depth) == 0
        and max(0, j - depth) == 0
        and min(len(matrix) // num_cols, i + depth + 1) == len(matrix) // num_cols
        and min(num_cols, j + depth + 1) == num_cols
    ):
        print(f"max depth exceeded for index {index}")
        return

    neighbors_sum = 0
    count = 0

    # Iterate over the neighboring cells
    for x in range(max(0, i - depth), min(len(matrix) // num_cols, i + depth + 1)):
        for y in range(max(0, j - depth), min(num_cols, j + depth + 1)):
            if x == i and y == j:
                continue
            neighbor_index = coordinates_to_index(x, y, num_cols)

            if pd.notna(matrix.iloc[neighbor_index][col_index]):
                neighbors_sum += matrix.iloc[neighbor_index][col_index]
                count += 1

    # Calculate the average
    if count > 0:
        new_matrix.iloc[index, col_index] = neighbors_sum / count
    else:
        calculate_average(matrix, new_matrix, index, col_index, num_cols, depth + 1)  # If there are no neighbors, keep the current value

    return

def update_matrix(matrix, num_cols):
    new_matrix = matrix.copy()

    # Iterate over each cell in the matrix
    for i in range(len(matrix)):
        for j in range(len(matrix.columns)):
            if pd.isna(matrix.iloc[i, j]):
                # Calculate the average for the current cell
                calculate_average(matrix, new_matrix, i, j, num_cols, 1)

    return new_matrix


num_cols = 6

group = 1
for matrix in grouped_dfs:
    updated_matrix = update_matrix(matrix, num_cols)
    updated_matrix.to_csv(f'data/final-weather/quarterly-interpolated/group_{group}.csv', index=False)
    group += 1

In [None]:
def index_to_coordinates(index, num_cols):
    # Convert flattened index to row, column coordinates
    row = index // num_cols
    col = index % num_cols
    return row, col

def coordinates_to_index(row, col, num_cols):
    # Convert row, column coordinates to flattened index
    return row * num_cols + col

def calculate_average(matrix, new_matrix, index, col_index, num_cols, depth):
    i, j = index_to_coordinates(index, num_cols)

    # Check if max depth is exceeded
    if (
        max(0, i - depth) == 0
        and max(0, j - depth) == 0
        and min(len(matrix) // num_cols, i + depth + 1) == len(matrix) // num_cols
        and min(num_cols, j + depth + 1) == num_cols
    ):
        print(f"max depth exceeded for index {index}")
        return

    neighbors_sum = 0
    count = 0

    # Iterate over the neighboring cells
    for x in range(max(0, i - depth), min(len(matrix) // num_cols, i + depth + 1)):
        for y in range(max(0, j - depth), min(num_cols, j + depth + 1)):
            if x == i and y == j:
                continue
            neighbor_index = coordinates_to_index(x, y, num_cols)

            if pd.notna(matrix.iloc[neighbor_index][col_index]):
                neighbors_sum += matrix.iloc[neighbor_index][col_index]
                count += 1

    # Calculate the average
    if count > 0:
        new_matrix.iloc[index, col_index] = neighbors_sum / count
    else:
        calculate_average(matrix, new_matrix, index, col_index, num_cols, depth + 1)  # If there are no neighbors, keep the current value

    return

def update_matrix(matrix, num_cols):
    new_matrix = matrix.copy()

    # Iterate over each cell in the matrix
    for i in range(len(matrix)):
        for j in range(len(matrix.columns)):
            if pd.isna(matrix.iloc[i, j]):
                # Calculate the average for the current cell
                calculate_average(matrix, new_matrix, i, j, num_cols, 1)

    return new_matrix


num_cols = 6

group = 1
for matrix in grouped_dfs:
    updated_matrix = update_matrix(matrix, num_cols)
    updated_matrix.to_csv(f'data/final-weather/quarterly-interpolated/group_{group}.csv', index=False)
    group += 1