In [None]:
@contextlib.contextmanager
def stdout_redirected(new_stdout):
    """Temporarily point ``sys.stdout`` at *new_stdout*.

    Used as ``with stdout_redirected(stream): ...``. The stream that was
    installed when the block was entered is put back on exit, even if the
    body raises. The ``with`` target receives ``None``.
    """
    previous_stream = sys.stdout
    sys.stdout = new_stdout
    try:
        yield
    finally:
        # Always restore, so an exception in the body cannot leave stdout hijacked.
        sys.stdout = previous_stream

In [None]:
def remove_eta_lines(run_number):
    """Remove every line containing ``"- ETA: "`` from a run's log file, in place.

    The log is expected at ``./model_runs/<run_number>/log.txt`` relative to
    the current working directory. The file is read fully, filtered, and then
    overwritten with the remaining lines in their original order.

    Args:
        run_number: Identifier of the run; it is formatted into the directory
            name, so an int or a str both work.

    Raises:
        OSError: If the log file cannot be opened for reading or writing.
    """
    import os  # local import so the block does not rely on a file-level import

    # os.path.join yields the same ".\model_runs\<n>\log.txt" string on Windows
    # as the previous hard-coded path, but also resolves on POSIX systems.
    log_path = os.path.join('.', 'model_runs', f'{run_number}', 'log.txt')

    with open(log_path, 'r') as f:
        kept_lines = [line for line in f if "- ETA: " not in line]

    # Write the filtered content back to the same file.
    with open(log_path, 'w') as f:
        f.writelines(kept_lines)

In [None]:
def divide_range(a, b, x):
    """Return the boundaries that split a slightly widened [a, b] into x divisions.

    The interval is first widened by 0.01 on each side, then cut into ``x``
    equal parts; the ``x + 1`` boundary values are returned rounded to two
    decimals.

    The number of divisions is capped (with a printed warning) when the
    widened lower bound rounds to 46 (treated as a latitude range, max 10
    divisions) or to 10 (treated as a longitude range, max 14 divisions).
    NOTE(review): 46 and 10 are presumably the lower latitude / longitude of
    the study area -- confirm against the caller.

    Args:
        a: Lower bound of the range.
        b: Upper bound of the range.
        x: Requested number of divisions (must be an integer >= 1).

    Returns:
        list[float]: ``x + 1`` (after capping) boundary values from
        ``a - 0.01`` to ``b + 0.01``.

    Raises:
        ValueError: If ``x`` is smaller than 1.
    """
    # Validate against what the caller passed. The previous check ran after the
    # +1 adjustment and reported "at least 2" although 1 division is accepted.
    if x < 1:
        raise ValueError("Number of divisions must be at least 1")

    n_points = x + 1  # x divisions need x + 1 boundary values
    a = a - 0.01
    b = b + 0.01

    if round(a, 0) == 46:
        if n_points > 11:
            n_points = 11
            print("Warning: 10 is the highest number of divisions possible for latitudes. So the number of divsions is set to maximum that is 10.")

    if round(a, 0) == 10:
        if n_points > 15:
            n_points = 15
            print("Warning: 14 is the highest number of divisions possible for longitudes. So the number of divsions is set to maximum that is 14.")

    step = (b - a) / (n_points - 1)  # width of one division
    return [round(a + i * step, 2) for i in range(n_points)]

In [2]:
def generate_array(b):
    """Build a 12-element list filled with *b*, except positions 0, 1, 2 and 11.

    Those four positions are set to ``1``.
    NOTE(review): the 12 slots presumably correspond to calendar months
    (Jan-Mar and Dec forced to 1) -- confirm with the caller.
    """
    fixed_positions = (0, 1, 2, 11)
    return [1 if position in fixed_positions else b for position in range(12)]

[1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1]


In [None]:
def pad_month_fraction_array(month_fraction_array):
    """Return a copy of *month_fraction_array* with ``1`` inserted at 0, 1, 2 and 11.

    The insertions happen one after another on the copy, so for an 8-element
    input the result has 12 elements: three leading ones, the original eight
    values, and one trailing one. The argument itself is not modified.
    """
    padded = month_fraction_array.copy()
    for position in (0, 1, 2, 11):
        padded.insert(position, 1)
    return padded

In [None]:
def get_dates(ndsi_ds):
    time_ndsi = ndsi_ds["time"].values
    df_time = pd.DataFrame({"date": time_ndsi})
    df_time[["date"]]= df_time[['date']].astype(str)
    df_time['date'] = pd.to_datetime(df_time['date'])
    df_time['date'] = df_time['date'] + pd.Timedelta(hours=12)
    selected_dates = df_time['date'].values
    return selected_dates


In [None]:
def print_info_about_latitude_longitude_groups(temp):
    """Log which latitudes / longitudes belong to each latitude / longitude group.

    Only the rows whose ``Date`` equals ``temp.Date[1]`` (the row labelled 1)
    are considered. For each of the two group columns the unique coordinate
    values per group are written to ``logger`` as a brace-delimited listing.
    """
    reference_date = temp.Date[1]
    temp = temp[temp["Date"] == reference_date]

    sections = (
        ("Latitude Group", 'Latitude_Group', 'Latitude'),
        ("Longitude Group", 'Longitude_Group', 'Longitude'),
    )
    for heading, group_column, value_column in sections:
        members = temp.groupby(group_column)[value_column].unique().to_dict()
        listing = ",\n".join(f" {key}: {value}" for key, value in members.items())
        logger.info(heading)
        logger.info("{\n" + listing + "\n}")

In [None]:
# Plot histogram for df
def plot_before_and_after_month_sampling(df, df_to_delete):
    """Draw side-by-side ``ndsi`` histograms, save them as a PNG and show them.

    The histogram of *df_to_delete* goes in the left subplot (titled
    'Before') and the histogram of *df* in the right subplot (titled 'After').
    The image is written to ``before_after_zero_sampling.png`` inside
    ``folder_name``, a name this function reads from the enclosing scope.
    """
    axis_titles = dict(xaxis_title='Value', yaxis_title='Frequency')
    out_width, out_height = 1000, 400  # size of the combined figure

    # One 100-bin histogram per frame.
    hist_df = px.histogram(df, x='ndsi', nbins=100)
    hist_df.update_layout(**axis_titles)

    hist_to_delete = px.histogram(df_to_delete, x='ndsi', nbins=100)
    hist_to_delete.update_layout(**axis_titles)

    # Combine both traces in a single row with two columns.
    combined = make_subplots(rows=1, cols=2, subplot_titles=('Before', 'After'))
    combined.add_trace(hist_df.data[0], row=1, col=2)
    combined.add_trace(hist_to_delete.data[0], row=1, col=1)
    combined.update_layout(
        title='Before and after zero sampling in mid-year months',
        width=out_width,
        height=out_height,
    )

    # Export scale: target of 600 relative to a baseline of 72.
    scale = 600 / 72
    image_path = os.path.join(folder_name, 'before_after_zero_sampling.png')
    combined.write_image(image_path, width=out_width, height=out_height, scale=scale)

    combined.show()

In [None]:
def DataFrame(df, scenario_array, prediction_array):
    """Combine the coordinates of *df* with three flattened prediction arrays.

    The result has the columns ``Date``, ``Latitude`` and ``Longitude`` taken
    from *df*, followed by one column per scenario: the column name is the
    string form of ``scenario_array[i]`` and the values are
    ``prediction_array[i].flatten()`` for ``i`` in 0..2.
    """
    columns = {key: df[key].values for key in ("Date", "Latitude", "Longitude")}
    for position in range(3):
        columns[f"{scenario_array[position]}"] = prediction_array[position].flatten()
    return pd.DataFrame(columns)

In [None]:
def plot_to_show_month_wise_distribution(df_to_delete, df, month_fraction_array_padded, save):
    """Plot per-month ``ndsi`` histograms for two frames side by side.

    The 4 x 6 grid shows the twelve months of *df_to_delete* in the left three
    columns (titled "before") and the twelve months of *df* in the right three
    columns (titled "after", together with the matching entry of
    *month_fraction_array_padded*).

    Side effects on the arguments: ``df_to_delete['Date']`` is converted with
    ``pd.to_datetime`` and a ``Month`` column is added to BOTH frames in place.
    ``df['Date']`` is not converted here, so it must already support ``.dt``.

    Args:
        df_to_delete: Frame with ``Date`` and ``ndsi`` columns (plotted as "before").
        df: Frame with ``Date`` and ``ndsi`` columns (plotted as "after").
        month_fraction_array_padded: Indexable with 0..11; entry ``month - 1``
            is shown in the "after" title of that month.
        save: When equal to ``1`` the figure is also written to
            ``each_month_zero_cleaning.png`` inside ``folder_name`` (a name
            read from the enclosing scope).
    """
    df_to_delete['Date'] = pd.to_datetime(df_to_delete['Date'])

    # Extract month from the 'Date' column (adds a 'Month' column to both inputs)
    df_to_delete['Month'] = df_to_delete['Date'].dt.month
    df['Month'] = df['Date'].dt.month

    # Create a new figure and subplots
    fig, axs = plt.subplots(4, 6, figsize=(20, 10))  # 4 rows x 6 columns: 3 columns of months per DataFrame


    # Plot histograms for each month in df_to_delete on the left (columns 0-2)
    for month in range(1, 13):
        row = (month - 1) // 3  # Calculate the row index for the subplot
        col = (month - 1) % 3   # Calculate the column index for the subplot

        # Filter data for the current month in df_to_delete
        month_data_df_to_delete = df_to_delete[df_to_delete['Month'] == month]['ndsi']

        # Plot histogram in the corresponding subplot for df_to_delete
        axs[row, col].hist(month_data_df_to_delete, bins=20, color='salmon', alpha=0.7)
        axs[row, col].set_title(f'Month {month} (before)')
        axs[row, col].set_xlabel('NDSI')
        axs[row, col].set_ylabel('Frequency')


    # Plot histograms for each month in df on the right (columns 3-5)
    for month in range(1, 13):
        row = (month - 1) // 3  # Calculate the row index for the subplot
        col = (month - 1) % 3 + 3  # Shift to the right by 3 columns

        # Filter data for the current month in df
        month_data_df = df[df['Month'] == month]['ndsi']

        # Plot histogram in the corresponding subplot for df
        axs[row, col].hist(month_data_df, bins=20, color='skyblue', alpha=0.7)
        axs[row, col].set_title(f'Month {month} (after) | Fraction = {month_fraction_array_padded[month-1]}')
        axs[row, col].set_xlabel('NDSI')
        axs[row, col].set_ylabel('Frequency')


    # NOTE(review): this only rebinds the local name; assuming `df` is a pandas
    # DataFrame, the caller's frame keeps the 'Month' column added above, and
    # df_to_delete's 'Month' column is never removed.
    df = df.drop(["Month"], axis=1)
    # Adjust layout and display the subplots
    plt.tight_layout()

    if save ==1:
        matplotlib_image_path = os.path.join(folder_name, 'each_month_zero_cleaning.png')
        plt.savefig(matplotlib_image_path)


    plt.show()


In [None]:
def get_filtered_dates_for_ndsi(limit_of_nans_in_a_timestep = 120):
    """Return the dates whose NaN count is below the given limit.

    Reads ``number_of_nans_for_each_date.csv`` from the current working
    directory; the file must provide the columns ``Date`` and
    ``Number of NaNs``. A date is dropped when its NaN count is greater than
    or equal to *limit_of_nans_in_a_timestep*.

    Args:
        limit_of_nans_in_a_timestep: Smallest NaN count that disqualifies a date.

    Returns:
        Array of the remaining dates, parsed with ``pd.to_datetime``, in file order.
    """
    # A bare relative filename resolves to the same file as ".\<name>" on
    # Windows and, unlike the backslash form, also works on POSIX systems.
    daily_nan_df = pd.read_csv("number_of_nans_for_each_date.csv")

    too_many_nans = daily_nan_df['Number of NaNs'] >= limit_of_nans_in_a_timestep

    # Select and convert in one expression instead of assigning into a filtered
    # slice, which triggered pandas' SettingWithCopyWarning. Negating the ">="
    # mask (rather than using "<") keeps rows with a missing count, as before.
    selected_dates = daily_nan_df.loc[~too_many_nans, 'Date']
    return pd.to_datetime(selected_dates).values

In [None]:

    
def to_array(data, index_climate_parameter):
    """Flatten one entry of *data* into a 1-D array in (time, lat, lon) order.

    The extents along the first three axes are taken from ``data[0]``; the
    entry ``data[index_climate_parameter]`` is then read over exactly those
    extents with the last axis varying fastest, i.e. the same element order
    as nested loops over time, latitude and longitude.

    Args:
        data: Indexable collection of array-likes that expose ``.shape`` with
            at least three axes.
        index_climate_parameter: Index of the entry to flatten.

    Returns:
        np.ndarray: A new array (not a view of the input) holding the values.

    Raises:
        IndexError: If the selected entry is smaller than ``data[0]`` along
            any of the first three axes.
    """
    reference_shape = data[0].shape
    n_time_steps = reference_shape[0]
    n_lat, n_lon = reference_shape[1], reference_shape[2]

    selected = np.asarray(data[index_climate_parameter])
    # One vectorized slice replaces the element-by-element Python triple loop.
    block = selected[:n_time_steps, :n_lat, :n_lon]
    if block.shape[:3] != (n_time_steps, n_lat, n_lon):
        # Slicing truncates silently; the loop version raised on short input.
        raise IndexError(
            f"selected entry has shape {selected.shape}, expected at least "
            f"{(n_time_steps, n_lat, n_lon)} along the first three axes"
        )

    flat_shape = (n_time_steps * n_lat * n_lon,) + block.shape[3:]
    # copy() guarantees the result never aliases the caller's data.
    return block.reshape(flat_shape).copy()


In [None]:
def get_min_max(df, parameter_name):
    """Look up the ``min`` and ``max`` stored for *parameter_name*.

    Rows of *df* whose ``parameter`` column equals *parameter_name* are
    selected; the ``min`` and ``max`` of the first such row are returned.

    Returns:
        tuple: ``(min, max)`` of the first matching row, or ``(None, None)``
        when no row matches.
    """
    matches = df[df['parameter'] == parameter_name]
    if matches.empty:
        return None, None
    return matches['min'].values[0], matches['max'].values[0]

In [None]:

def to_array_ndsi(data):
    """Flatten a (time, lat, lon) grid into a 1-D array.

    Elements are emitted with the last axis varying fastest, i.e. the same
    order as nested loops over time, latitude and longitude.

    Args:
        data: Array-like exposing ``.shape`` with at least three axes.

    Returns:
        np.ndarray: A new array (not a view of the input) holding the values.
    """
    n_time_steps = data.shape[0]
    n_lat, n_lon = data.shape[1], data.shape[2]

    values = np.asarray(data)
    # A single reshape replaces the element-by-element Python triple loop.
    flat_shape = (n_time_steps * n_lat * n_lon,) + values.shape[3:]
    # copy() guarantees the result never aliases the caller's data.
    return values.reshape(flat_shape).copy()

In [None]:
import math

def self_defineed_r2(y_test, y_pred):
    """Return the squared correlation between *y_test* and *y_pred*.

    Computed as the sum of products of the mean-centred values divided by the
    product of the square roots of their sums of squares, then squared.
    """
    centred_true = y_test - np.mean(y_test)
    centred_pred = y_pred - np.mean(y_pred)

    cross_sum = np.sum(centred_true * centred_pred)
    norm_true = math.sqrt(np.sum(centred_true ** 2))
    norm_pred = math.sqrt(np.sum(centred_pred ** 2))

    correlation = cross_sum / (norm_true * norm_pred)
    return correlation ** 2

In [1]:
def pbias(y_test, y_pred):
    """Return the percent bias: ``100 * sum(y_test - y_pred) / sum(y_test)``."""
    residual_total = np.sum(y_test - y_pred)
    observed_total = np.sum(y_test)
    return residual_total * 100 / observed_total


In [None]:
def nse(y_pred, y_test):
    """Return ``1 - sum((y_test - y_pred)^2) / sum((y_test - mean(y_test))^2)``.

    Note the argument order: predictions first, observations second.
    """
    squared_error = np.sum((y_test - y_pred) ** 2)
    observed_spread = np.sum((y_test - np.mean(y_test)) ** 2)
    return 1 - (squared_error / observed_spread)

In [None]:
def calculate_n_rmse(y_true, y_pred):
    """Return the RMSE of the predictions divided by the mean of *y_true*.

    Args:
        y_true: Observed values (array-like supporting element-wise arithmetic).
        y_pred: Predicted values with a shape compatible with *y_true*.

    Returns:
        ``sqrt(mean((y_true - y_pred)^2)) / mean(y_true)``.
    """
    rmse = np.sqrt(np.mean((y_true - y_pred) ** 2))
    # Normalise by this call's observations. The previous code read an
    # unrelated `y_test` name, which is undefined here unless a global with
    # that name happens to exist.
    mean_true = np.mean(y_true)
    return rmse / mean_true