In [178]:
from collections import OrderedDict
import pandas as pd
import numpy as np
import hijri_converter
from hijri_converter import Hijri, Gregorian

In [179]:
from dateutil.relativedelta import relativedelta

In [180]:
def adjustable_range(start, stop=None, step=None):
    if stop is None:
        start, stop = 0, start

    if step is None: step = 1

    i = start
    while i < stop:
        change_bound = (yield i)
        if change_bound is None:
            i += step
        else:
            stop = change_bound

In [181]:
target_start_day_hijri = Hijri(1444,9,1) # Hijri date in the format 'yyyy-mm-dd'
target_end_day_hijri  = Hijri(1445,1,1) # Hijri date in the format 'yyyy-mm-dd'
is_different_year = (target_start_day_hijri.year!=target_end_day_hijri.year)

In [182]:
from hijri_converter import convert
def convert_date(date_string):
    return pd.to_datetime(date_string, errors = 'coerce')

df = pd.read_excel("../datasets/RequiredData.xlsx", sheet_name="Reservation List Data", converters={'Reservation Check-out Date ':convert_date  , 'Cancellation Date  ':convert_date , 'Reservation Check-in Date':convert_date ,'Reservation Creation Date (MM-DD-YYYY)':convert_date})

df.dropna(inplace=True, subset=['Reservation Creation Date (MM-DD-YYYY)', 'Reservation Check-in Date', 'Reservation Check-out Date ', 'Cancellation Date  '])
df['Hijri Check-out Date'] = df['Reservation Check-out Date '].apply(lambda x: convert.Gregorian(x.year, x.month, x.day).to_hijri())
df['Hijri Check-in Date'] = df['Reservation Check-in Date'].apply(lambda x: convert.Gregorian(x.year, x.month, x.day).to_hijri())


df.sort_values('Reservation Creation Date (MM-DD-YYYY)', inplace=True)

df['tmp'] = df['Reservation Creation Date (MM-DD-YYYY)'].apply(lambda x: convert.Gregorian(x.year, x.month, x.day).to_hijri())
df.insert(1, 'Reservation Creation Date Hijri', df.pop('tmp'))

df.reset_index(inplace=True, drop=True)
df.head(10)

Unnamed: 0,Reservation ID,Reservation Creation Date Hijri,Reservation Creation Date (MM-DD-YYYY),Reservation Check-in Date,Reservation Check-out Date,Cancellation Date,"Type\nallotment (1), sales allotment (2), direct hotel (3)or direct agent(4)",Reservation Total Price,Rooms,Hijri Check-out Date,Hijri Check-in Date
0,30,1441-02-20,2019-10-19,2020-01-11,2020-01-15,2019-10-26,1,4160.016,2,1441-05-20,1441-05-16
1,49,1441-02-21,2019-10-20,2019-12-08,2019-12-14,2019-10-31,1,4439.9985,1,1441-04-17,1441-04-11
2,43,1441-02-21,2019-10-20,2019-12-30,2019-12-31,2019-12-30,1,910.0035,1,1441-05-05,1441-05-04
3,95,1441-02-25,2019-10-24,2020-01-11,2020-01-17,2019-10-30,1,32040.036,5,1441-05-22,1441-05-16
4,197,1441-03-01,2019-10-29,2019-12-15,2019-12-19,2019-11-03,1,4960.032,2,1441-04-22,1441-04-18
5,244,1441-03-02,2019-10-30,2019-11-01,2019-11-02,2019-10-31,3,684.999,1,1441-03-05,1441-03-04
6,214,1441-03-02,2019-10-30,2019-11-01,2019-11-02,2019-10-31,1,609.9975,1,1441-03-05,1441-03-04
7,260,1441-03-03,2019-10-31,2019-12-25,2019-12-28,2019-10-31,1,2245.005,1,1441-05-02,1441-04-28
8,305,1441-03-05,2019-11-02,2020-01-05,2020-01-07,2020-01-01,1,1640.016,2,1441-05-12,1441-05-10
9,327,1441-03-06,2019-11-03,2019-12-31,2020-01-01,2019-11-03,1,3060.015,4,1441-05-06,1441-05-05


In [183]:
# Create a new DataFrame to store the filtered reservations
filtered_df = pd.DataFrame(columns=df.columns)


In [184]:
def hijri_to_greg(hijri_date):
    dt_greg = hijri_date.to_gregorian().ctime()
    dt_greg = pd.to_datetime(dt_greg)
    return dt_greg

def greg_to_hijri(greg_date):
    hijri_date = Gregorian(greg_date.year, greg_date.month, greg_date.day)
    hijri_date = hijri_date.to_hijri()
    return hijri_date

def increment_hijri(hijri_date, dateOffset=1):
    dt_greg = hijri_to_greg(hijri_date)
    dt_greg += pd.DateOffset(days=dateOffset)
    incremented_hijri_date = Gregorian(dt_greg.year, dt_greg.month, dt_greg.day).to_hijri()
    return incremented_hijri_date

In [185]:
# Iterate over each row in the original DataFrame
for index, row in df.iterrows():
    # Extract the check-in and check-out  Hijri dates 
    check_in_date_hijri = row['Hijri Check-in Date']
    check_out_date_hijri = row['Hijri Check-out Date']
    target_start_day_hijri_temp = Hijri(check_in_date_hijri.year, target_start_day_hijri.month, target_start_day_hijri.day) 
    target_end_day_hijri_temp = Hijri(check_out_date_hijri.year + is_different_year, target_end_day_hijri.month, target_end_day_hijri.day)



# Create a list to store the rows for each day within the range
    rows = []

    # Iterate over each day within the range
    current_day = check_in_date_hijri
    while current_day <= check_out_date_hijri:
        # Check if the current day falls within the check-in and check-out dates
        if target_start_day_hijri_temp <= current_day <= target_end_day_hijri_temp:
            # Append the row with the current day to the list
            rows.append(list(row)+[current_day])

        current_day = increment_hijri(current_day)

    if (rows == []):
        continue

    # Create a DataFrame for the rows for this reservation
    reservation_df = pd.DataFrame(rows, columns=list(df.columns)+['Target Date'])

    # Concatenate the reservation DataFrame with the filtered DataFrame
    filtered_df = pd.concat([filtered_df, reservation_df], ignore_index=True)



In [186]:
numeric_columns = [col for col in filtered_df.columns if 'date' not in col.lower()]
filtered_df[numeric_columns] = filtered_df[numeric_columns].apply(pd.to_numeric, errors='coerce').astype('float')
print(filtered_df.dtypes)

Reservation ID                                                                          float64
Reservation Creation Date Hijri                                                          object
Reservation Creation Date (MM-DD-YYYY)                                           datetime64[ns]
Reservation Check-in Date                                                        datetime64[ns]
Reservation Check-out Date                                                       datetime64[ns]
Cancellation Date                                                                datetime64[ns]
Type\nallotment (1),  sales allotment (2), direct hotel (3)or direct agent(4)           float64
Reservation Total Price                                                                 float64
Rooms                                                                                   float64
Hijri Check-out Date                                                                     object
Hijri Check-in Date                     

In [187]:
# def get_greg_cr_dates_per_target_hijri_year(df):
    

In [188]:
def get_cr_dates_per_year(df, are_dates_hijri=False):
    """
    Retrieve a list of DataFrame arrays containing reservation creation dates grouped by year.

    Parameters:
        df (pandas.DataFrame): The initial DataFrame.
        are_dates_hijri (bool): Flag indicating whether the dates are in Hijri format. 
                                Default is True.

    Returns:
        list: A list of DataFrame arrays, where each array contains reservation creation dates for a specific year.

    Example:
        >>> df = pd.DataFrame({
        ...     'Reservation Creation Date (MM-DD-YYYY)': ['01-01-2022', '01-02-2022', '01-01-2023', '01-02-2023'],
        ...     'Reservation Creation Date Hijri': ['1444-01-01', '1444-01-02', '1444-01-01', '1444-01-02']
        ... })
        >>> cr_dates_per_year_gregorian = get_cr_dates_per_year(df)
        >>> print(cr_dates_per_year_gregorian)
        [['01-01-2022', '01-02-2022'], ['01-01-2023', '01-02-2023']]
        >>> cr_dates_per_year_hijri = get_cr_dates_per_year(df, True)
        >>> print(cr_dates_per_year_hijri)
        [['1444-01-01', '1444-01-02']]
    """

    df = df.copy()

    if are_dates_hijri:
        df['Hijri Year'] = df['Reservation Creation Date Hijri'].apply(lambda x: x.year)
        # Group the DataFrame by year
        grouped = df.groupby(df['Hijri Year'])

        # Create separate DataFrame arrays for each year
        cr_dates_per_year = [group['Reservation Creation Date Hijri'].tolist() for _, group in grouped]

    else:
        # Group the DataFrame by year
        grouped = df.groupby(df['Reservation Creation Date (MM-DD-YYYY)'].dt.year)

        # Create separate DataFrame arrays for each year
        cr_dates_per_year = [group['Reservation Creation Date (MM-DD-YYYY)'].tolist() for _, group in grouped]
    cr_dates_per_year = [list(OrderedDict.fromkeys(dates_in_a_year)) for dates_in_a_year in cr_dates_per_year]
    return cr_dates_per_year

In [189]:
def get_hijri_dates_between_range(start_date, end_date):
    """
    Generate a list of Hijri dates between the given start and end dates (inclusive).

    Parameters:
        start_date (Hijri): The starting Hijri date of the range.
        end_date (Hijri): The ending Hijri date of the range.

    Returns:
        list: A list of Hijri dates between the start and end dates (inclusive).

    Example:
        >>> from hijri_converter import Hijri
        >>> start_date = Hijri(1444, 1, 1)
        >>> end_date = Hijri(1444, 1, 5)
        >>> hijri_dates_range = get_hijri_dates_between_range(start_date, end_date)
        >>> print(hijri_dates_range)
        [Hijri(1444, 1, 1), Hijri(1444, 1, 2), Hijri(1444, 1, 3), Hijri(1444, 1, 4), Hijri(1444, 1, 5)]
    """
    dates_range = []
    current_day = start_date
    while current_day <= end_date:
        dates_range.append(current_day)
        current_day = increment_hijri(current_day)
    return dates_range


In [190]:
def create_reservation_dataframe(year, is_year_hijri=False, end_date_at_year_beginning=False):
    """
    Create a dataframe with a column 'Reservation Creation Date' containing all the days in a specified year.

    Parameters:
        year (int): The year for which to create the dataframe.
        is_year_hijri (bool, optional): Flag indicating whether the year is in Hijri format. Default is False.
        end_date_at_year_beginning (bool, optional): Flag indicating whether to set the end date at the beginning of the next year.
                                                    Applicable only if is_year_hijri is True. Default is False.

    Returns:
        pandas.DataFrame: A dataframe with a single column 'Reservation Creation Date' containing all the days in the specified year.

    Example:
        >>> # Example with Gregorian dates
        >>> df_gregorian = create_reservation_dataframe(2023)
        >>> print(df_gregorian)
           Reservation Creation Date
        0                2023-01-01
        1                2023-01-02
        2                2023-01-03
        ...              ...
        364              2023-12-31

        >>> # Example with Hijri dates
        >>> from hijri_converter import Hijri
        >>> df_hijri = create_reservation_dataframe(1444, is_year_hijri=True)
        >>> print(df_hijri)
           Reservation Creation Date
        0                1444-01-01
        1                1444-01-02
        2                1444-01-03
        ...              ...
        354              1444-12-24
        355              1444-12-25
        356              1444-12-26
        357              1444-12-27
        358              1444-12-28
        359              1444-12-29
        360              1444-12-30
        361              1444-12-31
    """
    if is_year_hijri:
        start_date = Hijri(year, 1, 1)
        if end_date_at_year_beginning:
            end_date = Hijri(year+1, 1, 1)
        else:
            end_date = Hijri(year, 12, Hijri(year, 12, 1).month_length())
        dates = get_hijri_dates_between_range(start_date, end_date)
    else:
        start_date = pd.to_datetime(f'{year}-01-01')
        end_date = pd.to_datetime(f'{year}-12-31')
        dates = pd.date_range(start=start_date, end=end_date, freq='D')

    df = pd.DataFrame({'Reservation Creation Date': dates})

    return df

In [191]:
def get_target_dates_history_dfs_and_years(filtered_df, add_start_of_new_year_as_a_row=True):
    """
    Create target dates history DataFrames and their corresponding years.

    This function processes the filtered DataFrame containing reservation data with Hijri dates.
    It generates target dates history DataFrames for each year in the filtered data and stores them
    along with their corresponding years in separate lists.

    Parameters:
        filtered_df (pandas.DataFrame): The filtered DataFrame containing reservation data with Hijri dates.
        add_start_of_new_year_as_a_row (bool, optional): Flag indicating whether to add a row for the start of the new year
                                                         in the target dates history DataFrames. Default is True.

    Returns:
        tuple: A tuple containing two lists - target_dates_history_dfs and target_dates_history_years.
                target_dates_history_dfs (list): A list of DataFrames containing target dates history
                                                for each year in the filtered data.
                target_dates_history_years (list): A list of integers representing the years for which
                                                    the target dates history DataFrames are generated.

    Example:
        >>> # Sample DataFrame with Hijri dates
        >>> filtered_df = pd.DataFrame({
        ...     'Reservation Creation Date Hijri': ['1444-01-01', '1444-01-01', '1444-01-02', '1445-02-17'],
        ...     'Target Date': ['1444-01-03', '1444-01-03', '1445-01-04', '1445-01-05'],
        ...     'Rooms': [3, 2, 7, 8]
        ... })
        >>> target_start_day_hijri = Hijri(1444, 1, 1)
        >>> target_end_day_hijri = Hijri(1444, 1, 5)
        >>> is_different_year = 0
        >>> target_dates_history_dfs, target_dates_history_years = get_target_dates_history_dfs_and_years(
        ...     filtered_df, add_start_of_new_year_as_a_row=True
        ... )
        >>> print(target_dates_history_years)
        [1444]
        >>> print(target_dates_history_dfs[0])
            Reservation Creation Date  1444-01-01  1444-01-02  1444-01-03  1444-01-04  1444-01-05
        0                  1444-01-01         NaN         NaN         5.0         NaN         NaN
        1                  1444-01-02         NaN         NaN         NaN         7.0         NaN
        ...
        365                1445-01-01         NaN         NaN         NaN         NaN         NaN
        >>> Note: the last row here will be 1444-12-30 if the the add_start_of_new_year_as_a_row flag is False.
        """
    global target_start_day_hijri, target_end_day_hijri, is_different_year
    cr_dates_per_year = get_cr_dates_per_year(filtered_df, are_dates_hijri=True)
    target_dates_history_years = []
    target_dates_history_dfs = []
    for annually_creation_dates_hijri in cr_dates_per_year:
        target_start_day_hijri_temp = Hijri(annually_creation_dates_hijri[0].year, 
                                            target_start_day_hijri.month, 
                                            target_start_day_hijri.day)
        target_end_day_hijri_temp = Hijri(annually_creation_dates_hijri[0].year + is_different_year, 
                                            target_end_day_hijri.month, 
                                            target_end_day_hijri.day)
        
        target_dates_range = get_hijri_dates_between_range(target_start_day_hijri_temp, target_end_day_hijri_temp)
        # Creating empty df
        annually_target_dates_history_df = create_reservation_dataframe(annually_creation_dates_hijri[0].year, 
                                                                        is_year_hijri=True, end_date_at_year_beginning=add_start_of_new_year_as_a_row)
        # Creates columns where the column name is a Hijri() object (displayed as Y-m-d by __str()__ method in Hijri class)
        for column in target_dates_range:
            annually_target_dates_history_df[column] = np.nan
        
        for creation_date_hijri in annually_creation_dates_hijri:
            target_creation_dates_mask = (annually_target_dates_history_df['Reservation Creation Date'] == creation_date_hijri)
            filtered_creation_dates_mask = (filtered_df['Reservation Creation Date Hijri'] == creation_date_hijri)
            filtered_target_dates_range_mask = (filtered_df['Target Date'] < Hijri(creation_date_hijri.year+1, target_end_day_hijri.month, target_end_day_hijri.day))
            filtered_target_dates = [dt for dt in list(filtered_df[filtered_creation_dates_mask & filtered_target_dates_range_mask]['Target Date'])]

            filtered_rooms = list(filtered_df[filtered_creation_dates_mask & filtered_target_dates_range_mask]['Rooms'])
            
            target_date_to_rooms = {}
            for i, target_date in enumerate(filtered_target_dates):
                if target_date not in target_date_to_rooms.keys():
                    target_date_to_rooms[target_date] = 0
                target_date_to_rooms[target_date] += filtered_rooms[i]    

            annually_target_dates_history_df.loc[target_creation_dates_mask, # to select row in ann_td_h_df where creation date is the current one in the loop
                                                    list(target_date_to_rooms.keys()) # to update the column names of said row
                                                ] \
                                                = list(target_date_to_rooms.values()) # updates it to Rooms
            
        target_dates_history_dfs.append(annually_target_dates_history_df)
        target_dates_history_years.append(annually_creation_dates_hijri[0].year)

    return target_dates_history_dfs, target_dates_history_years

target_dates_history_dfs, target_dates_history_years = get_target_dates_history_dfs_and_years(filtered_df, add_start_of_new_year_as_a_row=True)

  annually_target_dates_history_df[column] = np.nan
  annually_target_dates_history_df[column] = np.nan
  annually_target_dates_history_df[column] = np.nan
  annually_target_dates_history_df[column] = np.nan
  annually_target_dates_history_df[column] = np.nan
  annually_target_dates_history_df[column] = np.nan
  annually_target_dates_history_df[column] = np.nan
  annually_target_dates_history_df[column] = np.nan
  annually_target_dates_history_df[column] = np.nan
  annually_target_dates_history_df[column] = np.nan
  annually_target_dates_history_df[column] = np.nan
  annually_target_dates_history_df[column] = np.nan
  annually_target_dates_history_df[column] = np.nan
  annually_target_dates_history_df[column] = np.nan
  annually_target_dates_history_df[column] = np.nan
  annually_target_dates_history_df[column] = np.nan
  annually_target_dates_history_df[column] = np.nan
  annually_target_dates_history_df[column] = np.nan
  annually_target_dates_history_df[column] = np.nan
  annually_t

In [192]:
target_dates_history_dfs[0]

Unnamed: 0,Reservation Creation Date,1441-09-01,1441-09-02,1441-09-03,1441-09-04,1441-09-05,1441-09-06,1441-09-07,1441-09-08,1441-09-09,...,1441-12-21,1441-12-22,1441-12-23,1441-12-24,1441-12-25,1441-12-26,1441-12-27,1441-12-28,1441-12-29,1442-01-01
0,1441-01-01,,,,,,,,,,...,,,,,,,,,,
1,1441-01-02,,,,,,,,,,...,,,,,,,,,,
2,1441-01-03,,,,,,,,,,...,,,,,,,,,,
3,1441-01-04,,,,,,,,,,...,,,,,,,,,,
4,1441-01-05,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
351,1441-12-26,,,,,,,,,,...,,,,,,,,,,
352,1441-12-27,,,,,,,,,,...,,,,,,,,,,
353,1441-12-28,,,,,,,,,,...,,,,,,,,,,
354,1441-12-29,,,,,,,,,,...,,,,,,,,,,


In [193]:
def calculate_target_dates_accumulation(target_dates_history_dfs):
    """
    Calculate the target dates' accumulation in the target dates history DataFrames.

    This function takes a list of DataFrames, each representing the target dates history for a specific year,
    and calculates the cumulative target dates accumulation in each DataFrame.

    Parameters:
        target_dates_history_dfs (list): A list of DataFrames containing target dates history
                                         for each year.

    Returns:
        list: A list of DataFrames with the cumulative target dates accumulation for each year.

    Example:
        >>> # Assuming target_dates_history_dfs is the result of get_target_dates_history_dfs_and_years()
        >>> target_dates_accum_history_dfs = target_dates_history_dfs.copy()
        >>> target_dates_accum_history_dfs = calculate_target_dates_accumulation(target_dates_accum_history_dfs)
        >>> for tdacc_df in target_dates_accum_history_dfs:
        ...     print(tdacc_df)
                    Reservation Creation Date  1444-01-01  1444-01-02  1444-01-03  1444-01-04  1444-01-05
            0                  1444-01-01         0.0         0.0         5.0         0.0         0.0
            1                  1444-01-02         0.0         0.0         NaN         7.0         0.0
            ...
            365                1445-01-01         NaN         NaN         NaN         NaN         NaN
    """
    target_dates_accum_history_dfs = []
    for df in target_dates_history_dfs:
        target_dates_accum_history_dfs.append(df.copy())

    for tdacc_df in target_dates_accum_history_dfs:
        # cumulatively add each row to the row below it
        tdacc_df.fillna(0, inplace=True)
        tdacc_df[tdacc_df.columns[1:]] = tdacc_df[tdacc_df.columns[1:]].cumsum()

        # when reaching the row where "Reservation Creation Date" is equal to the Column date, 
        # fill that column's remaining rows (after that row) with 0s 
        for i, column_hijri in enumerate(tdacc_df.columns):
            if not isinstance(column_hijri, Hijri):
                continue 
            matching_dates_row_idx = tdacc_df.loc[tdacc_df['Reservation Creation Date']==column_hijri].index[0]
            if matching_dates_row_idx == len(tdacc_df)-1:
                continue
            tdacc_df.iloc[matching_dates_row_idx+1:, i] = np.nan

    return target_dates_accum_history_dfs

In [194]:
target_dates_accum_history_dfs = calculate_target_dates_accumulation(target_dates_history_dfs)

In [195]:
def display_full_df(df):
    with pd.option_context('display.max_rows', None, 'display.max_columns', None):
        display(df)

In [196]:
target_dates_history_dfs[0]

Unnamed: 0,Reservation Creation Date,1441-09-01,1441-09-02,1441-09-03,1441-09-04,1441-09-05,1441-09-06,1441-09-07,1441-09-08,1441-09-09,...,1441-12-21,1441-12-22,1441-12-23,1441-12-24,1441-12-25,1441-12-26,1441-12-27,1441-12-28,1441-12-29,1442-01-01
0,1441-01-01,,,,,,,,,,...,,,,,,,,,,
1,1441-01-02,,,,,,,,,,...,,,,,,,,,,
2,1441-01-03,,,,,,,,,,...,,,,,,,,,,
3,1441-01-04,,,,,,,,,,...,,,,,,,,,,
4,1441-01-05,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
351,1441-12-26,,,,,,,,,,...,,,,,,,,,,
352,1441-12-27,,,,,,,,,,...,,,,,,,,,,
353,1441-12-28,,,,,,,,,,...,,,,,,,,,,
354,1441-12-29,,,,,,,,,,...,,,,,,,,,,


In [197]:
def process_weekly_target_dates_accumulation(target_dates_accum_history_dfs):
    """
    Process weekly target dates accumulation DataFrames.

    This function takes a list of DataFrames, each representing the target dates accumulation for a specific year,
    and processes them to create weekly target dates accumulation DataFrames. Each weekly DataFrame includes aggregated
    data for a week's range of target dates, with trailing zero values converted to NaN.

    Parameters:
        target_dates_accum_history_dfs (list): A list of DataFrames containing cumulative target dates accumulation
                                               for each year.

    Returns:
        list: A list of DataFrames with weekly target dates accumulation for each year.

        >>> # Sample output of weekly_target_dates_accum_dfs for one year
        >>> print(weekly_target_dates_accum_dfs[0])
            Reservation Creation Date  W1 - Ramadhan      W2 - Ramadhan  ...  W3 - Dhu al-Hijjah  W4 - Dhu al-Hijjah  W1 - Dhu al-Hijjah/Muharram
        0                  1441-01-01          0.0                  0.0  ...           0.0           0.0                     0.0
        1                  1441-01-02          0.0                  0.0  ...           0.0           0.0                     0.0
        2                  1441-01-03          5.0                  0.0  ...           0.0           0.0                     0.0
        ...                ...               ...                    ...  ...           ...           ...                     ...
        353                1441-12-28          NaN                  NaN  ...           NaN           NaN                     NaN
        354                1441-12-29          NaN                  NaN  ...           NaN           NaN                     NaN
        355                1442-01-01          NaN                  NaN  ...           NaN           NaN                     NaN

    Note: The DataFrames in weekly_target_dates_accum_dfs will contain aggregated values for each week's target date range,
    with trailing zero values converted to NaN to avoid displaying inaccurate data.
    """
    weekly_target_dates_accum_dfs = []
    for tdacc_df in target_dates_accum_history_dfs:
        new_columns = ['Reservation Creation Date']
        new_agg_values = [tdacc_df.iloc[:, 0].to_list()]
        df_columns = tdacc_df.columns
        j = 0
        start_date_idx = 1
        while start_date_idx < len(df_columns):
            #incrment j by one to detect last week 
            j += 1 
            # get column name of this week's range
            end_date_idx = min(start_date_idx+6, len(df_columns)-1)
            week_num = j
            if week_num % 4 == 0:
                week_num = 4
            else:
                week_num %= 4
            column_name = f'W{week_num} - ' + df_columns[start_date_idx].month_name()
            # if df_columns[start_date_idx].month != df_columns[end_date_idx].month:
            #     column_name += '/' + df_columns[end_date_idx].month_name()
            new_columns.append(column_name)
            
            # if this is the last week in the month add all the remaning days in current month to last week
            if j % 4 == 0:
                mlen = tdacc_df.columns[start_date_idx].month_length()
                tlen = mlen - 21
                # get sum of Rooms of this week's range as a single list
                new_agg_values.append(tdacc_df.iloc[:, start_date_idx:start_date_idx+tlen].sum(axis=1, skipna=True).to_list())
                start_date_idx += tlen - 7 
            else:
                new_agg_values.append(tdacc_df.iloc[:, start_date_idx:start_date_idx+7].sum(axis=1, skipna=True).to_list())
            # print(tdacc_df.iloc[:, start_date_idx:start_date_idx+7])
            start_date_idx += 7
            
        weekly_tdacc_df = pd.DataFrame(np.array(new_agg_values).T, columns=new_columns)

        # convert all trailing zero values (per column) to np.NaN values
        for column in weekly_tdacc_df.columns:
            if 'date' in column.lower():
                continue
            mask = weekly_tdacc_df[column] != 0
            last_nonzero_index = mask[::-1].idxmax()
            if last_nonzero_index < len(weekly_tdacc_df)-1:
                weekly_tdacc_df.loc[last_nonzero_index + 1:, column] = np.nan

        weekly_target_dates_accum_dfs.append(weekly_tdacc_df)
    
    for df in weekly_target_dates_accum_dfs:
        df.drop(df.columns[-1], axis=1, inplace=True)

    return weekly_target_dates_accum_dfs

In [198]:
weekly_target_dates_accum_dfs = process_weekly_target_dates_accumulation(target_dates_accum_history_dfs)

In [199]:
weekly_target_dates_accum_dfs[0]

Unnamed: 0,Reservation Creation Date,W1 - Ramadhan,W2 - Ramadhan,W3 - Ramadhan,W4 - Ramadhan,W1 - Shawwal,W2 - Shawwal,W3 - Shawwal,W4 - Shawwal,W1 - Dhu al-Qi’dah,W2 - Dhu al-Qi’dah,W3 - Dhu al-Qi’dah,W4 - Dhu al-Qi’dah,W1 - Dhu al-Hijjah,W2 - Dhu al-Hijjah,W3 - Dhu al-Hijjah,W4 - Dhu al-Hijjah
0,1441-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1441-01-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1441-01-03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1441-01-04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1441-01-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
351,1441-12-26,,,,,,,,0.0,,,,,,,,3380.0
352,1441-12-27,,,,,,,,0.0,,,,,,,,2535.0
353,1441-12-28,,,,,,,,0.0,,,,,,,,1690.0
354,1441-12-29,,,,,,,,0.0,,,,,,,,845.0


In [200]:
# visualized example in markdowen below |
#                                       v
def handle_accumulated_rooms(weekly_target_dates_accum_dfs):
    """
    This function deals with a corner case where the accumulated rooms for each week
    get decreased in the rows corresponding to the target week. It updates the accumulated
    room values to address this discrepancy.

    Parameters:
        weekly_target_dates_accum_dfs (list of DataFrames): A list of pandas DataFrames
        containing weekly data for accumulated rooms with columns representing dates.

    Explanation:
        The function iterates through each DataFrame in the provided list, representing
        the accumulated rooms for different weeks. It then processes each column (date) in
        the DataFrame, excluding columns containing 'date' in their name.

        For each column, it finds the starting index of the target week by locating the last
        non-null value in that column (the last recorded value for the week) and subtracting 6
        (representing 6 days in a week) to get the index of the first day of the target week.

        Next, it iterates over 7 days (a week) starting from the target week's start index and
        updates the accumulated room values for each day. The update is based on the previous day's
        value and the difference between the previous day's value and the current day's value.

        The purpose of this process is to handle the case where there might be a discrepancy in
        the accumulated room values for the target week, resulting in a decrease. This discrepancy
        could occur due to adjustments or corrections made to the data.

    Note:
        - This code assumes that the DataFrames in the input list have columns representing dates.
        - The provided 'weekly_target_dates_accum_dfs' list is modified in-place, and no new list
          is returned.

    Example usage:
        weekly_data = [...]  # List of DataFrames containing weekly accumulated room data
        handle_accumulated_rooms(weekly_data)
    """
    for df in weekly_target_dates_accum_dfs:
        for col in df.columns:
            if 'date' in col.lower():
                continue
            current_target_week_start_idx = df.loc[:, col].dropna().index[-1] - 6
            for i in range(current_target_week_start_idx+1, current_target_week_start_idx+7):
                df.loc[i, col] += df.loc[i-1, col] - df.loc[i, col]


handle_accumulated_rooms() explanation by example:

Before:&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; After:


<img src="../project_media/before_acc.png" alt="Alt Text" width="300" >  
<img src="../project_media/after_acc.png" alt="Alt Text" width="300">




In [201]:
handle_accumulated_rooms(weekly_target_dates_accum_dfs)

In [202]:
weekly_target_dates_accum_dfs[0]

Unnamed: 0,Reservation Creation Date,W1 - Ramadhan,W2 - Ramadhan,W3 - Ramadhan,W4 - Ramadhan,W1 - Shawwal,W2 - Shawwal,W3 - Shawwal,W4 - Shawwal,W1 - Dhu al-Qi’dah,W2 - Dhu al-Qi’dah,W3 - Dhu al-Qi’dah,W4 - Dhu al-Qi’dah,W1 - Dhu al-Hijjah,W2 - Dhu al-Hijjah,W3 - Dhu al-Hijjah,W4 - Dhu al-Hijjah
0,1441-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1441-01-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1441-01-03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1441-01-04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1441-01-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
351,1441-12-26,,,,,,,,0.0,,,,,,,,5282.0
352,1441-12-27,,,,,,,,0.0,,,,,,,,5282.0
353,1441-12-28,,,,,,,,0.0,,,,,,,,5282.0
354,1441-12-29,,,,,,,,0.0,,,,,,,,5282.0


In [203]:
len(weekly_target_dates_accum_dfs)

2

save weekly dfs to xlxs 

In [204]:
for index, df in enumerate(weekly_target_dates_accum_dfs, start=0):
    file_name = f"weekly_target_dates_accum_df_{index}.xlsx"
    df.to_excel(file_name, index=False)
    print(f"DataFrame {index} saved as {file_name}")

DataFrame 0 saved as weekly_target_dates_accum_df_0.xlsx
DataFrame 1 saved as weekly_target_dates_accum_df_1.xlsx


For months

In [205]:
def process_monthly_target_dates_accumulation(weekly_target_dates_accum_dfs): 
    """
    Process weekly target dates accumulation DataFrames.

    This function takes a list of DataFrames, each representing the target dates accumulation for a specific year,
    and processes them to create weekly target dates accumulation DataFrames. Each weekly DataFrame includes aggregated
    data for a week's range of target dates, with trailing zero values converted to NaN.

    Parameters:
        target_dates_accum_history_dfs (list): A list of DataFrames containing cumulative target dates accumulation
                                               for each year.

    Returns:
        list: A list of DataFrames with weekly target dates accumulation for each year.

        >>> # Sample output of weekly_target_dates_accum_dfs for one year
        >>> print(weekly_target_dates_accum_dfs[0])
            Reservation Creation Date  W1 - Ramadhan      W2 - Ramadhan  ...  W3 - Dhu al-Hijjah  W4 - Dhu al-Hijjah  W1 - Dhu al-Hijjah/Muharram
        0                  1441-01-01          0.0                  0.0  ...           0.0           0.0                     0.0
        1                  1441-01-02          0.0                  0.0  ...           0.0           0.0                     0.0
        2                  1441-01-03          5.0                  0.0  ...           0.0           0.0                     0.0
        ...                ...               ...                    ...  ...           ...           ...                     ...
        353                1441-12-28          NaN                  NaN  ...           NaN           NaN                     NaN
        354                1441-12-29          NaN                  NaN  ...           NaN           NaN                     NaN
        355                1442-01-01          NaN                  NaN  ...           NaN           NaN                     NaN

    Note: The DataFrames in weekly_target_dates_accum_dfs will contain aggregated values for each week's target date range,
    with trailing zero values converted to NaN to avoid displaying inaccurate data.
    """
    monthly_target_dates_accum_dfs = []
    for wtdacc_df in weekly_target_dates_accum_dfs:
        new_columns = ['Reservation Creation Date']
        new_agg_values = [wtdacc_df.iloc[:, 0].to_list()]
        df_columns = wtdacc_df.columns
        for start_date_idx in range(1, len(df_columns), 4):
            # get column name of this week's range
            end_date_idx = min(start_date_idx+4, len(df_columns)-1)
            column_name = f'M{(start_date_idx//4) + 1} - ' #+ df_columns[start_date_idx].month_name()
            #if df_columns[start_date_idx].month != df_columns[end_date_idx].month:
                #column_name += '/' + df_columns[end_date_idx].month_name()
            new_columns.append(column_name)

            # get sum of Rooms of this week's range as a single list
            new_agg_values.append(wtdacc_df.iloc[:, start_date_idx:start_date_idx+4].sum(axis=1, skipna=True).to_list())
            print(wtdacc_df.iloc[:, start_date_idx:start_date_idx+4].columns)
            
        monthly_tdacc_df = pd.DataFrame(np.array(new_agg_values).T, columns=new_columns)

        # convert all trailing zero values (per column) to np.NaN values
        for column in monthly_tdacc_df.columns:
            if 'date' in column.lower():
                continue
            mask = monthly_tdacc_df[column] != 0
            last_nonzero_index = mask[::-1].idxmax()
            if last_nonzero_index < len(monthly_tdacc_df)-1:
                monthly_tdacc_df.loc[last_nonzero_index + 1:, column] = np.nan

        monthly_target_dates_accum_dfs.append(monthly_tdacc_df)
        
    return monthly_target_dates_accum_dfs

In [206]:
monthly_target_dates_accum_dfs = process_monthly_target_dates_accumulation(weekly_target_dates_accum_dfs)

Index(['W1 - Ramadhan', 'W2 - Ramadhan', 'W3 - Ramadhan', 'W4 - Ramadhan'], dtype='object')
Index(['W1 - Shawwal', 'W2 - Shawwal', 'W3 - Shawwal', 'W4 - Shawwal'], dtype='object')
Index(['W1 - Dhu al-Qi’dah', 'W2 - Dhu al-Qi’dah', 'W3 - Dhu al-Qi’dah',
       'W4 - Dhu al-Qi’dah'],
      dtype='object')
Index(['W1 - Dhu al-Hijjah', 'W2 - Dhu al-Hijjah', 'W3 - Dhu al-Hijjah',
       'W4 - Dhu al-Hijjah'],
      dtype='object')
Index(['W1 - Ramadhan', 'W2 - Ramadhan', 'W3 - Ramadhan', 'W4 - Ramadhan'], dtype='object')
Index(['W1 - Shawwal', 'W2 - Shawwal', 'W3 - Shawwal', 'W4 - Shawwal'], dtype='object')
Index(['W1 - Dhu al-Qi’dah', 'W2 - Dhu al-Qi’dah', 'W3 - Dhu al-Qi’dah',
       'W4 - Dhu al-Qi’dah'],
      dtype='object')
Index(['W1 - Dhu al-Hijjah', 'W2 - Dhu al-Hijjah', 'W3 - Dhu al-Hijjah',
       'W4 - Dhu al-Hijjah'],
      dtype='object')


In [207]:
monthly_target_dates_accum_dfs[0]

Unnamed: 0,Reservation Creation Date,M1 -,M2 -,M3 -,M4 -
0,1441-01-01,0.0,0.0,0.0,0.0
1,1441-01-02,0.0,0.0,0.0,0.0
2,1441-01-03,0.0,0.0,0.0,0.0
3,1441-01-04,0.0,0.0,0.0,0.0
4,1441-01-05,0.0,0.0,0.0,0.0
...,...,...,...,...,...
351,1441-12-26,,,,5282.0
352,1441-12-27,,,,5282.0
353,1441-12-28,,,,5282.0
354,1441-12-29,,,,5282.0


In [208]:
def handle_accumulated_rooms_monthly(monthly_target_dates_accum_dfs):
    """
    This function deals with a corner case where the accumulated rooms for each week
    get decreased in the rows corresponding to the target week. It updates the accumulated
    room values to address this discrepancy.

    Parameters:
        weekly_target_dates_accum_dfs (list of DataFrames): A list of pandas DataFrames
        containing weekly data for accumulated rooms with columns representing dates.

    Explanation:
        The function iterates through each DataFrame in the provided list, representing
        the accumulated rooms for different weeks. It then processes each column (date) in
        the DataFrame, excluding columns containing 'date' in their name.

        For each column, it finds the starting index of the target week by locating the last
        non-null value in that column (the last recorded value for the week) and subtracting 6
        (representing 6 days in a week) to get the index of the first day of the target week.

        Next, it iterates over 7 days (a week) starting from the target week's start index and
        updates the accumulated room values for each day. The update is based on the previous day's
        value and the difference between the previous day's value and the current day's value.

        The purpose of this process is to handle the case where there might be a discrepancy in
        the accumulated room values for the target week, resulting in a decrease. This discrepancy
        could occur due to adjustments or corrections made to the data.

    Note:
        - This code assumes that the DataFrames in the input list have columns representing dates.
        - The provided 'weekly_target_dates_accum_dfs' list is modified in-place, and no new list
          is returned.

    Example usage:
        weekly_data = [...]  # List of DataFrames containing weekly accumulated room data
        handle_accumulated_rooms(weekly_data)
    """
    for df in monthly_target_dates_accum_dfs:
        for col in df.columns:
            if 'date' in col.lower():
                continue
            current_target_month_start_idx = df.loc[:, col].dropna().index[-1] - 29
            for i in range(current_target_month_start_idx+1, current_target_month_start_idx+30):
                df.loc[i, col] += df.loc[i-1, col] - df.loc[i, col]


In [209]:
handle_accumulated_rooms_monthly(monthly_target_dates_accum_dfs)

In [210]:
monthly_target_dates_accum_dfs[0]

Unnamed: 0,Reservation Creation Date,M1 -,M2 -,M3 -,M4 -
0,1441-01-01,0.0,0.0,0.0,0.0
1,1441-01-02,0.0,0.0,0.0,0.0
2,1441-01-03,0.0,0.0,0.0,0.0
3,1441-01-04,0.0,0.0,0.0,0.0
4,1441-01-05,0.0,0.0,0.0,0.0
...,...,...,...,...,...
351,1441-12-26,,,,15425.0
352,1441-12-27,,,,15425.0
353,1441-12-28,,,,15425.0
354,1441-12-29,,,,15425.0


pipline for normal target dates (not accum)

In [211]:
weekly_target_dates_dfs = process_weekly_target_dates_accumulation(target_dates_history_dfs)
handle_accumulated_rooms(weekly_target_dates_dfs)
monthly_target_dates_dfs = process_monthly_target_dates_accumulation(weekly_target_dates_dfs)

Index(['W1 - Ramadhan', 'W2 - Ramadhan', 'W3 - Ramadhan', 'W4 - Ramadhan'], dtype='object')
Index(['W1 - Shawwal', 'W2 - Shawwal', 'W3 - Shawwal', 'W4 - Shawwal'], dtype='object')
Index(['W1 - Dhu al-Qi’dah', 'W2 - Dhu al-Qi’dah', 'W3 - Dhu al-Qi’dah',
       'W4 - Dhu al-Qi’dah'],
      dtype='object')
Index(['W1 - Dhu al-Hijjah', 'W2 - Dhu al-Hijjah', 'W3 - Dhu al-Hijjah',
       'W4 - Dhu al-Hijjah'],
      dtype='object')
Index(['W1 - Ramadhan', 'W2 - Ramadhan', 'W3 - Ramadhan', 'W4 - Ramadhan'], dtype='object')
Index(['W1 - Shawwal', 'W2 - Shawwal', 'W3 - Shawwal', 'W4 - Shawwal'], dtype='object')
Index(['W1 - Dhu al-Qi’dah', 'W2 - Dhu al-Qi’dah', 'W3 - Dhu al-Qi’dah',
       'W4 - Dhu al-Qi’dah'],
      dtype='object')
Index(['W1 - Dhu al-Hijjah', 'W2 - Dhu al-Hijjah', 'W3 - Dhu al-Hijjah',
       'W4 - Dhu al-Hijjah'],
      dtype='object')


In [212]:
monthly_target_dates_dfs[0]

Unnamed: 0,Reservation Creation Date,M1 -,M2 -,M3 -,M4 -
0,1441-01-01,0.0,0.0,0.0,0.0
1,1441-01-02,0.0,0.0,0.0,0.0
2,1441-01-03,0.0,0.0,0.0,0.0
3,1441-01-04,0.0,0.0,0.0,0.0
4,1441-01-05,0.0,0.0,0.0,0.0
...,...,...,...,...,...
351,1441-12-26,,,,
352,1441-12-27,,,,
353,1441-12-28,,,,
354,1441-12-29,,,,


save monthly dfs to xlxs(non_accum) 

In [215]:
for index, df in enumerate(monthly_target_dates_dfs, start=0):
    file_name = f"monthly_target_date_df_{index}.xlsx"
    df.to_excel(file_name, index=False)
    print(f"DataFrame {index} saved as {file_name}")

DataFrame 0 saved as monthly_target_date_df_0.xlsx
DataFrame 1 saved as monthly_target_date_df_1.xlsx


save monthly dfs to xlxs 

In [213]:
for df in monthly_target_dates_accum_dfs:
    df.columns = ['Reservation Creation Date','M1 - Ramhadan' , 'M2 - Shawwal' , 'M3 - Dhu al-Qi’dah' , 'M4 - Dhu al-Hijjah']

In [214]:
for index, df in enumerate(monthly_target_dates_accum_dfs, start=0):
    file_name = f"monthly_target_dates_accum_df_{index}.xlsx"
    df.to_excel(file_name, index=False)
    print(f"DataFrame {index} saved as {file_name}")

DataFrame 0 saved as monthly_target_dates_accum_df_0.xlsx
DataFrame 1 saved as monthly_target_dates_accum_df_1.xlsx


save monthly dfs to xlxs(non_accum) 

In [216]:
for index, df in enumerate(weekly_target_dates_dfs, start=0):
    file_name = f"weekly_target_dates_accum_df_{index}.xlsx"
    df.to_excel(file_name, index=False)
    print(f"DataFrame {index} saved as {file_name}")

DataFrame 0 saved as weekly_target_dates_accum_df_0.xlsx
DataFrame 1 saved as weekly_target_dates_accum_df_1.xlsx
