In [1]:
import pandas as pd
from datetime import date
import numpy as np
from dateutil.relativedelta import relativedelta

In [2]:
# Load main subscription data
df_main = pd.read_excel('/Users/emirkabasoglu/Library/CloudStorage/OneDrive-CeridianHCMInc/Desktop/Board_Metrics_Input_File-1.xlsx')
# df_main = pd.read_excel('/Users/jesperfriislarnaes/Library/CloudStorage/OneDrive-eloomi/Scripts/New_Dashboard/test-data-a.xlsx')

In [3]:
# Set dates - report date is the month after the one being reported on
report_date = date(2024,8,1)
ltm_period_start = report_date + relativedelta(months=-25)

In [4]:
df_main['start'] = pd.to_datetime(df_main['start'], errors='coerce')
df_main['end'] = pd.to_datetime(df_main['end'], errors='coerce')

In [7]:
def carr_ltm_calculations(df, name):
    # Function to get CARR main grid
    def ltm_carr(df, ltm_period_start, ltm_period_end):
        while ltm_period_start < ltm_period_end:
            revenue_period = ltm_period_start + relativedelta(months=+1, days=-1)
            # Function to get carr calculation
            def carr_calculation(id):
                df1 = df.loc[df['id'] == id]
                # Set up per-id dataframe for active non-cancelled subscriptions
                mask_active_non_cancel = (df1['start'].dt.date <= revenue_period) & (df1['end'].dt.date >= revenue_period) & (df1['cancelled'].str.lower() != 'y')
                active_non_cancel = df1.loc[mask_active_non_cancel]['value'].sum()

                # Set up per-id dataframe for future committed non-renewal subscriptions
                mask_future = (df1['commit'].dt.date <= revenue_period) & (df1['start'].dt.date > revenue_period) #& (df1['renewal'].str.lower() != 'y')
                future = df1.loc[mask_future]['value'].sum()

                # Set up per-id dataframe for currently active subscriptions
                mask_active = (df1['start'].dt.date <= revenue_period) & (df1['end'].dt.date >= revenue_period)
                active = df1.loc[mask_active]['value'].sum()

                if (active_non_cancel + future - active) > 0:
                    barr =  active_non_cancel + future - active
                else:
                    barr = 0

                if (active + barr) > active:
                    carr = active + barr
                else:
                    carr = active
                return carr
            if df.empty:
                df[f'{revenue_period}'] = np.nan    
            else:
                df[f'{revenue_period}'] = df.apply(lambda x: carr_calculation(x['id']), axis=1)
            ltm_period_start = ltm_period_start + relativedelta(months=+1)
        return df.drop_duplicates(subset='id', keep='first')
    # df1 = ltm_carr(df, current_period)

    # Run function using dataframe as input and current_period to define the ltm period range
    df_ltm_carr = ltm_carr(df, ltm_period_start, report_date)

    # Group by ids to get grid on account level
    revenue_period_cols = list(df_ltm_carr.iloc[:,18:])
    df_ltm_carr_agg = df_ltm_carr.groupby('id')[revenue_period_cols].sum()
    df_ltm_carr_agg.to_excel(f'{name}-ltm-carr-per-account.xlsx')

    # Set up dataframe for carr ending
    df_carr_ending = df_ltm_carr_agg.agg(['sum']).rename(index={'sum': f'{name}_carr_ending'})

    # Set up dataframe for arr churn 
    # df_arr_churn = df_ltm_arr_agg.diff(axis=1)[df_ltm_arr_agg.eq(0)].agg(['sum']).rename(index={'sum': f'{name}_arr_churn'})

    # Set up dataframe for new logo arr
    df_carr_new_logo = df_ltm_carr_agg.diff(axis=1)[df_ltm_carr_agg.eq(0).shift(axis=1)].agg(['sum']).rename(index={'sum': f'{name}_carr_new_logo'})

    # Set up dataframe for upsell arr
    mask_carr_upsell = df_ltm_carr_agg[df_ltm_carr_agg.gt(0)].diff(axis=1).fillna(0).gt(0)
    df_carr_upsell = df_ltm_carr_agg.diff(axis=1)[mask_carr_upsell].agg(['sum']).rename(index={'sum': f'{name}_carr_upsell'})

    # Set up dataframe for downsell arr
    # mask_arr_downsell = df_ltm_arr_agg[df_ltm_arr_agg.gt(0)].diff(axis=1).fillna(0).lt(0)
    # df_arr_downsell = df_ltm_arr_agg.diff(axis=1)[mask_arr_downsell].agg(['sum']).rename(index={'sum': f'{name}_arr_downsell'})

    # Set up dataframe for arr ending logo count
    df_carr_ending_logo_count = df_ltm_carr_agg.replace(0,np.nan).count().to_frame(name=f'{name}_carr_ending_logo_count').transpose()

    # Set up dataframe for arr churn logo count
    # df_arr_churn_logo_count = df_ltm_arr_agg.diff(axis=1)[df_ltm_arr_agg.eq(0)].replace(0,np.nan).count().to_frame(name=f'{name}_arr_churn_logo_count').transpose()

    # Set up dataframe for arr new logo count
    df_carr_new_logo_count = df_ltm_carr_agg.diff(axis=1)[df_ltm_carr_agg.eq(0).shift(axis=1)].replace(0,np.nan).count().to_frame(name=f'{name}_carr_new_logo_count').transpose()

    # Set up dataframe for arr upsell logo count
    mask_carr_upsell = df_ltm_carr_agg[df_ltm_carr_agg.gt(0)].diff(axis=1).fillna(0).gt(0)
    df_carr_upsell_logo_count = df_ltm_carr_agg.diff(axis=1)[mask_carr_upsell].replace(0,np.nan).count().to_frame(name=f'{name}_carr_upsell_logo_count').transpose()

    # # Set up dataframe for downsell arr logo count
    # mask_arr_downsell = df_ltm_arr_agg[df_ltm_arr_agg.gt(0)].diff(axis=1).fillna(0).lt(0)
    # df_arr_downsell_logo_count = df_ltm_arr_agg.diff(axis=1)[mask_arr_downsell].replace(0,np.nan).count().to_frame(name=f'{name}_arr_downsell_logo_count').transpose()

    # Set up concatenated dataframe
    carr_dfs = pd.concat([df_carr_new_logo, df_carr_upsell, df_carr_ending, df_carr_new_logo_count, df_carr_upsell_logo_count, df_carr_ending_logo_count], axis=0)
    return carr_dfs

In [15]:
#region = ['norben','dach', 'uk', 'us', 'row']
#size = ['smb','mm','ent']
#product_type = ['people', 'infinite']
#for r in region:
   # with pd.ExcelWriter(f"{report_date}-{r}-arr.xlsx") as writer:
   #     for s in size:
    #        df = arr_ltm_calculations(df_main[(df_main['region'] == r) & (df_main['size'] == s)], f'{r}_{s}_arr')
     #       df.to_excel(writer, sheet_name=f"{r}_{s}_arr")
      #  for t in product_type:
       #     df = arr_ltm_calculations(df_main[(df_main['region'] == r) & (df_main['type'] == t)], f'{r}_{t}_arr')
        #    df.to_excel(writer, sheet_name=f"{r}_{t}_arr")

In [8]:
# ALL GEOS
# Size split total arr
# total arr
df_total_filter = df_main
df_total = carr_ltm_calculations(df_total_filter, 'all-geos-total')

# # smb total arr
df_smb_total_filter = df_main[df_main['size'] == 'smb']
df_smb_total = carr_ltm_calculations(df_smb_total_filter, 'all_geos_smb')

# # mm total arr
df_mm_total_filter = df_main[df_main['size'] == 'mm']
df_mm_total = carr_ltm_calculations(df_mm_total_filter, 'all_geos_mm')

# # ent total arr
df_ent_total_filter = df_main[df_main['size'] == 'ent']
df_ent_total = carr_ltm_calculations(df_ent_total_filter, 'all_geos_ent')

# Product split total arr
# # Software total
df_software_total_filter = df_main[df_main['product'] == 'software']
df_software_total = carr_ltm_calculations(df_software_total_filter, 'all_geos_software')

# # Content total
df_content_total_filter = df_main[df_main['product'] == 'content']
df_content_total = carr_ltm_calculations(df_content_total_filter, 'all_geos_content')

# # Type split total arr
# # People total
df_people_total_filter = df_main[df_main['type'] == 'people']
df_people_total = carr_ltm_calculations(df_people_total_filter, 'all_geos_people')

# # Infinite total
df_infinite_total_filter = df_main[df_main['type'] == 'infinite']
df_infinite_total = carr_ltm_calculations(df_infinite_total_filter, 'all_geos_infinite')

# Create output excel file for geo 
# # create a excel writer object
with pd.ExcelWriter(f"{report_date}-all-geos-arr.xlsx") as writer:
   
    # use to_excel function and specify the sheet_name and index
    # to store the dataframe in specified sheet
    df_total.to_excel(writer, sheet_name="all-geos-total-arr")
    df_smb_total.to_excel(writer, sheet_name="all-geos-smb-arr")
    df_mm_total.to_excel(writer, sheet_name="all-geos-mm-arr")
    df_ent_total.to_excel(writer, sheet_name="all-geos-ent-arr")
    df_software_total.to_excel(writer, sheet_name="all-geos-software-arr")
    df_content_total.to_excel(writer, sheet_name="all-geos-content-arr")
    df_people_total.to_excel(writer, sheet_name="all-geos-people-arr")
    df_infinite_total.to_excel(writer, sheet_name="all-geos-infinite-arr")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.apply(lambda x: carr_calculation(x['id']), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.apply(lambda x: carr_calculation(x['id']), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.apply(lambda x: 

In [None]:
## ---=== END ===--- ##

In [None]:
#FUNCTION TO GET THE ARR VALUE OF EVERY CUSTOMER FOR THE 12 MONTH PERIOD

# Set dates - report date is the month after the one being reported on
# report_date = date(2024,8,1)
# ltm_period_start = report_date + relativedelta(months=-12)


# def ltm_arr(df, ltm_period_start, ltm_period_end):
        # while ltm_period_start < ltm_period_end:
            # revenue_period = ltm_period_start + relativedelta(months=+1, days=-1)
            # mask_active = (df['start'].dt.date <= revenue_period) & (df['end'].dt.date >= revenue_period)
            # if df.empty:
               #  df[f'{revenue_period}'] = np.nan
            # else:
               #  df[f'{revenue_period}'] = df.loc[mask_active]['value']
            # ltm_period_start = ltm_period_start + relativedelta(months=+1)
        # # return df

# Run function using dataframe as input and current_end_of_month to define the ltm period range
# df_ltm_arr = ltm_arr(df_main, ltm_period_start, report_date)
    
# Get the current date
# current_date = pd.Timestamp.now()

# Format the date as 'yyyy-mm-dd'
# formatted_date = current_date.strftime('%Y-%m-%d')


# Group by ids to get grid on account level
# revenue_period_cols = list(df_ltm_arr.iloc[:,18:])
# df_ltm_arr_agg = df_ltm_arr.groupby('id')[revenue_period_cols].sum()
# df_ltm_arr_agg.to_excel(f'{formatted_date}-arr-per-account.xlsx')
    

In [None]:
# # NORBEN
# # Total and size split arr
# # total arr
df_norben_total_filter = df_main[df_main['region'] == 'norben']
df_norben_total = arr_ltm_calculations(df_norben_total_filter, 'norben_total')

# # smb total arr
df_norben_smb_filter = df_main[(df_main['region'] == 'norben') & (df_main['size'] == 'smb')]
df_norben_smb = arr_ltm_calculations(df_norben_smb_filter, 'norben_smb')

# # mm total arr
df_norben_mm_filter = df_main[(df_main['region'] == 'norben') & (df_main['size'] == 'mm')]
df_norben_mm = arr_ltm_calculations(df_norben_mm_filter, 'norben_mm')

# # ent total arr
df_norben_ent_filter = df_main[(df_main['region'] == 'norben') & (df_main['size'] == 'ent')]
df_norben_ent = arr_ltm_calculations(df_norben_ent_filter, 'norben_ent')

# Type split total arr
# People total
df_norben_people_filter = df_main[(df_main['region'] == 'norben') & (df_main['type'] == 'people')]
df_norben_people = arr_ltm_calculations(df_norben_people_filter, 'norben_people')

# Infinite total
df_norben_infinite_filter = df_main[(df_main['region'] == 'norben') & (df_main['type'] == 'infinite')]
df_norben_infinite = arr_ltm_calculations(df_norben_infinite_filter, 'norben_infinite')

# Create output excel file for geo

# create a excel writer object
with pd.ExcelWriter(f"{report_date}-norben-arr.xlsx") as writer:
   
    # use to_excel function and specify the sheet_name and index
    # to store the dataframe in specified sheet
    df_norben_total.to_excel(writer, sheet_name="norben-total-arr")
    df_norben_smb.to_excel(writer, sheet_name="norben-smb-arr")
    df_norben_mm.to_excel(writer, sheet_name="norben-mm-arr")
    df_norben_ent.to_excel(writer, sheet_name="norben-ent-arr")
    df_norben_people.to_excel(writer, sheet_name="norben-people-arr")
    df_norben_infinite.to_excel(writer, sheet_name="norben-infinite-arr")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.loc[mask_active]['value']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.loc[mask_active]['value']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.loc[mask_active]['value']
A value is trying to be set on a copy of

In [None]:
# # DACH
# # Total and size split arr
# # total arr
df_dach_total_filter = df_main[df_main['region'] == 'dach']
df_dach_total = arr_ltm_calculations(df_dach_total_filter, 'dach_total')

# # smb total arr
df_dach_smb_filter = df_main[(df_main['region'] == 'dach') & (df_main['size'] == 'smb')]
df_dach_smb = arr_ltm_calculations(df_dach_smb_filter, 'dach_smb')

# # mm total arr
df_dach_mm_filter = df_main[(df_main['region'] == 'dach') & (df_main['size'] == 'mm')]
df_dach_mm = arr_ltm_calculations(df_dach_mm_filter, 'dach_mm')

# # ent total arr
df_dach_ent_filter = df_main[(df_main['region'] == 'dach') & (df_main['size'] == 'ent')]
df_dach_ent = arr_ltm_calculations(df_dach_ent_filter, 'dach_ent')

# Type split total arr
# People total
df_dach_people_filter = df_main[(df_main['region'] == 'dach') & (df_main['type'] == 'people')]
df_dach_people = arr_ltm_calculations(df_dach_people_filter, 'dach_people')

# Infinite total
df_dach_infinite_filter = df_main[(df_main['region'] == 'dach') & (df_main['type'] == 'infinite')]
df_dach_infinite = arr_ltm_calculations(df_dach_infinite_filter, 'dach_infinite')

# Create output excel file for geo

# create a excel writer object
with pd.ExcelWriter(f"{report_date}-dach-arr.xlsx") as writer:
   
    # use to_excel function and specify the sheet_name and index
    # to store the dataframe in specified sheet
    df_dach_total.to_excel(writer, sheet_name="dach-total-arr")
    df_dach_smb.to_excel(writer, sheet_name="dach-smb-arr")
    df_dach_mm.to_excel(writer, sheet_name="dach-mm-arr")
    df_dach_ent.to_excel(writer, sheet_name="dach-ent-arr")
    df_dach_people.to_excel(writer, sheet_name="dach-people-arr")
    df_dach_infinite.to_excel(writer, sheet_name="dach-infinite-arr")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.loc[mask_active]['value']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.loc[mask_active]['value']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.loc[mask_active]['value']
A value is trying to be set on a copy of

In [None]:
# # UK
# # Total and size split arr
# # total arr
df_uk_total_filter = df_main[df_main['region'] == 'uk']
df_uk_total = arr_ltm_calculations(df_uk_total_filter, 'uk_total')

# # smb total arr
df_uk_smb_filter = df_main[(df_main['region'] == 'uk') & (df_main['size'] == 'smb')]
df_uk_smb = arr_ltm_calculations(df_uk_smb_filter, 'uk_smb')

# # mm total arr
df_uk_mm_filter = df_main[(df_main['region'] == 'uk') & (df_main['size'] == 'mm')]
df_uk_mm = arr_ltm_calculations(df_uk_mm_filter, 'uk_mm')

# # ent total arr
df_uk_ent_filter = df_main[(df_main['region'] == 'uk') & (df_main['size'] == 'ent')]
df_uk_ent = arr_ltm_calculations(df_uk_ent_filter, 'uk_ent')

# Type split total arr
# People total
df_uk_people_filter = df_main[(df_main['region'] == 'uk') & (df_main['type'] == 'people')]
df_uk_people = arr_ltm_calculations(df_uk_people_filter, 'uk_people')

# Infinite total
df_uk_infinite_filter = df_main[(df_main['region'] == 'uk') & (df_main['type'] == 'infinite')]
df_uk_infinite = arr_ltm_calculations(df_uk_infinite_filter, 'uk_infinite')

# Create output excel file for geo

# create a excel writer object
with pd.ExcelWriter(f"{report_date}-uk-arr.xlsx") as writer:
   
    # use to_excel function and specify the sheet_name and index
    # to store the dataframe in specified sheet
    df_uk_total.to_excel(writer, sheet_name="uk-total-arr")
    df_uk_smb.to_excel(writer, sheet_name="uk-smb-arr")
    df_uk_mm.to_excel(writer, sheet_name="uk-mm-arr")
    df_uk_ent.to_excel(writer, sheet_name="uk-ent-arr")
    df_uk_people.to_excel(writer, sheet_name="uk-people-arr")
    df_uk_infinite.to_excel(writer, sheet_name="uk-infinite-arr")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.loc[mask_active]['value']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.loc[mask_active]['value']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.loc[mask_active]['value']
A value is trying to be set on a copy of

In [None]:
# # US
# # Total and size split arr
# # total arr
df_us_total_filter = df_main[df_main['region'] == 'us']
df_us_total = arr_ltm_calculations(df_us_total_filter, 'us_total')

# # smb total arr
df_us_smb_filter = df_main[(df_main['region'] == 'us') & (df_main['size'] == 'smb')]
df_us_smb = arr_ltm_calculations(df_us_smb_filter, 'us_smb')

# # mm total arr
df_us_mm_filter = df_main[(df_main['region'] == 'us') & (df_main['size'] == 'mm')]
df_us_mm = arr_ltm_calculations(df_us_mm_filter, 'us_mm')

# # ent total arr
df_us_ent_filter = df_main[(df_main['region'] == 'us') & (df_main['size'] == 'ent')]
df_us_ent = arr_ltm_calculations(df_us_ent_filter, 'us_ent')

# Type split total arr
# People total
df_us_people_filter = df_main[(df_main['region'] == 'us') & (df_main['type'] == 'people')]
df_us_people = arr_ltm_calculations(df_us_people_filter, 'us_people')

# Infinite total
df_us_infinite_filter = df_main[(df_main['region'] == 'us') & (df_main['type'] == 'infinite')]
df_us_infinite = arr_ltm_calculations(df_us_infinite_filter, 'us_infinite')

# Create output excel file for geo

# create a excel writer object
with pd.ExcelWriter(f"{report_date}-us-arr.xlsx") as writer:
   
    # use to_excel function and specify the sheet_name and index
    # to store the dataframe in specified sheet
    df_us_total.to_excel(writer, sheet_name="us-total-arr")
    df_us_smb.to_excel(writer, sheet_name="us-smb-arr")
    df_us_mm.to_excel(writer, sheet_name="us-mm-arr")
    df_us_ent.to_excel(writer, sheet_name="us-ent-arr")
    df_us_people.to_excel(writer, sheet_name="us-people-arr")
    df_us_infinite.to_excel(writer, sheet_name="us-infinite-arr")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.loc[mask_active]['value']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.loc[mask_active]['value']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.loc[mask_active]['value']
A value is trying to be set on a copy of

In [None]:
# # ROW
# # Total and size split arr
# # total arr
df_row_total_filter = df_main[df_main['region'] == 'row']
df_row_total = arr_ltm_calculations(df_row_total_filter, 'row_total')

# # smb total arr
df_row_smb_filter = df_main[(df_main['region'] == 'row') & (df_main['size'] == 'smb')]
df_row_smb = arr_ltm_calculations(df_row_smb_filter, 'row_smb')

# # mm total arr
df_row_mm_filter = df_main[(df_main['region'] == 'row') & (df_main['size'] == 'mm')]
df_row_mm = arr_ltm_calculations(df_row_mm_filter, 'row_mm')

# # ent total arr
df_row_ent_filter = df_main[(df_main['region'] == 'row') & (df_main['size'] == 'ent')]
df_row_ent = arr_ltm_calculations(df_row_ent_filter, 'row_ent')

# Type split total arr
# People total
df_row_people_filter = df_main[(df_main['region'] == 'row') & (df_main['type'] == 'people')]
df_row_people = arr_ltm_calculations(df_row_people_filter, 'row_people')

# Infinite total
df_row_infinite_filter = df_main[(df_main['region'] == 'row') & (df_main['type'] == 'infinite')]
df_row_infinite = arr_ltm_calculations(df_row_infinite_filter, 'row_infinite')

# Create output excel file for geo

# create a excel writer object
with pd.ExcelWriter(f"{report_date}-row-arr.xlsx") as writer:
   
    # use to_excel function and specify the sheet_name and index
    # to store the dataframe in specified sheet
    df_row_total.to_excel(writer, sheet_name="row-total-arr")
    df_row_smb.to_excel(writer, sheet_name="row-smb-arr")
    df_row_mm.to_excel(writer, sheet_name="row-mm-arr")
    df_row_ent.to_excel(writer, sheet_name="row-ent-arr")
    df_row_people.to_excel(writer, sheet_name="row-people-arr")
    df_row_infinite.to_excel(writer, sheet_name="row-infinite-arr")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.loc[mask_active]['value']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.loc[mask_active]['value']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.loc[mask_active]['value']
A value is trying to be set on a copy of

In [None]:
## OLD
# Set date periods
# current_date = date(2022,5,1)
# current_period = pd.Period(current_date, 'M')
# print(current_period)
# current_period = current_date + relativedelta(months=+1, days=-1)

In [None]:
# ---> TEST # Currently active subscriptions
mask_active = (df1['start'].dt.to_period('M') <= current_period) & (df1['end'].dt.to_period('M') >= current_period)
df1.loc[mask_active].groupby('id').agg({'value': 'sum'})

NameError: name 'df1' is not defined

In [None]:
# ---> TEST # Future committed non-renewal subscriptions
mask_future = (df1['commit'].dt.to_period('M') <= current_period) & (df1['start'].dt.to_period('M') > current_period) & (df1['renewal'].str.lower() != 'y')
df1.loc[mask_future].groupby('id').agg({'value': 'sum'})

In [None]:
# ---> TEST # Active non-cancelled subscriptions
mask_active_non_cancel = (df1['start'].dt.to_period('M') <= current_period) & (df1['end'].dt.to_period('M') >= current_period) & (df1['cancelled'].str.lower() != 'y')
df1.loc[mask_active_non_cancel].groupby('id').agg({'value': 'sum'})

In [None]:
# OLD Set up mask to find 0 values
mask_arr_eq_0 = df_ltm_arr_agg.eq(0)
mask_arr_eq_0 

In [None]:
# OLD Set up dataframe for arr churn 
df_arr_churn = df_ltm_arr_agg.diff(axis=1)[mask_arr_eq_0].agg(['sum']).rename(index={'sum': 'arr_churn'})
df_arr_churn    

In [None]:
# OLD Set up dataframe for new logo arr
df_arr_new_logo = df_ltm_arr_agg.diff(axis=1)[mask_arr_eq_0.shift(axis=1)].agg(['sum']).rename(index={'sum': 'arr_new_logo'})
df_arr_new_logo

In [None]:
df1.reindex(sorted(df1.columns), axis=1)