In [1]:
import pandas as pd
from datetime import date
import numpy as np
from dateutil.relativedelta import relativedelta

In [2]:
# Load main subscription data
df_main = pd.read_excel('/Users/emirkabasoglu/Desktop/Eloomi/VS Codes/Jupyter Notebook Scripts/Snowflake Scripts/Board_Metrics_Input_File.xlsx')
# df_main = pd.read_excel('/Users/jesperfriislarnaes/Library/CloudStorage/OneDrive-eloomi/Scripts/New_Dashboard/test-data-a.xlsx')

In [3]:
# Set dates - report date is the month after the one being reported on
report_date = date(2023,10,1)
ltm_period_start = report_date + relativedelta(months=-13)

In [4]:
df_main['start'] = pd.to_datetime(df_main['start'])
df_main['end'] = pd.to_datetime(df_main['end'])
df_main['commit'] = pd.to_datetime(df_main['commit'])


In [5]:
def carr_ltm_calculations(df, name):
    # Function to get CARR main grid
    def ltm_carr(df, ltm_period_start, ltm_period_end):
        while ltm_period_start < ltm_period_end:
            revenue_period = ltm_period_start + relativedelta(months=+1, days=-1)
            # Function to get carr calculation
            def carr_calculation(id):
                df1 = df.loc[df['id'] == id]
                # Set up per-id dataframe for active non-cancelled subscriptions
                mask_active_non_cancel = (df1['start'].dt.date <= revenue_period) & (df1['end'].dt.date >= revenue_period) & (df1['cancelled'].str.lower() != 'y')
                active_non_cancel = df1.loc[mask_active_non_cancel]['value'].sum()

                # Set up per-id dataframe for future committed non-renewal subscriptions
                mask_future = (df1['commit'].dt.date <= revenue_period) & (df1['start'].dt.date > revenue_period) #& (df1['renewal'].str.lower() != 'y')
                future = df1.loc[mask_future]['value'].sum()

                # Set up per-id dataframe for currently active subscriptions
                mask_active = (df1['start'].dt.date <= revenue_period) & (df1['end'].dt.date >= revenue_period)
                active = df1.loc[mask_active]['value'].sum()

                if (active_non_cancel + future - active) > 0:
                    barr =  active_non_cancel + future - active
                else:
                    barr = 0

                if (active + barr) > active:
                    carr = active + barr
                else:
                    carr = active
                return carr
            if df.empty:
                df[f'{revenue_period}'] = np.nan    
            else:
                df[f'{revenue_period}'] = df.apply(lambda x: carr_calculation(x['id']), axis=1)
            ltm_period_start = ltm_period_start + relativedelta(months=+1)
        return df.drop_duplicates(subset='id', keep='first')
    # df1 = ltm_carr(df, current_period)

    # Run function using dataframe as input and current_period to define the ltm period range
    df_ltm_carr = ltm_carr(df, ltm_period_start, report_date)

    # Group by ids to get grid on account level
    revenue_period_cols = list(df_ltm_carr.iloc[:,12:])
    df_ltm_carr_agg = df_ltm_carr.groupby('id')[revenue_period_cols].sum()
    df_ltm_carr_agg.to_excel(f'{name}-ltm-carr-per-account.xlsx')

    # Set up dataframe for carr ending
    df_carr_ending = df_ltm_carr_agg.agg(['sum']).rename(index={'sum': f'{name}_carr_ending'})

    # Set up dataframe for arr churn 
    # df_arr_churn = df_ltm_arr_agg.diff(axis=1)[df_ltm_arr_agg.eq(0)].agg(['sum']).rename(index={'sum': f'{name}_arr_churn'})

    # Set up dataframe for new logo arr
    df_carr_new_logo = df_ltm_carr_agg.diff(axis=1)[df_ltm_carr_agg.eq(0).shift(axis=1)].agg(['sum']).rename(index={'sum': f'{name}_carr_new_logo'})

    # Set up dataframe for upsell arr
    mask_carr_upsell = df_ltm_carr_agg[df_ltm_carr_agg.gt(0)].diff(axis=1).fillna(0).gt(0)
    df_carr_upsell = df_ltm_carr_agg.diff(axis=1)[mask_carr_upsell].agg(['sum']).rename(index={'sum': f'{name}_carr_upsell'})

    # Set up dataframe for downsell arr
    # mask_arr_downsell = df_ltm_arr_agg[df_ltm_arr_agg.gt(0)].diff(axis=1).fillna(0).lt(0)
    # df_arr_downsell = df_ltm_arr_agg.diff(axis=1)[mask_arr_downsell].agg(['sum']).rename(index={'sum': f'{name}_arr_downsell'})

    # Set up dataframe for arr ending logo count
    df_carr_ending_logo_count = df_ltm_carr_agg.replace(0,np.nan).count().to_frame(name=f'{name}_carr_ending_logo_count').transpose()

    # Set up dataframe for arr churn logo count
    # df_arr_churn_logo_count = df_ltm_arr_agg.diff(axis=1)[df_ltm_arr_agg.eq(0)].replace(0,np.nan).count().to_frame(name=f'{name}_arr_churn_logo_count').transpose()

    # Set up dataframe for arr new logo count
    df_carr_new_logo_count = df_ltm_carr_agg.diff(axis=1)[df_ltm_carr_agg.eq(0).shift(axis=1)].replace(0,np.nan).count().to_frame(name=f'{name}_carr_new_logo_count').transpose()

    # Set up dataframe for arr upsell logo count
    mask_carr_upsell = df_ltm_carr_agg[df_ltm_carr_agg.gt(0)].diff(axis=1).fillna(0).gt(0)
    df_carr_upsell_logo_count = df_ltm_carr_agg.diff(axis=1)[mask_carr_upsell].replace(0,np.nan).count().to_frame(name=f'{name}_carr_upsell_logo_count').transpose()

    # # Set up dataframe for downsell arr logo count
    # mask_arr_downsell = df_ltm_arr_agg[df_ltm_arr_agg.gt(0)].diff(axis=1).fillna(0).lt(0)
    # df_arr_downsell_logo_count = df_ltm_arr_agg.diff(axis=1)[mask_arr_downsell].replace(0,np.nan).count().to_frame(name=f'{name}_arr_downsell_logo_count').transpose()

    # Set up concatenated dataframe
    carr_dfs = pd.concat([df_carr_new_logo, df_carr_upsell, df_carr_ending, df_carr_new_logo_count, df_carr_upsell_logo_count, df_carr_ending_logo_count], axis=0)
    return carr_dfs

In [6]:
df_main.to_excel('carr-main-check-1.xlsx')

In [7]:
# # NORBEN and SOURCE
# # Total and size split carr
# # total carr
# df_norben_total_filter = df_main[df_main['region'] == 'norben']
# df_norben_total = carr_ltm_calculations(df_norben_total_filter, 'norben_total')

# # Google total carr
df_norben_google_filter = df_main[(df_main['region'] == 'norben') & (df_main['inbound_source'] == 'google')]
df_norben_google = carr_ltm_calculations(df_norben_google_filter, 'norben_google')

# # Bing total carr
df_norben_bing_filter = df_main[(df_main['region'] == 'norben') & (df_main['inbound_source'] == 'bing')]
df_norben_bing = carr_ltm_calculations(df_norben_bing_filter, 'norben_bing')

# # Capterra total carr
# df_norben_capterra_filter = df_main[(df_main['region'] == 'norben') & (df_main['inbound_source'] == 'capterra')]
# df_norben_capterra = carr_ltm_calculations(df_norben_capterra_filter, 'norben_capterra')

# # Facebook total carr
df_norben_facebook_filter = df_main[(df_main['region'] == 'norben') & (df_main['inbound_source'] == 'facebook')]
df_norben_facebook = carr_ltm_calculations(df_norben_facebook_filter, 'norben_facebook')

# # Linkedin total carr
df_norben_linkedin_filter = df_main[(df_main['region'] == 'norben') & (df_main['inbound_source'] == 'linkedin')]
df_norben_linkedin = carr_ltm_calculations(df_norben_linkedin_filter, 'norben_linkedin')

# # Review total carr
df_norben_review_filter = df_main[(df_main['region'] == 'norben') & (df_main['inbound_source'] == 'review')]
df_norben_review = carr_ltm_calculations(df_norben_review_filter, 'norben_review')

# # other total carr
df_norben_other_filter = df_main[(df_main['region'] == 'norben') & (df_main['inbound_source'] == 'other')]
df_norben_other = carr_ltm_calculations(df_norben_other_filter, 'norben_other')
# Create output excel file for geo

# create a excel writer object
with pd.ExcelWriter(f"{report_date}-norben-sources-carr.xlsx") as writer:
   
    # use to_excel function and specify the sheet_name and index
    # to store the dataframe in specified sheet
    df_norben_google.to_excel(writer, sheet_name="norben-google-carr")
    df_norben_bing.to_excel(writer, sheet_name="norben-bing-carr")
    # df_norben_capterra.to_excel(writer, sheet_name="norben-capterra-carr")
    df_norben_facebook.to_excel(writer, sheet_name="norben-facebook-carr")
    df_norben_linkedin.to_excel(writer, sheet_name="norben-linkedin-carr")
    df_norben_review.to_excel(writer, sheet_name="norben-review-carr")
    df_norben_other.to_excel(writer, sheet_name="norben-other-carr")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.apply(lambda x: carr_calculation(x['id']), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.apply(lambda x: carr_calculation(x['id']), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.apply(lambda x: 

In [8]:
# # UK and SOURCE
# # Total and size split carr
# # total carr
# df_norben_total_filter = df_main[df_main['region'] == 'norben']
# df_norben_total = carr_ltm_calculations(df_norben_total_filter, 'norben_total')

# # Google total carr
df_uk_google_filter = df_main[(df_main['region'] == 'uk') & (df_main['inbound_source'] == 'google')]
df_uk_google = carr_ltm_calculations(df_uk_google_filter, 'uk_google')

# # Bing total carr
df_uk_bing_filter = df_main[(df_main['region'] == 'uk') & (df_main['inbound_source'] == 'bing')]
df_uk_bing = carr_ltm_calculations(df_uk_bing_filter, 'uk_bing')

# # # Capterra total carr
# df_uk_capterra_filter = df_main[(df_main['region'] == 'uk') & (df_main['inbound_source'] == 'capterra')]
# df_uk_capterra = carr_ltm_calculations(df_uk_capterra_filter, 'uk_capterra')

# # Facebook total carr
df_uk_facebook_filter = df_main[(df_main['region'] == 'uk') & (df_main['inbound_source'] == 'facebook')]
df_uk_facebook = carr_ltm_calculations(df_uk_facebook_filter, 'uk_facebook')

# # Linkedin total carr
df_uk_linkedin_filter = df_main[(df_main['region'] == 'uk') & (df_main['inbound_source'] == 'linkedin')]
df_uk_linkedin = carr_ltm_calculations(df_uk_linkedin_filter, 'uk_linkedin')

# # Review total carr
df_uk_review_filter = df_main[(df_main['region'] == 'uk') & (df_main['inbound_source'] == 'review')]
df_uk_review = carr_ltm_calculations(df_uk_review_filter, 'uk_review')

# # other total carr
df_uk_other_filter = df_main[(df_main['region'] == 'uk') & (df_main['inbound_source'] == 'other')]
df_uk_other = carr_ltm_calculations(df_uk_other_filter, 'uk_other')
# Create output excel file for geo

# create a excel writer object
with pd.ExcelWriter(f"{report_date}-uk-sources-carr.xlsx") as writer:
   
    # use to_excel function and specify the sheet_name and index
    # to store the dataframe in specified sheet
    df_uk_google.to_excel(writer, sheet_name="uk-google-carr")
    df_uk_bing.to_excel(writer, sheet_name="uk-bing-carr")
    # df_uk_capterra.to_excel(writer, sheet_name="uk-capterra-carr")
    df_uk_facebook.to_excel(writer, sheet_name="uk-facebook-carr")
    df_uk_linkedin.to_excel(writer, sheet_name="uk-linkedin-carr")
    df_uk_review.to_excel(writer, sheet_name="uk-review-carr")
    df_uk_other.to_excel(writer, sheet_name="uk-other-carr")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.apply(lambda x: carr_calculation(x['id']), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.apply(lambda x: carr_calculation(x['id']), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.apply(lambda x: 

KeyboardInterrupt: 

In [None]:
# # US and SOURCE
# # Total and size split carr
# # total carr
# df_norben_total_filter = df_main[df_main['region'] == 'norben']
# df_norben_total = carr_ltm_calculations(df_norben_total_filter, 'norben_total')

# # Google total carr
df_us_google_filter = df_main[(df_main['region'] == 'us') & (df_main['inbound_source'] == 'google')]
df_us_google = carr_ltm_calculations(df_us_google_filter, 'us_google')

# # Bing total carr
df_us_bing_filter = df_main[(df_main['region'] == 'us') & (df_main['inbound_source'] == 'bing')]
df_us_bing = carr_ltm_calculations(df_us_bing_filter, 'us_bing')

# # # Capterra total carr
# df_us_capterra_filter = df_main[(df_main['region'] == 'us') & (df_main['inbound_source'] == 'capterra')]
# df_us_capterra = carr_ltm_calculations(df_us_capterra_filter, 'us_capterra')

# # Facebook total carr
df_us_facebook_filter = df_main[(df_main['region'] == 'us') & (df_main['inbound_source'] == 'facebook')]
df_us_facebook = carr_ltm_calculations(df_us_facebook_filter, 'us_facebook')

# # Linkedin total carr
df_us_linkedin_filter = df_main[(df_main['region'] == 'us') & (df_main['inbound_source'] == 'linkedin')]
df_us_linkedin = carr_ltm_calculations(df_us_linkedin_filter, 'us_linkedin')

# # Review total carr
df_us_review_filter = df_main[(df_main['region'] == 'us') & (df_main['inbound_source'] == 'review')]
df_us_review = carr_ltm_calculations(df_us_review_filter, 'us_review')

# # other total carr
df_us_other_filter = df_main[(df_main['region'] == 'us') & (df_main['inbound_source'] == 'other')]
df_us_other = carr_ltm_calculations(df_us_other_filter, 'us_other')
# Create output excel file for geo

# create a excel writer object
with pd.ExcelWriter(f"{report_date}-us-sources-carr.xlsx") as writer:
   
    # use to_excel function and specify the sheet_name and index
    # to store the dataframe in specified sheet
    df_us_google.to_excel(writer, sheet_name="us-google-carr")
    df_us_bing.to_excel(writer, sheet_name="us-bing-carr")
    # df_us_capterra.to_excel(writer, sheet_name="us-capterra-carr")
    df_us_facebook.to_excel(writer, sheet_name="us-facebook-carr")
    df_us_linkedin.to_excel(writer, sheet_name="us-linkedin-carr")
    df_us_review.to_excel(writer, sheet_name="us-review-carr")
    df_us_other.to_excel(writer, sheet_name="us-other-carr")

In [None]:
# # DACH and SOURCE
# # Total and size split carr
# # total carr
# df_norben_total_filter = df_main[df_main['region'] == 'norben']
# df_norben_total = carr_ltm_calculations(df_norben_total_filter, 'norben_total')

# # Google total carr
df_dach_google_filter = df_main[(df_main['region'] == 'dach') & (df_main['inbound_source'] == 'google')]
df_dach_google = carr_ltm_calculations(df_dach_google_filter, 'dach_google')

# # Bing total carr
df_dach_bing_filter = df_main[(df_main['region'] == 'dach') & (df_main['inbound_source'] == 'bing')]
df_dach_bing = carr_ltm_calculations(df_dach_bing_filter, 'dach_bing')

# # # Capterra total carr
# df_dach_capterra_filter = df_main[(df_main['region'] == 'dach') & (df_main['inbound_source'] == 'capterra')]
# df_dach_capterra = carr_ltm_calculations(df_dach_capterra_filter, 'dach_capterra')

# # Facebook total carr
df_dach_facebook_filter = df_main[(df_main['region'] == 'dach') & (df_main['inbound_source'] == 'facebook')]
df_dach_facebook = carr_ltm_calculations(df_dach_facebook_filter, 'dach_facebook')

# # Linkedin total carr
df_dach_linkedin_filter = df_main[(df_main['region'] == 'dach') & (df_main['inbound_source'] == 'linkedin')]
df_dach_linkedin = carr_ltm_calculations(df_dach_linkedin_filter, 'dach_linkedin')

# # Review total carr
df_dach_review_filter = df_main[(df_main['region'] == 'dach') & (df_main['inbound_source'] == 'review')]
df_dach_review = carr_ltm_calculations(df_dach_review_filter, 'dach_review')

# # other total carr
df_dach_other_filter = df_main[(df_main['region'] == 'dach') & (df_main['inbound_source'] == 'other')]
df_dach_other = carr_ltm_calculations(df_dach_other_filter, 'dach_other')
# Create output excel file for geo

# create a excel writer object
with pd.ExcelWriter(f"{report_date}-dach-sources-carr.xlsx") as writer:
   
    # use to_excel function and specify the sheet_name and index
    # to store the dataframe in specified sheet
    df_dach_google.to_excel(writer, sheet_name="dach-google-carr")
    df_dach_bing.to_excel(writer, sheet_name="dach-bing-carr")
    # df_dach_capterra.to_excel(writer, sheet_name="dach-capterra-carr")
    df_dach_facebook.to_excel(writer, sheet_name="dach-facebook-carr")
    df_dach_linkedin.to_excel(writer, sheet_name="dach-linkedin-carr")
    df_dach_review.to_excel(writer, sheet_name="dach-review-carr")
    df_dach_other.to_excel(writer, sheet_name="dach-other-carr")

In [None]:
# # ROW and SOURCE
# # Total and size split carr
# # total carr
# df_norben_total_filter = df_main[df_main['region'] == 'norben']
# df_norben_total = carr_ltm_calculations(df_norben_total_filter, 'norben_total')

# # Google total carr
df_row_google_filter = df_main[(df_main['region'] == 'row') & (df_main['inbound_source'] == 'google')]
df_row_google = carr_ltm_calculations(df_row_google_filter, 'row_google')

# # Bing total carr
df_row_bing_filter = df_main[(df_main['region'] == 'row') & (df_main['inbound_source'] == 'bing')]
df_row_bing = carr_ltm_calculations(df_row_bing_filter, 'row_bing')

# # # Capterra total carr
# df_row_capterra_filter = df_main[(df_main['region'] == 'row') & (df_main['inbound_source'] == 'capterra')]
# df_row_capterra = carr_ltm_calculations(df_row_capterra_filter, 'row_capterra')

# # Facebook total carr
df_row_facebook_filter = df_main[(df_main['region'] == 'row') & (df_main['inbound_source'] == 'facebook')]
df_row_facebook = carr_ltm_calculations(df_row_facebook_filter, 'row_facebook')

# # Linkedin total carr
df_row_linkedin_filter = df_main[(df_main['region'] == 'row') & (df_main['inbound_source'] == 'linkedin')]
df_row_linkedin = carr_ltm_calculations(df_row_linkedin_filter, 'row_linkedin')

# # Review total carr
df_row_review_filter = df_main[(df_main['region'] == 'row') & (df_main['inbound_source'] == 'review')]
df_row_review = carr_ltm_calculations(df_row_review_filter, 'row_review')

# # other total carr
df_row_other_filter = df_main[(df_main['region'] == 'row') & (df_main['inbound_source'] == 'other')]
df_row_other = carr_ltm_calculations(df_row_other_filter, 'row_other')
# Create output excel file for geo

# create a excel writer object
with pd.ExcelWriter(f"{report_date}-row-sources-carr.xlsx") as writer:
   
    # use to_excel function and specify the sheet_name and index
    # to store the dataframe in specified sheet
    df_row_google.to_excel(writer, sheet_name="row-google-carr")
    df_row_bing.to_excel(writer, sheet_name="row-bing-carr")
    # df_row_capterra.to_excel(writer, sheet_name="row-capterra-carr")
    df_row_facebook.to_excel(writer, sheet_name="row-facebook-carr")
    df_row_linkedin.to_excel(writer, sheet_name="row-linkedin-carr")
    df_row_review.to_excel(writer, sheet_name="row-review-carr")
    df_row_other.to_excel(writer, sheet_name="row-other-carr")

In [None]:
# ALL GEOS
# Size split total carr
# total carr
df_total_filter = df_main
df_total = carr_ltm_calculations(df_total_filter, 'all_geos_total')

# smb total carr
df_smb_total_filter = df_main[df_main['size'] == 'smb']
df_smb_total = carr_ltm_calculations(df_smb_total_filter, 'all_geos_smb')

# mm total carr
df_mm_total_filter = df_main[df_main['size'] == 'mm']
df_mm_total = carr_ltm_calculations(df_mm_total_filter, 'all_geos_mm')

# ent total carr
df_ent_total_filter = df_main[df_main['size'] == 'ent']
df_ent_total = carr_ltm_calculations(df_ent_total_filter, 'all_geos_ent')

# # Product split total carr
# # Software total
df_software_total_filter = df_main[df_main['product'] == 'software']
df_software_total = carr_ltm_calculations(df_software_total_filter, 'all_geos_software')

# # Content total
df_content_total_filter = df_main[df_main['product'] == 'content']
df_content_total = carr_ltm_calculations(df_content_total_filter, 'all_geos_content')

# # Type split total carr
# # People total
df_people_total_filter = df_main[df_main['type'] == 'people']
df_people_total = carr_ltm_calculations(df_people_total_filter, 'all_geos_people')

# # Infinite total
df_infinite_total_filter = df_main[df_main['type'] == 'infinite']
df_infinite_total = carr_ltm_calculations(df_infinite_total_filter, 'all_geos_infinite')

# Create output excel file for geo 
# # create a excel writer object
with pd.ExcelWriter(f"{report_date}-all-geos-carr.xlsx") as writer:
   
    # use to_excel function and specify the sheet_name and index
    # to store the dataframe in specified sheet
    df_total.to_excel(writer, sheet_name="all-geos-total-carr")
    df_smb_total.to_excel(writer, sheet_name="all-geos-smb-carr")
    df_mm_total.to_excel(writer, sheet_name="all-geos-mm-carr")
    df_ent_total.to_excel(writer, sheet_name="all-geos-ent-carr")
    df_software_total.to_excel(writer, sheet_name="all-geos-software-carr")
    df_content_total.to_excel(writer, sheet_name="all-geos-content-carr")
    df_people_total.to_excel(writer, sheet_name="all-geos-people-carr")
    df_infinite_total.to_excel(writer, sheet_name="all-geos-infinite-carr")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.apply(lambda x: carr_calculation(x['id']), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.apply(lambda x: carr_calculation(x['id']), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f'{revenue_period}'] = df.apply(lambda x: 

In [None]:
# # NORBEN
# # Total and size split carr
# # total carr
df_norben_total_filter = df_main[df_main['region'] == 'norben']
df_norben_total = carr_ltm_calculations(df_norben_total_filter, 'norben_total')

# # smb total carr
df_norben_smb_filter = df_main[(df_main['region'] == 'norben') & (df_main['size'] == 'smb')]
df_norben_smb = carr_ltm_calculations(df_norben_smb_filter, 'norben_smb')

# # mm total carr
df_norben_mm_filter = df_main[(df_main['region'] == 'norben') & (df_main['size'] == 'mm')]
df_norben_mm = carr_ltm_calculations(df_norben_mm_filter, 'norben_mm')

# # ent total carr
df_norben_ent_filter = df_main[(df_main['region'] == 'norben') & (df_main['size'] == 'ent')]
df_norben_ent = carr_ltm_calculations(df_norben_ent_filter, 'norben_ent')

# Create output excel file for geo

# create a excel writer object
with pd.ExcelWriter(f"{report_date}-norben-carr.xlsx") as writer:
   
    # use to_excel function and specify the sheet_name and index
    # to store the dataframe in specified sheet
    df_norben_total.to_excel(writer, sheet_name="norben-total-carr")
    df_norben_smb.to_excel(writer, sheet_name="norben-smb-carr")
    df_norben_mm.to_excel(writer, sheet_name="norben-mm-carr")
    df_norben_ent.to_excel(writer, sheet_name="norben-ent-carr")

In [None]:
# # DACH
# # Total and size split carr
# # total carr
df_dach_total_filter = df_main[df_main['region'] == 'dach']
df_dach_total = carr_ltm_calculations(df_dach_total_filter, 'dach_total')

# # smb total carr
df_dach_smb_filter = df_main[(df_main['region'] == 'dach') & (df_main['size'] == 'smb')]
df_dach_smb = carr_ltm_calculations(df_dach_smb_filter, 'dach_smb')

# # mm total carr
df_dach_mm_filter = df_main[(df_main['region'] == 'dach') & (df_main['size'] == 'mm')]
df_dach_mm = carr_ltm_calculations(df_dach_mm_filter, 'dach_mm')

# # ent total carr
df_dach_ent_filter = df_main[(df_main['region'] == 'dach') & (df_main['size'] == 'ent')]
df_dach_ent = carr_ltm_calculations(df_dach_ent_filter, 'dach_ent')

# Create output excel file for geo

# create a excel writer object
with pd.ExcelWriter(f"{report_date}-dach-carr.xlsx") as writer:
   
    # use to_excel function and specify the sheet_name and index
    # to store the dataframe in specified sheet
    df_dach_total.to_excel(writer, sheet_name="dach-total-carr")
    df_dach_smb.to_excel(writer, sheet_name="dach-smb-carr")
    df_dach_mm.to_excel(writer, sheet_name="dach-mm-carr")
    df_dach_ent.to_excel(writer, sheet_name="dach-ent-carr")

In [None]:
# # UK
# # Total and size split carr
# # total carr
df_uk_total_filter = df_main[df_main['region'] == 'uk']
df_uk_total = carr_ltm_calculations(df_uk_total_filter, 'uk_total')

# # smb total carr
df_uk_smb_filter = df_main[(df_main['region'] == 'uk') & (df_main['size'] == 'smb')]
df_uk_smb = carr_ltm_calculations(df_uk_smb_filter, 'uk_smb')

# # mm total carr
df_uk_mm_filter = df_main[(df_main['region'] == 'uk') & (df_main['size'] == 'mm')]
df_uk_mm = carr_ltm_calculations(df_uk_mm_filter, 'uk_mm')

# # ent total carr
df_uk_ent_filter = df_main[(df_main['region'] == 'uk') & (df_main['size'] == 'ent')]
df_uk_ent = carr_ltm_calculations(df_uk_ent_filter, 'uk_ent')

# Create output excel file for geo

# create a excel writer object
with pd.ExcelWriter(f"{report_date}-uk-carr.xlsx") as writer:
   
    # use to_excel function and specify the sheet_name and index
    # to store the dataframe in specified sheet
    df_uk_total.to_excel(writer, sheet_name="uk-total-carr")
    df_uk_smb.to_excel(writer, sheet_name="uk-smb-carr")
    df_uk_mm.to_excel(writer, sheet_name="uk-mm-carr")
    df_uk_ent.to_excel(writer, sheet_name="uk-ent-carr")

In [None]:
# # US
# # Total and size split carr
# # total carr
df_us_total_filter = df_main[df_main['region'] == 'us']
df_us_total = carr_ltm_calculations(df_us_total_filter, 'us_total')

# # smb total carr
df_us_smb_filter = df_main[(df_main['region'] == 'us') & (df_main['size'] == 'smb')]
df_us_smb = carr_ltm_calculations(df_us_smb_filter, 'us_smb')

# # mm total carr
df_us_mm_filter = df_main[(df_main['region'] == 'us') & (df_main['size'] == 'mm')]
df_us_mm = carr_ltm_calculations(df_us_mm_filter, 'us_mm')

# # ent total carr
df_us_ent_filter = df_main[(df_main['region'] == 'us') & (df_main['size'] == 'ent')]
df_us_ent = carr_ltm_calculations(df_us_ent_filter, 'us_ent')

# Create output excel file for geo

# create a excel writer object
with pd.ExcelWriter(f"{report_date}-us-carr.xlsx") as writer:
   
    # use to_excel function and specify the sheet_name and index
    # to store the dataframe in specified sheet
    df_us_total.to_excel(writer, sheet_name="us-total-carr")
    df_us_smb.to_excel(writer, sheet_name="us-smb-carr")
    df_us_mm.to_excel(writer, sheet_name="us-mm-carr")
    df_us_ent.to_excel(writer, sheet_name="us-ent-carr")

In [None]:
# # ROW
# # Total and size split carr
# # total carr
df_row_total_filter = df_main[df_main['region'] == 'row']
df_row_total = carr_ltm_calculations(df_row_total_filter, 'row_total')

# # smb total carr
df_row_smb_filter = df_main[(df_main['region'] == 'row') & (df_main['size'] == 'smb')]
df_row_smb = carr_ltm_calculations(df_row_smb_filter, 'row_smb')

# # mm total carr
df_row_mm_filter = df_main[(df_main['region'] == 'row') & (df_main['size'] == 'mm')]
df_row_mm = carr_ltm_calculations(df_row_mm_filter, 'row_mm')

# # ent total carr
df_row_ent_filter = df_main[(df_main['region'] == 'row') & (df_main['size'] == 'ent')]
df_row_ent = carr_ltm_calculations(df_row_ent_filter, 'row_ent')

# Create output excel file for geo

# create a excel writer object
with pd.ExcelWriter(f"{report_date}-row-carr.xlsx") as writer:
   
    # use to_excel function and specify the sheet_name and index
    # to store the dataframe in specified sheet
    df_row_total.to_excel(writer, sheet_name="row-total-carr")
    df_row_smb.to_excel(writer, sheet_name="row-smb-carr")
    df_row_mm.to_excel(writer, sheet_name="row-mm-carr")
    df_row_ent.to_excel(writer, sheet_name="row-ent-carr")

In [None]:
####END########

In [None]:
arr_dfs.to_excel('arr-dfs-to-check-2.xlsx')

In [None]:
df_ltm_arr_agg.to_excel('ltm-arr-to-check-1.xlsx')

In [None]:
# ---> TEST # Currently active subscriptions
mask_active = (df1['start'].dt.to_period('M') <= current_period) & (df1['end'].dt.to_period('M') >= current_period)
df1.loc[mask_active].groupby('id').agg({'value': 'sum'})

In [None]:
# ---> TEST # Future committed non-renewal subscriptions
mask_future = (df1['commit'].dt.to_period('M') <= current_period) & (df1['start'].dt.to_period('M') > current_period) & (df1['renewal'].str.lower() != 'y')
df1.loc[mask_future].groupby('id').agg({'value': 'sum'})

In [None]:
# ---> TEST # Active non-cancelled subscriptions
mask_active_non_cancel = (df1['start'].dt.to_period('M') <= current_period) & (df1['end'].dt.to_period('M') >= current_period) & (df1['cancelled'].str.lower() != 'y')
df1.loc[mask_active_non_cancel].groupby('id').agg({'value': 'sum'})

In [None]:
# CARR = active_ + future - cancelled; if CARR > ARR then CARR = CARR else CARR = ARR
# CARR_BOOKINGS = active + future

In [None]:
# OLD Set up mask to find 0 values
mask_arr_eq_0 = df_ltm_arr_agg.eq(0)
mask_arr_eq_0 

In [None]:
# OLD Set up dataframe for arr churn 
df_arr_churn = df_ltm_arr_agg.diff(axis=1)[mask_arr_eq_0].agg(['sum']).rename(index={'sum': 'arr_churn'})
df_arr_churn    

In [None]:
# OLD Set up dataframe for new logo arr
df_arr_new_logo = df_ltm_arr_agg.diff(axis=1)[mask_arr_eq_0.shift(axis=1)].agg(['sum']).rename(index={'sum': 'arr_new_logo'})
df_arr_new_logo

In [None]:
df1.reindex(sorted(df1.columns), axis=1)