In [None]:
### DEFINING WEIGHTED AVERAGE OF RESIDUAL RETURNS MOMENTUM FACTOR CALCULATING FUNCTION

def get_wa_residual_factor(iter_date):
    """Compute the weighted-average residual-returns momentum factor for one date.

    Steps performed for the countries present in ``ser_ison_membership_daily``
    on ``iter_date``:

    1. Load country and regional returns for the trailing ``int_stand_win``
       business days and pass each series through ``get_country_vector``.
    2. Clip daily country returns to the band
       ``Market_Returns +/- 3 * abs(Market_Returns)``.
    3. Build rolling ``int_bmonth_len``-day compounded returns for country and
       market, and regress country on market at 12 points spaced
       ``int_bmonth_len`` business days apart (ending at ``iter_date``), each
       over a trailing ``int_regression_month`` calendar-month window.
    4. Back-fill the regression parameters onto the daily rows of the trailing
       ``int_factor_win`` business days and compute daily residual returns.
    5. Take the weighted average of ``log(1 + residual)`` per country and map
       it back with ``exp(x) - 1``.

    The result is also appended to the CSV file at ``str_wa_residual_path``
    (``;``-separated; the header is written only when the file does not exist).

    The function relies on names defined elsewhere in the file:
    ``ser_ison_membership_daily``, ``ser_country_ret_raw``,
    ``ser_region_ret_raw``, ``All``, ``get_country_vector``,
    ``exp_weight_single``, ``weighted_average_grouped``,
    ``str_wa_residual_path`` and ``LinearRegression``.

    NOTE(review): the code assumes the joined frame exposes the columns
    'Returns', 'Market_Returns' and 'Market' and index levels named 'Date'
    and 'Country' -- confirm against the definitions of the source series.
    NOTE(review): the weights series is built with exactly ``int_factor_win``
    entries, so ``iter_date`` presumably has to be a business day -- confirm.

    Parameters
    ----------
    iter_date : pandas.Timestamp
        Date for which the factor is calculated (it must support subtraction
        of pandas business-day offsets).

    Returns
    -------
    pandas.Series
        Result of applying ``weighted_average_grouped`` per 'Country' group,
        transformed with ``exp(x) - 1``.
    """
    ### Defining internal function to perform market returns regression on country returns:
    def perform_regression(df_country):
        """Regress 'Ret_Monthly' on 'Market_Ret_Monthly' for a single country.

        Returns the pair ``(intercept, beta)``; both are NaN when the group
        does not pass the observations check below.
        """
        ### Minimum number of observations:
        int_min_number = 260
        ### Last values control interval length:
        int_last_control = 10
        ### Dummy results (np.nan instead of the np.NaN alias removed in NumPy 2.0):
        (flo_intercept, flo_beta) = (np.nan, np.nan)
        ### Conditional regression performing:
        df_country = df_country.dropna()
        ### NOTE(review): rows with NaN are dropped above, so the "last values" check is
        ### always true once the minimum-count check passes -- confirm the intended order.
        if (df_country['Ret_Monthly'].count() >= int_min_number) and \
           (df_country['Ret_Monthly'][-int_last_control :].count() > 0):
            ### Activation of regression engine (reg_on_market is created in the enclosing function):
            reg_on_market.fit(df_country[['Market_Ret_Monthly']], df_country[['Ret_Monthly']])
            ### Beta extraction:
            flo_beta = reg_on_market.coef_[0][0]
            ### Intercept extraction:
            flo_intercept = reg_on_market.intercept_[0]
        ### Results output:
        return (flo_intercept, flo_beta)
    ### Length of interval to transform raw returns vector:
    int_stand_win = int(260 * 84 / 12)
    ### Length of returns series to calculate factor:
    int_factor_win = int(260 * 12 / 12)
    ### Length of half-life period of exponential weights list:
    int_half_life = int(260 * 9 / 12)
    ### Length of business month (measured in business days):
    int_bmonth_len = 21
    ### Length of interval to perform regression (measured in months):
    int_regression_month = 60
    ### Start date to load raw data:
    date_stand_start = iter_date - pd.tseries.offsets.BDay(int_stand_win - 1)
    ### Start date to extract returns to exponential weighted average calculation:
    date_factor_start = iter_date - pd.tseries.offsets.BDay(int_factor_win - 1)
    ### Datasource of country returns for particular date (should be substituted by SQL query):
    list_countries = ser_ison_membership_daily[iter_date].dropna().index.get_level_values('Country').unique()
    ser_iter_ret_raw = ser_country_ret_raw.loc[date_stand_start : iter_date, list_countries]
    ### Datasource of regional returns for particular date (should be substituted by SQL query):
    ser_iter_region_raw = ser_region_ret_raw.loc[date_stand_start : iter_date, All]
    ### Date range to reindex raw data:
    idx_iter_stand_range = pd.date_range(start = date_stand_start, end = iter_date, freq = 'B')
    ### Date range to extract returns to exponential weighted average calculation:
    idx_iter_factor_range = pd.date_range(start = date_factor_start, end = iter_date, freq = 'B')
    ### Raw country returns data transformation:
    ser_iter_ret_trans = ser_iter_ret_raw.groupby('Country').apply(get_country_vector, idx_iter_stand_range)\
                                         .swaplevel().sort_index()
    ### Raw regional returns data transformation:
    ser_iter_region_trans = ser_iter_region_raw.groupby('Market').apply(get_country_vector, idx_iter_stand_range)\
                                               .swaplevel().sort_index()
    ### Dataframe to calculate residual returns:
    ### 1) Adding regional info to returns series (status on each series date)
    ### 2) Adding regional returns
    df_iter_ret_data = (
        ser_iter_ret_trans.to_frame()
        .join(ser_ison_membership_daily)
        .dropna(subset = ['Market'])
        .set_index('Market', append = True)
        .reset_index('Country')
        .sort_index()
        .join(ser_iter_region_trans)
        .droplevel('Market')
        .set_index('Country', append = True)
        .sort_index()
    )
    ### Welsh beta adjustment: country returns are clipped to a band around the market returns
    ser_market_band = 3 * df_iter_ret_data['Market_Returns'].abs()
    df_iter_ret_data['Ret_Welsh'] = df_iter_ret_data['Returns'].clip(lower = df_iter_ret_data['Market_Returns'] - ser_market_band,
                                                                     upper = df_iter_ret_data['Market_Returns'] + ser_market_band)
    ### Cumulative monthly returns calculation (missing daily returns are treated as zero when compounding):
    df_iter_ret_data[['Country_Cum_Prod', 'Market_Cum_Prod']] = \
        (1 + df_iter_ret_data[['Ret_Welsh', 'Market_Returns']].fillna(0.0)).groupby('Country').cumprod()
    df_iter_ret_data[['Ret_Monthly', 'Market_Ret_Monthly']] = (
        df_iter_ret_data[['Country_Cum_Prod', 'Market_Cum_Prod']] /
        df_iter_ret_data[['Country_Cum_Prod', 'Market_Cum_Prod']].groupby('Country').shift(int_bmonth_len)
    ) - 1
    ### Fill monthly returns with NaN if last daily returns is NaN:
    df_iter_ret_data.loc[df_iter_ret_data['Returns'].isna(), 'Ret_Monthly'] = np.nan
    df_iter_ret_data.loc[df_iter_ret_data['Market_Returns'].isna(), 'Market_Ret_Monthly'] = np.nan
    ### Choosing dates for the regressions (12 points, one per business month, ending at iter_date):
    idx_iter_regress_point = pd.date_range(end = iter_date, freq = pd.tseries.offsets.BDay(int_bmonth_len), periods = 12)
    ### Initialization of regression engine (used by perform_regression through the closure):
    reg_on_market = LinearRegression()
    ### Container to collect regression coefficients:
    list_params = []
    ### Looping over chosen dates:
    for iter_point in idx_iter_regress_point:
        ### Shifting calendar date to business date:
        iter_b_point = iter_point + pd.tseries.offsets.BDay(0)
        ### Extracting interval to perform regression (trailing int_regression_month calendar months):
        date_regress_start = iter_b_point + pd.DateOffset(days = 1) - pd.DateOffset(months = int_regression_month)
        df_point_to_regress = df_iter_ret_data[['Ret_Monthly', 'Market_Ret_Monthly']].loc[date_regress_start : iter_b_point]
        ### Performing of regression (pair of Intercept & Beta as a result):
        ser_point_params = df_point_to_regress.groupby('Country').apply(perform_regression)
        ### Separating coefficient vectors to individual columns:
        df_point_params = pd.DataFrame(ser_point_params.to_list(), columns = ['Intercept', 'Beta'], index = ser_point_params.index)
        ### Adding parameters to collection:
        list_params.append(pd.concat({iter_b_point: df_point_params}, names = ['Date']))
    ### Transforming parameters collection to dataframe:
    df_iter_params = pd.concat(list_params).sort_index()
    ### Adding parameters to calculate residuals and performing backfill to convert monthly data to daily.
    ### Existing NaN values are masked with the -1000.0 sentinel before the join, so the backfill
    ### only propagates the joined parameters; the sentinel is turned back into NaN afterwards:
    df_iter_ret_data = (
        df_iter_ret_data.loc[idx_iter_factor_range]
        .fillna(-1000.0)
        .join(df_iter_params)
        .groupby('Country')
        .bfill()
        .replace({-1000.0 : np.nan})
    )
    ### Residual returns calculation (the intercept of the monthly regression is scaled by the month length):
    df_iter_ret_data['Residual'] = df_iter_ret_data['Returns'] - \
                                    (df_iter_ret_data['Intercept'] / int_bmonth_len + df_iter_ret_data['Beta'] * df_iter_ret_data['Market_Returns'])
    ### Exponential weights series preparation (list is reversed to align with ascending dates):
    list_weight = [exp_weight_single(int_half_life, iter_num) for iter_num in range(int_factor_win)][::-1]
    ser_iter_weight = pd.Series(list_weight, index = idx_iter_factor_range)
    ser_iter_weight.index.name = 'Date'
    ser_iter_weight.name = 'Weight'
    ### Residual Weighted Average Factor calculation:
    ser_iter_factor = np.log(1 + df_iter_ret_data['Residual']).groupby('Country').apply(weighted_average_grouped, ser_iter_weight)
    ser_iter_factor = np.exp(ser_iter_factor) - 1
    ### Add to csv file (should be substituted by SQL query):
    ser_iter_factor_csv = pd.concat({iter_date: ser_iter_factor}, names = ['Date'])
    ser_iter_factor_csv.to_csv(str_wa_residual_path, mode = 'a', header = not os.path.exists(str_wa_residual_path), sep = ';')
    ### Results output:
    return ser_iter_factor