In [1]:
import pandas as pd
import numpy as np
import datetime
import random

import sys
sys.path.append("/Users/derekdewald/Documents/Python/Github_Repo/d_py_functions")

from synthetic_mbr import branch_mbr_composition_dict,mbr_profile_dict,legacy_city_dict, general_assumptions,pick_from_dict,calculate_rng_from_df_low_high,simplistic_engagement_calculation,create_column_from_dict_distribution,calculate_distribution_from_dictlist,create_mbr_information,create_random_value_from_dict, decouple_txn,monthly_payment_dict,pos_txn_dict,random_uniform_normalized_list,random_choice_from_uniform_list,random_uniform_normalized_df,replicate_df_row,flatten_clean_dict

try:
    rng = np.random.default_rng(seed)
except:
    rng = np.random.default_rng()

## Generation of Input Data, including Lists, Non Static Dictionaries, etc

In [2]:
# List of Potential Base line Branch Composition of Members 
branch_mbr_composition_templates = list(branch_mbr_composition_dict.keys())

# Default "Suggested" distribution of Potential Base Line Branch Compositions
branch_mbr_composition_templates_perc = [branch_mbr_composition_dict[x]['perc_'] for x in branch_mbr_composition_templates]

# Total Number of Members, Randomly Selected
total_mbrs = random.randint(general_assumptions['NUMBER_OF_MEMBERS']['value'][0],general_assumptions['NUMBER_OF_MEMBERS']['value'][1])


## Generation of Data

### Member Profile DataFrame

In [3]:
# DataFrame on Member Profile Attributes
mbr_profile_df = flatten_clean_dict(mbr_profile_dict,index_name='CLASSIFICATION')
mbr_profile_df

Unnamed: 0,CLASSIFICATION,age_low,age_high,liquid_assets_low,liquid_assets_high,weight,primary_is_beem_low,primary_is_beem_high,kids_low,kids_high,...,bill_pay_debit_low,bill_pay_debit_high,pos_txn_debit_low,pos_txn_debit_high,investment_debit_low,investment_debit_high,payroll_deposit_low,payroll_deposit_high,other_deposit_low,other_deposit_high
0,EARLY_CAREER_PROFESSIONAL,22,30,5000,40000,0.05,0.25,0.4,0,0.25,...,0.1,0.35,0.3,0.45,0.05,0.12,0.8,0.95,0.0,0.1
1,MID_CAREER_PROFESSIONAL,30,50,50000,400000,0.05,0.3,0.5,0,0.5,...,0.15,0.4,0.25,0.4,0.1,0.2,0.85,0.98,0.0,0.1
2,LATE_CAREER_PROFESSIONAL,45,70,250000,2000000,0.02,0.2,0.6,0,1.0,...,0.1,0.25,0.2,0.35,0.15,0.3,0.8,0.95,0.05,0.2
3,FIXED_INCOME_SENIOR,65,90,5000,80000,0.2,0.4,0.7,0,1.0,...,0.1,0.25,0.25,0.4,0.05,0.12,0.0,0.1,0.6,0.9
4,LOW_INCOME_WORKER,25,60,0,15000,0.15,0.1,0.3,0,1.0,...,0.1,0.2,0.35,0.55,0.02,0.07,0.8,0.95,0.0,0.1
5,RECENT_GRADUATE,21,27,1000,15000,0.05,0.1,0.4,0,0.0,...,0.2,0.4,0.35,0.55,0.03,0.1,0.85,0.98,0.0,0.08
6,STUDENT,18,25,0,5000,0.05,0.2,0.5,0,0.0,...,0.1,0.3,0.4,0.6,0.0,0.05,0.1,0.4,0.2,0.6
7,RETIREMENT_READY,55,80,400000,1500000,0.15,0.5,0.7,0,1.0,...,0.1,0.25,0.2,0.35,0.15,0.3,0.6,0.85,0.1,0.3
8,PAYCHECK_TO_PAYCHECK,22,65,0,5000,0.16,0.3,0.5,0,1.0,...,0.1,0.25,0.4,0.6,0.0,0.05,0.85,0.98,0.0,0.1
9,FINANCIALLY_STRESSED,25,65,0,2000,0.12,0.3,0.7,0,1.0,...,0.1,0.25,0.45,0.65,0.0,0.05,0.8,0.95,0.0,0.1


### General Assumption DataFrame

In [4]:
# DataFrame of General Assumption Attributes
general_assumptions_df = flatten_clean_dict(general_assumptions,index_name='CLASSIFICATION')[['CLASSIFICATION','Description','value_low','value_high','model_status']]
general_assumptions_df

Unnamed: 0,CLASSIFICATION,Description,value_low,value_high,model_status
0,OPERATIONAL_ATTRITION,Attrition which occurs as the result of Daily ...,0.0005,0.005,1
1,MEMBER_HEALTH,Attrition which occurs as the result of inhere...,0.0001,0.0005,1
2,INTEGRATION_IMPACT,Attrition which occurs as the result of frustr...,0.0001,0.0005,0
3,PERFORMANCE_RELATED,Attrition which occurs as the result of impact...,0.0001,0.0005,0
4,NUMBER_OF_KIDS,Allocation as to the Number of Kids a Member h...,,,0
5,MTG_Multiplier,Multiplier to apply to Random Mortgage Valuati...,,,0
6,Legacy_efficieny_factor_dict,Random Value Created to Implement slightly Les...,,,0
7,NUMBER_OF_MEMBERS,Number of members to be included in Dataframe,100000.0,200000.0,0


### Branch DataFrame

In [5]:
# Create Branch DataFrame
branch_df = random_uniform_normalized_df(unique_records=40,
                                         name='BRANCHNAME',
                                         LEGACY=[.5,.15,.3,.05])

# Add Random City from Selection List such that they are explicitly defined based on Legacy
branch_df["CITY"] = branch_df["LEGACY"].apply(lambda legacy: pick_from_dict(legacy_city_dict, legacy))

# Add a Mortgage Multipler, again defined based on Legacy 
branch_df['MTG_MULTIPLIER'] = branch_df['LEGACY'].map(general_assumptions['MTG_Multiplier']['value'])

# Branch Efficiency Factor
branch_df['BEF'] = [np.random.uniform(.98,1.05) for x in range(len(branch_df))]

# Legacy Efficiency Factor
branch_df['LEF'] = branch_df['LEGACY'].map(general_assumptions['Legacy_efficieny_factor_dict']['value'])

# Expected Growth Rate (I wanted to Keep this Small, Not Large)
branch_df['EXP_GROWTH'] = branch_df['LEF']*branch_df['BEF'] 

# Select the Default Member Composition Profile to which Classification members will be utilized
branch_df['BRANCH_MBR_COMPOSITION_CLASS'] = [random.choices(branch_mbr_composition_templates,weights=branch_mbr_composition_templates_perc)[0] for x in range(0,len(branch_df))]

# Total Number of Members based on Perc allocated and total number as defined in General Assumptions
branch_df['MEMBERS'] = branch_df['PERC_'].apply(lambda x:int(x*total_mbrs))

display(branch_df.head())

Unnamed: 0,BRANCHNAME,PERC_,LEGACY,CITY,MTG_MULTIPLIER,BEF,LEF,EXP_GROWTH,BRANCH_MBR_COMPOSITION_CLASS,MEMBERS
0,BRANCHNAME 1,0.007026,LEGACY 4,Fort St. John,0.6,1.035116,1.08,1.117925,dict_4,861
1,BRANCHNAME 2,0.007117,LEGACY 3,Vernon,0.75,1.011091,1.03,1.041423,dict_1,873
2,BRANCHNAME 3,0.056931,LEGACY 1,North Vancouver,1.25,0.999654,0.98,0.979661,dict_3,6983
3,BRANCHNAME 4,0.035383,LEGACY 3,Kelowna,0.75,1.01072,1.03,1.041042,dict_1,4340
4,BRANCHNAME 5,0.020029,LEGACY 2,North Vancouver,1.25,1.007732,0.97,0.9775,dict_1,2457


### Member DataFrame

In [6]:
mbr_df = create_mbr_information(branch_df,branch_mbr_composition_dict,mbr_profile_df)
mbr_df

Unnamed: 0,MEMBERNBR,STATUS,BRANCHNAME,CITY,CLASSIFICATION,AGE,LIQUID_ASSETS,PRIMARY_IS_BEEM,KIDS,HOME_OWNER,...,OTHER_DEPOSIT,NUMBER_KIDS,PERC_SPENDING_KNOWN,PERC_INCOME_KNOWN,MORTGAGE_BALANCE,DEPOSIT_BALANCE,PERC_MORTGAGE,PERC_DEPOSIT,ENGAGEMENT_SCORE,EXP_GROWTH
0,1,Active,BRANCHNAME 1,Fort St. John,RETIREMENT_READY,71.97,1451646.24,0,0.18,1,...,0.00,0.0,0.67,0.85,0.00,1451646.24,0.0,1.0,0.2,1.117925
1,2,Active,BRANCHNAME 1,Fort St. John,FIXED_INCOME_SENIOR,82.74,16777.48,0,0.13,1,...,0.00,0.0,0.61,0.67,0.00,0.00,0.0,0.0,0.0,1.117925
2,3,Active,BRANCHNAME 1,Fort St. John,RETIREMENT_READY,59.39,1303664.79,0,0.09,1,...,0.00,0.0,0.69,0.91,0.00,1303664.79,0.0,1.0,0.2,1.117925
3,4,Active,BRANCHNAME 1,Fort St. John,PAYCHECK_TO_PAYCHECK,36.85,1403.33,0,0.19,0,...,0.00,0.0,0.78,0.99,0.00,0.00,,0.0,0.5,1.117925
4,5,Active,BRANCHNAME 1,Fort St. John,FIXED_INCOME_SENIOR,87.09,30456.68,0,0.13,1,...,1013.02,0.0,0.66,0.81,0.00,0.00,0.0,0.0,0.2,1.117925
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2497,122649,Active,BRANCHNAME 40,West Kelowna,FIXED_INCOME_SENIOR,84.57,17202.40,1,0.27,0,...,2558.73,0.0,0.59,0.67,0.00,17202.40,,1.0,0.4,1.010182
2498,122650,Active,BRANCHNAME 40,West Kelowna,FIXED_INCOME_SENIOR,65.83,42718.53,1,0.50,1,...,3316.35,1.0,0.55,0.79,445611.65,42718.53,1.0,1.0,0.5,1.010182
2499,122651,Active,BRANCHNAME 40,West Kelowna,RETIREMENT_READY,58.16,1401630.52,1,0.05,1,...,1743.42,0.0,0.65,1.06,686511.33,1401630.52,1.0,1.0,0.7,1.010182
2500,122652,Active,BRANCHNAME 40,West Kelowna,FIXED_INCOME_SENIOR,70.18,17586.47,0,0.36,1,...,0.00,0.0,0.49,0.87,0.00,0.00,0.0,0.0,0.0,1.010182


### Transaction DataFrame

In [7]:
mbr_df

Unnamed: 0,MEMBERNBR,STATUS,BRANCHNAME,CITY,CLASSIFICATION,AGE,LIQUID_ASSETS,PRIMARY_IS_BEEM,KIDS,HOME_OWNER,...,OTHER_DEPOSIT,NUMBER_KIDS,PERC_SPENDING_KNOWN,PERC_INCOME_KNOWN,MORTGAGE_BALANCE,DEPOSIT_BALANCE,PERC_MORTGAGE,PERC_DEPOSIT,ENGAGEMENT_SCORE,EXP_GROWTH
0,1,Active,BRANCHNAME 1,Fort St. John,RETIREMENT_READY,71.97,1451646.24,0,0.18,1,...,0.00,0.0,0.67,0.85,0.00,1451646.24,0.0,1.0,0.2,1.117925
1,2,Active,BRANCHNAME 1,Fort St. John,FIXED_INCOME_SENIOR,82.74,16777.48,0,0.13,1,...,0.00,0.0,0.61,0.67,0.00,0.00,0.0,0.0,0.0,1.117925
2,3,Active,BRANCHNAME 1,Fort St. John,RETIREMENT_READY,59.39,1303664.79,0,0.09,1,...,0.00,0.0,0.69,0.91,0.00,1303664.79,0.0,1.0,0.2,1.117925
3,4,Active,BRANCHNAME 1,Fort St. John,PAYCHECK_TO_PAYCHECK,36.85,1403.33,0,0.19,0,...,0.00,0.0,0.78,0.99,0.00,0.00,,0.0,0.5,1.117925
4,5,Active,BRANCHNAME 1,Fort St. John,FIXED_INCOME_SENIOR,87.09,30456.68,0,0.13,1,...,1013.02,0.0,0.66,0.81,0.00,0.00,0.0,0.0,0.2,1.117925
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2497,122649,Active,BRANCHNAME 40,West Kelowna,FIXED_INCOME_SENIOR,84.57,17202.40,1,0.27,0,...,2558.73,0.0,0.59,0.67,0.00,17202.40,,1.0,0.4,1.010182
2498,122650,Active,BRANCHNAME 40,West Kelowna,FIXED_INCOME_SENIOR,65.83,42718.53,1,0.50,1,...,3316.35,1.0,0.55,0.79,445611.65,42718.53,1.0,1.0,0.5,1.010182
2499,122651,Active,BRANCHNAME 40,West Kelowna,RETIREMENT_READY,58.16,1401630.52,1,0.05,1,...,1743.42,0.0,0.65,1.06,686511.33,1401630.52,1.0,1.0,0.7,1.010182
2500,122652,Active,BRANCHNAME 40,West Kelowna,FIXED_INCOME_SENIOR,70.18,17586.47,0,0.36,1,...,0.00,0.0,0.49,0.87,0.00,0.00,0.0,0.0,0.0,1.010182


In [8]:
mbr_df.iloc[0]

MEMBERNBR                             1
STATUS                           Active
BRANCHNAME                 BRANCHNAME 1
CITY                      Fort St. John
CLASSIFICATION         RETIREMENT_READY
AGE                               71.97
LIQUID_ASSETS                1451646.24
PRIMARY_IS_BEEM                       0
KIDS                               0.18
HOME_OWNER                            1
ANNUAL_INCOME                 149372.07
DURATION                          14.63
CONSUMER_DEBT                  28221.63
MORTGAGE_DEBT                 696307.12
BILL_PAY_DEBIT                      0.0
POS_TXN_DEBIT                       0.0
INVESTMENT_DEBIT                    0.0
PAYROLL_DEPOSIT                     0.0
OTHER_DEPOSIT                       0.0
NUMBER_KIDS                         0.0
PERC_SPENDING_KNOWN                0.67
PERC_INCOME_KNOWN                  0.85
MORTGAGE_BALANCE                    0.0
DEPOSIT_BALANCE              1451646.24
PERC_MORTGAGE                       0.0


In [9]:
mbr_df = mbr_df.reset_index(drop=True)

In [10]:
monthly_bp_df['PROPERTY_TAX_PROV']*mbr_df['HOME_OWNER']].reset_index()

SyntaxError: unmatched ']' (4282554573.py, line 1)

In [None]:
exclude = ['INSURANCE_HOUSE','PROPERTY_TAX_PROV','PROPERTY_TAX_MUNI','UTILITIES_GAS','UTILITIES_HYDRO']
    
monthly_bp_df = decouple_txn(mbr_df[['BILL_PAY_DEBIT','MEMBERNBR','HOME_OWNER']],
                             reference_value='BILL_PAY_DEBIT',
                             txn_dict=monthly_payment_dict,
                             exclude_non_ho=exclude)

monthly_pos_df = decouple_txn(mbr_df[['POS_TXN_DEBIT','MEMBERNBR','HOME_OWNER']],
                              reference_value='POS_TXN_DEBIT',
                              txn_dict=pos_txn_dict)

final_txn_df = pd.concat([monthly_bp_df,monthly_pos_df])
final_txn_df

In [None]:
# List of postential Member Profiles
mbr_profile_list = list(mbr_profile_dict.keys())

# Random Distribution Percentage of Member Profiles
mbr_profile_perc = random_uniform_normalized_list(len(mbr_profile_list))

# Dictionary of Random Percentage Distribution
mbr_profile_dict = {mbr_profile_list[x]:mbr_profile_perc[x] for x in range(len(mbr_profile_list))}


In [None]:
from list_processing import random_uniform_normalized_list,random_choice_from_list
from df_processing import random_uniform_normalized_df

In [None]:


# Need to acknowledge that some payments DO NOT Occur Monthly, so their absence doesnt imply, Need to understand this
# Will only Model Monthly.







In [None]:
change_parameters = {'income':,
                     'kids':
                     'move':
                     'purchase_house':
                     'retirement_savings':
                     'unexpected_event':
                     'negative_beem_experience':
                     'random_attrituion'
                     
                     
                    
                    }

In [None]:
import pandas as pd
import numpy as np

rng = np.random.default_rng()

def random_uniform_normalized_list(n, skew=1) -> np.ndarray:

    """
    Function to create a list of RNG numbers for the purposes of creating a distribution.
    Values equal 1.

    Parameters:
        n(int): Number of Values to Return in list.
        skew(int): Skew to include in data, Values Greater than 0 will create 

    Returns:
        Object Type

    date_created:29-Dec-25
    date_last_modified: 29-Dec-25
    classification:TBD
    sub_classification:TBD
    usage:
        create_distribution_weight(5)


    """  
    # Generate random positive numbers
    raw = rng.random(n) ** skew  # apply skew
    weights = raw / raw.sum()    # normalize to sum to 1
    return [float(w) for w in weights]

def random_choice_from_uniform_list(total_records,
                                    name="Example",
                                    distinct_entities=0,
                                    list_distribution=[],
                                    return_value=None,
                                    skew=1):
    '''
    Create a random generate list from provided inputs. List is of length as defined in total records, the name of the records is defined in name. 
    The distribution of values is conditionally determined by either distinct entities, or the distribution as provided in list_distribution.

    Parameters:
        total_records(int): Number of records to be returned in list.
        name(str): Name of Random Records.
        distinct_entities(int): If populated, it will be used to generate a random distribution of defined values, also used as the number of reocrds
        list_distribution(list): Distribution to be used for random sampling.
        return_value(str): Default to None, and will return a list. Can input 'df' to return a dataframe
        skew(float): Skew to include in random distribution.
        
    Returns:
        list
        if return_value is 'df' then DataFrame

    date_created:29-Dec-25
    date_last_modified: 29-Dec-25
    classification:TBD
    sub_classification:TBD
    usage:
        random_uniform_normalized_df(unique_records=40,name='BRANCHNAME',LEGACY=[.5,.15,.3,.05])
    '''
    
    if (distinct_entities==0)&(list_distribution==[]):
        raise TypeError('User must select either Number of Distinct Entries or Provide a Distribution')
    
    if distinct_entities==0:
        distinct_entities = len(list_distribution)
        
    if len(list_distribution)==0:
        list_distribution = random_uniform_normalized_list(distinct_entities,skew=skew)
        
    name_list = [f"{name} {x+1}" for x in range(0,distinct_entities)]
    
    final_list = [random.choices(name_list,weights=list_distribution)[0] for x in range(0,total_records)]
    
    if return_value=='df':
        return pd.DataFrame(final_list,columns=[name])
    else:
        return final_list

def random_uniform_normalized_df(unique_records,
                                 name='Example',
                                 skew=1.25,
                                 **kwargs):
    '''
    Create a Dataframe (which is a series of n * 1) of Random Values for purposes of creating a Random Distribution DataFrame.
    Kwargs can be used to create New Columns. Kwargs should be Lists of distribution Frequencies, to create new random Columns (Not cdf).

    Parameters:
        unique records(int): Number, representing the number of random columns to be included in the output DF.
        name(str): Name of Column to Included (values will be numbered).
        skew(float): If Data is to have a skewed distribution, 1 will be normal uniform (mean=1,std_dev=0).
        **kwargs: Should be List of values equalling 1, to create a new random value.

    Returns:
        Object Type

    date_created:29-Dec-25
    date_last_modified: 29-Dec-25
    classification:TBD
    sub_classification:TBD
    usage:
        random_uniform_normalized_df(unique_records=40,name='BRANCHNAME',LEGACY=[.5,.15,.3,.05])
    
    '''
    obs_name_list = [f'{name} {x+1}' for x in range(0,unique_records)]
    dist_perc = random_uniform_normalized_list(unique_records,skew=skew)
    
    final_df = pd.DataFrame()
    
    for obs in range(0,unique_records):
        obs_name = obs_name_list[obs]
        perc_ = dist_perc[obs]
        temp_df = pd.DataFrame([[obs_name,perc_]],columns=[name,'PERC_'])
        final_df = pd.concat([final_df,temp_df])
            
    for kwarg_name, kwarg_value in kwargs.items():
        temp_df = random_choice_from_uniform_list(1000,name=kwarg_name,list_distribution=kwarg_value,return_value='df')
        final_df = final_df.reset_index(drop=True).merge(temp_df,left_index=True,right_index=True,how='left')
    
    return final_df

def replicate_df_row(df,records=5):
    
    '''
    Function which Replicates a single row DataFrame for the purposes of Multiplying it against a larger row.
    Function written using tile, which is a C based language, and considerably faster than straight using nunpy vectorized Calculations.

    Parameters:
        df(dataframe): DataFrame which you wish to extend, should be a Single Row, but techincally it will duplicate any size
        records(int): Number of times you wish DF to be duplicated, ideally it should be len(other_df) to which you want to multiply

    Returns:
        df

    date_created:30-Dec-25
    date_last_modified: 30-Dec-25
    classification:TBD
    sub_classification:TBD
    usage:
        df = pd.DataFrame([[1,2,3]],columns=['A','B','C'])
        replicate_row(df)
    
    '''
    
    row = df.to_numpy()
    columns = df.columns.tolist()

    # Repeat row N times using NumPy
    data = np.tile(row, (records, 1))  # shape (N, len(row))
    return pd.DataFrame(data, columns=columns)


