In [44]:
import pickle

import numpy as np
import pandas as pd
from lightfm import LightFM
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity



In [3]:
def create_interaction_matrix(df, user_col, item_col, rating_col):
    """Pivot long-format ratings into a user x item interaction table.

    Ratings that share the same (user, item) pair are summed. The result has
    one row per distinct ``user_col`` value (used as the index), one column per
    distinct ``item_col`` value, and 0 wherever a pair never occurs in ``df``.
    """
    summed = df.groupby([user_col, item_col])[rating_col].sum()
    # Spread items across columns, then move the user key out of the index
    # while the missing cells are filled.
    wide = summed.unstack().reset_index()
    wide = wide.fillna(0)
    return wide.set_index(user_col)

In [4]:
def create_user_dict(interactions):
    """Map every row label of ``interactions`` to its zero-based row position.

    The positions follow the order of ``interactions.index``, so the returned
    mapping translates a user label into the row number of that user.
    """
    return {label: position for position, label in enumerate(interactions.index)}

In [5]:
def create_item_dict(df, id_col, name_col):
    """Build an ``{item id: item name}`` lookup from two columns of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding one row per item.
    id_col : str
        Column whose values become the dictionary keys.
    name_col : str
        Column whose values become the dictionary values.

    Returns
    -------
    dict
        One entry per row; when an id occurs more than once the last row wins.
    """
    # Pair the columns positionally so the frame's index labels are irrelevant
    # (a filtered or re-indexed frame has no 0..n-1 labels to look up).
    return dict(zip(df[id_col], df[name_col]))

In [6]:
def runMF(interactions, n_components=30, loss='warp', k=15, epoch=30, n_jobs=4, random_state=None):
    """Fit a LightFM model on a user x item interaction table.

    Parameters
    ----------
    interactions : pandas.DataFrame
        Interaction table; its ``.values`` are converted to a CSR matrix.
    n_components : int
        Passed to LightFM as ``no_components``.
    loss : str
        Passed to LightFM as ``loss``.
    k : int
        Passed to LightFM as ``k``.
    epoch : int
        Number of training epochs.
    n_jobs : int
        Passed to ``fit`` as ``num_threads``.
    random_state : int, numpy.random.RandomState or None
        Passed to LightFM as ``random_state`` so a run can be seeded.
        NOTE(review): training with more than one thread may still not be
        bit-for-bit repeatable - confirm against the LightFM documentation.

    Returns
    -------
    LightFM
        The fitted model.
    """
    # Convert once to the sparse format used for fitting.
    x = sparse.csr_matrix(interactions.values)
    model = LightFM(no_components=n_components, loss=loss, k=k, random_state=random_state)
    model.fit(x, epochs=epoch, num_threads=n_jobs)
    return model

In [7]:
def recommend_banking_product_to_user(model, interactions_trained, interactions_current, user_id, user_dict,
                               banking_product_dict, threshold=0, nrec_items=10):
    """Rank all products for one user and return the best ones the user does not already hold.

    Parameters
    ----------
    model : object
        Fitted model exposing ``predict(user_index, item_indices)``.
    interactions_trained : pandas.DataFrame
        Only its column count is used: it fixes how many items are scored.
    interactions_current : pandas.DataFrame
        Its columns label the scores and its ``user_id`` row decides which
        products count as already known.
    user_id : hashable
        Row label of the user in ``interactions_current`` and key of ``user_dict``.
    user_dict : dict
        Maps a user label to the user index handed to ``model.predict``.
    banking_product_dict : dict
        Maps ``int(product label)`` to a product name.
    threshold : number
        A product is "known" when the user's interaction value is strictly greater.
    nrec_items : int
        Maximum number of recommendations returned.

    Returns
    -------
    tuple
        ``(return_score_list, products_recommended, known_products)``: the
        recommended product labels (best first), their names, and the names of
        the products the user already holds.

    Raises
    ------
    KeyError
        If ``user_id`` is not in ``user_dict`` or a product label has no entry
        in ``banking_product_dict``.
    """
    if user_id not in user_dict:
        raise KeyError(f"user_id {user_id!r} is not present in user_dict")

    n_items = interactions_trained.shape[1]
    user_x = user_dict[user_id]

    # Score every item for this user and label the scores with the product columns.
    scores = pd.Series(model.predict(user_x, np.arange(n_items)))
    scores.index = interactions_current.columns
    ranked_items = list(scores.sort_values(ascending=False).index)

    user_row = interactions_current.loc[user_id, :]
    known_products = list(pd.Series(user_row[user_row > threshold].index).sort_values(ascending=False))

    # Compare labels with labels: both come from the same column index, so the
    # filter works whether the columns are ints or strings.
    known_lookup = set(known_products)
    return_score_list = [item for item in ranked_items if item not in known_lookup][:nrec_items]

    known_products = [banking_product_dict[int(item)] for item in known_products]
    products_recommended = [banking_product_dict[int(item)] for item in return_score_list]
    return return_score_list, products_recommended, known_products


In [8]:
def create_banking_product_emdedding_distance_matrix(model, interactions):
    """Return the pairwise cosine similarity between the model's item embeddings.

    The result is a square DataFrame whose rows and columns are both labelled
    with ``interactions.columns``. Despite the function name the values are
    similarities (larger means closer), as produced by ``cosine_similarity``.
    """
    product_labels = interactions.columns
    embeddings = sparse.csr_matrix(model.item_embeddings)
    pairwise = cosine_similarity(embeddings)
    return pd.DataFrame(pairwise, index=product_labels, columns=product_labels)


In [9]:
def recommend_banking_product_to_product(item_emdedding_distance_matrix, item_id,
                                banking_product_dict, n_items=10):
    """Return the products most similar to ``item_id``.

    Parameters
    ----------
    item_emdedding_distance_matrix : pandas.DataFrame
        Square item x item similarity table (larger value = more similar).
    item_id : int or str
        Product to find neighbours for. It is looked up as ``str(item_id)``
        first and, if that label is absent, as ``item_id`` itself, so both
        string-labelled and integer-labelled matrices work.
    banking_product_dict : dict
        Maps ``int(product label)`` to a product name.
    n_items : int
        Maximum number of neighbours returned.

    Returns
    -------
    tuple
        ``(recommended_product_id, final_products)``: neighbour labels ordered
        from most to least similar, and their names.

    Raises
    ------
    KeyError
        If the product is not in the matrix or a neighbour has no entry in
        ``banking_product_dict``.
    """
    labels = item_emdedding_distance_matrix.index
    candidates = (str(item_id), item_id)
    # Fall back to the string form when nothing matches so the KeyError names it.
    item_key = next((label for label in candidates if label in labels), str(item_id))

    similarities = item_emdedding_distance_matrix.loc[item_key, :]
    # Remove the queried product explicitly instead of assuming it sorts first;
    # a tie at the top could otherwise return the product as its own neighbour.
    ranked = similarities.drop(labels=item_key, errors='ignore').sort_values(ascending=False)
    recommended_product_id = list(ranked.head(n_items).index)

    final_products = [banking_product_dict[int(label)] for label in recommended_product_id]
    return recommended_product_id, final_products

In [10]:
# Load data (replace with your file paths)
# Both files are read relative to the notebook's working directory.
# Later cells rely on a shared `merchant_category` column plus `custID`/`ratings`
# (transactions) and `banking_product_id`/`banking_product_name` (products).
df_cc_trans = pd.read_csv('transaction_data.csv')
df_banking_products = pd.read_csv('banking_products.csv')

In [11]:
# Keep only the first three transactions. This overwrites the loaded frame, so
# re-run the load cell to get the full data back.
# NOTE(review): looks like a debugging shortcut - confirm it should stay.
df_cc_trans = df_cc_trans.head(3)

In [12]:
# Keep only the first three products. This overwrites the loaded frame, so
# re-run the load cell to get the full catalogue back.
# NOTE(review): looks like a debugging shortcut - confirm it should stay.
df_banking_products = df_banking_products.head(3)

In [13]:
# Merge data
# Pair every transaction with every banking product that shares its
# merchant_category; rows without a match on either side are dropped
# (pd.merge defaults to an inner join).
df_3 = pd.merge(df_cc_trans, df_banking_products, on='merchant_category')

In [14]:
# Show the merged frame so the join result can be checked by eye.
df_3

Unnamed: 0,custID,merchant,merchant_category,ratings,banking_product_id,banking_product_name
0,10084,Hughes PLC,Travel,2.844368,1,Credit Card Person
1,10084,Hughes PLC,Travel,2.844368,3,Investment Nation


In [15]:
# Create interaction matrix
# One row per custID, one column per banking_product_id; each cell holds the
# summed `ratings` for that pair (0 where the pair does not occur).
interactions_banking_products = create_interaction_matrix(df=df_3,
                                         user_col='custID',
                                         item_col='banking_product_id',
                                         rating_col='ratings')

In [16]:
# Show the customer x product interaction table.
interactions_banking_products

banking_product_id,1,3
custID,Unnamed: 1_level_1,Unnamed: 2_level_1
10084,2.844368,2.844368


In [17]:
# Create dictionaries
# user_dict: custID -> row position in the interaction matrix.
# banking_product_dict: banking_product_id -> banking_product_name. It is built
# from the full product frame, so it can contain products that never appear in
# the interaction matrix.
user_dict = create_user_dict(interactions=interactions_banking_products)
banking_product_dict = create_item_dict(df=df_banking_products,
                               id_col='banking_product_id',
                               name_col='banking_product_name')

In [18]:
# Show the custID -> row position mapping.
user_dict

{10084: 0}

In [19]:
# Show the product id -> product name mapping.
banking_product_dict

{1: 'Credit Card Person', 2: 'Investment Whether', 3: 'Investment Nation'}

In [1]:
# Train model
# Deliberately tiny settings (2 components, 1 epoch, 1 thread) to match the
# three-row sample used above.
# NOTE(review): the recorded NameError comes from this cell having been run
# (execution count 1) before the cell that defines runMF; presumably the kernel
# was restarted. Run the notebook top to bottom to refresh the output.
mf_model_banking_products = runMF(interactions=interactions_banking_products,
                 n_components=2,
                 loss='warp',
                 k=1,
                 epoch=1,
                 n_jobs=1)


NameError: name 'runMF' is not defined

In [21]:
# Generate user recommendations (example for user 10000)
# The same interaction matrix is passed as both the trained and the current
# view. threshold=1 treats a product as already held when the summed rating is
# above 1; at most 5 products are returned.
# NOTE(review): user 10000 is not a key of user_dict (only 10084 is shown
# above), which is why the recorded run ends in `KeyError: 10000`. Pick a
# custID that exists in the interaction matrix - TODO.
return_score_list, products_recommended, known_products = \
    recommend_banking_product_to_user(model=mf_model_banking_products,
                               interactions_trained=interactions_banking_products,
                               interactions_current=interactions_banking_products,
                               user_id=10000,
                               user_dict=user_dict,
                               banking_product_dict=banking_product_dict,
                               threshold=1,
                               nrec_items=5)

KeyError: 10000

In [None]:
# Report the recommendation results. Both names are only defined once the
# recommendation cell has run successfully.
# NOTE(review): the user id is hard-coded in the messages; keep it in sync with
# the user_id passed to recommend_banking_product_to_user.
print("Recommended Banking Products for User 10000:", products_recommended)
print("Known Banking Products for User 10000:", known_products)

In [None]:
# Generate banking product-banking product recommendations (example for product ID 1)
# Step 1: build the product x product cosine-similarity table from the trained
# model's item embeddings, labelled with the interaction matrix's product ids.
banking_product_dist = create_banking_product_emdedding_distance_matrix(model=mf_model_banking_products,
                                                       interactions=interactions_banking_products)

In [None]:
# Step 2: ask for up to 3 products most similar to product 1.
# NOTE(review): the sample interaction matrix shown above has only two product
# columns, so fewer than 3 neighbours can come back - confirm on the full data.
recommended_product_id, final_products = \
    recommend_banking_product_to_product(item_emdedding_distance_matrix=banking_product_dist,
                                    item_id=1,
                                    banking_product_dict=banking_product_dict,
                                    n_items=3)

In [None]:
# Report the names of the neighbouring products found for product 1.
print("Recommended Similar Banking Products to Product 1:", final_products)

In [10]:
import pandas as pd
from faker import Faker
import random
import numpy as np

# Shared Faker instance; generate_wells_fargo_data uses it for emails, join
# dates and city names.
fake = Faker()

def generate_wells_fargo_data(num_rows=1000, seed=None):
    """Generates a synthetic customer dataset with Wells Fargo specific products.

    Parameters
    ----------
    num_rows : int
        Number of customers (rows) to generate.
    seed : int or None
        When given, seeds both the ``random`` module and the shared ``fake``
        instance before any value is drawn. Join dates are still drawn relative
        to the current day, so they can differ between runs on different days.

    Returns
    -------
    pandas.DataFrame
        One row per customer: profile columns (email, customer_id, age, ...)
        followed by one 0/1 column per product, named by the product's readable
        name (e.g. ``CheckingAccount``). Missing values are injected into
        ``DirectDeposit`` (~20%), ``Pension`` (~40%), ``prov_code`` (~10%) and
        ``activity_index`` (~5%).

    Raises
    ------
    ValueError
        If ``num_rows`` exceeds the 9,000,000 distinct 7-digit customer ids.
    """
    if seed is not None:
        random.seed(seed)
        fake.seed_instance(seed)

    # Readable product name -> short column code. Only the readable names are
    # used as column names; the codes are kept here for reference.
    products = {
        "CheckingAccount": "ind_cco_fin_ult1",
        "SavingsAccount": "ind_ahor_fin_ult1",
        "CreditCard": "ind_tjcr_fin_ult1",
        "HomeLoan": "ind_hip_fin_ult1",
        "AutoLoan": "ind_pres_fin_ult1",
        "InvestmentAccount": "ind_valo_fin_ult1",
        "RetirementAccount": "ind_plan_fin_ult1",
        "MoneyMarketAccount": "ind_ctma_fin_ult1",
        "CDAccount": "ind_cder_fin_ult1",
        "DebitCard": "ind_deco_fin_ult1",
        "OnlineBanking": "ind_ecue_fin_ult1",
        "Mortgage": "ind_viv_fin_ult1",
        "DirectDeposit": "ind_nomina_ult1",
        "Pension": "ind_nom_pens_ult1",
        "BillPay": "ind_recibo_ult1",
        "PersonalLoan": "ind_dela_fin_ult1",
        "BrokerageAccount": "ind_fond_fin_ult1",
        "TrustAccount": "ind_ctju_fin_ult1",
        "OverdraftProtection": "ind_ctop_fin_ult1",
        "PrepaidCard": "ind_ctpp_fin_ult1",
        "MoneyTransfer": "ind_deme_fin_ult1",
        "DirectDebit": "ind_reca_fin_ult1",
        "BusinessAccount": "ind_aval_fin_ult1",
        "SafeDepositBox": "ind_sdep_fin_ult1",
    }

    data = {
        "email": [fake.email() for _ in range(num_rows)],
        # Sample without replacement so no two customers share an id.
        "customer_id": random.sample(range(1000000, 10000000), num_rows),
        "employee_index": [random.choice(["N", "S"]) for _ in range(num_rows)],
        "country": [random.choice(["US", "CA", "MX"]) for _ in range(num_rows)],
        "sex": [random.choice(["M", "F", "O"]) for _ in range(num_rows)],
        "age": [random.randint(18, 80) for _ in range(num_rows)],
        "joindate": [fake.date_between(start_date="-10y", end_date="today").strftime("%d/%m/%y") for _ in range(num_rows)],
        "customer_index6": [random.randint(0, 10) for _ in range(num_rows)],
        "customer_life": [random.randint(1, 20) for _ in range(num_rows)],
        "customer_relation": [random.choice(["A", "B", "C"]) for _ in range(num_rows)],
        "res_index": [random.choice(["S", "N"]) for _ in range(num_rows)],
        "for_index": [random.choice(["S", "N"]) for _ in range(num_rows)],
        "prov_code": [random.randint(1, 56) for _ in range(num_rows)],
        "prov_name": [fake.city() for _ in range(num_rows)],
        "activity_index": [random.uniform(0, 1) for _ in range(num_rows)],
        "income": [random.uniform(20000, 200000) for _ in range(num_rows)],
        "segmentation": [random.choice(["Individual", "Business", "Premium"]) for _ in range(num_rows)],
    }

    # One ownership flag (0 or 1) per product, under its readable name.
    for product_name in products:
        data[product_name] = [random.choice([0, 1]) for _ in range(num_rows)]

    df = pd.DataFrame(data)

    # Handle nullable floats for specific columns
    df["DirectDeposit"] = df["DirectDeposit"].astype('float64')
    df["Pension"] = df["Pension"].astype('float64')

    def _with_missing(column, keep_probability):
        """Keep each value with ``keep_probability``; otherwise replace it with NaN."""
        return column.apply(lambda value: value if random.random() < keep_probability else np.nan)

    df["DirectDeposit"] = _with_missing(df["DirectDeposit"], 0.8)
    df["Pension"] = _with_missing(df["Pension"], 0.6)
    df["prov_code"] = _with_missing(df["prov_code"], 0.9)
    df["activity_index"] = _with_missing(df["activity_index"], 0.95)

    return df

# Generate and display the dataset
# 2000 synthetic customers; print the first rows and the column dtypes as a
# quick sanity check.
wells_fargo_df = generate_wells_fargo_data(2000)
print(wells_fargo_df.head())
print(wells_fargo_df.dtypes)

# Write the data to the working directory without the row index.
wells_fargo_df.to_csv("dataset.csv", index=False)

                      email  customer_id employee_index country sex  age  \
0  stevensonamy@example.net      2140224              N      US   O   33   
1      robert91@example.net      1318233              N      CA   M   50   
2   jacksonbrad@example.com      1654961              N      MX   M   28   
3       zritter@example.com      2285310              S      MX   O   78   
4        zdavis@example.com      5786251              N      CA   M   49   

   joindate  customer_index6  customer_life customer_relation  ... BillPay  \
0  12/11/19                6              7                 C  ...       1   
1  01/06/19                4             20                 A  ...       0   
2  09/10/17               10             12                 A  ...       1   
3  07/04/24                2             11                 A  ...       0   
4  06/06/20                7              3                 B  ...       1   

  PersonalLoan  BrokerageAccount TrustAccount  OverdraftProtection  \
0   