# Importing Libraries

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import ast


# Importing data

In [2]:
filepath = '/Users/amaanqureshi/Desktop/TB2/mini project/user_large_with_dates.csv'
df = pd.read_csv(filepath) #for classic
data = pd.read_csv(filepath) #for club

# Classic Eligible users list

In [3]:
def find_high_spenders(df):
    high_spenders = set()
    for index, row in df.iterrows():
        try:
            transactions = ast.literal_eval(row['user_date_transaction'])
            spends = ast.literal_eval(row['user_industry_spend'])
        except ValueError:
            continue  
        
        monthly_spend = {}
        for date, spend in zip(transactions, spends):
            date_obj = datetime.strptime(date, '%d/%m/%Y')
            year_month = date_obj.strftime('%Y-%m')
            monthly_spend[year_month] = monthly_spend.get(year_month, 0) + spend
        
        if any(spend > 6000 for spend in monthly_spend.values()):
            high_spenders.add(row['user_id'])
    
    return list(high_spenders)

high_spenders_list = find_high_spenders(df)

print(len(high_spenders_list)) 
print(high_spenders_list[:10]) 

6377
[32768, 98305, 65538, 98316, 65564, 98332, 32798, 32807, 32808, 65578]


# Classic Boolean

In [4]:
def safe_eval(str_eval): #because date format.. float error
    try:
        return ast.literal_eval(str_eval)
    except ValueError:
        return []

def calculate_monthly_spend(date_transactions, spend_transactions):
    dates = safe_eval(date_transactions) 
    spends = safe_eval(spend_transactions)
    monthly_spend = {}
    for date_str, spend in zip(dates, spends):
        try:
            date = datetime.strptime(date_str, '%d/%m/%Y')
            key = (date.year, date.month)
            monthly_spend[key] = monthly_spend.get(key, 0) + spend
        except ValueError:
            continue  
    return monthly_spend

def check_eligibility_for_classic(monthly_spend):
    return any(spend > 2000 for spend in monthly_spend.values())

data['monthly_spend'] = data.apply(lambda row: calculate_monthly_spend(row['user_date_transaction'], row['user_industry_spend']), axis=1)
data['eligible_for_classic'] = data['monthly_spend'].apply(check_eligibility_for_classic)

In [5]:
eligible_for_classic = data[data['eligible_for_classic'] == True][['user_id', 'eligible_for_classic']]
eligible_for_classic.head()

Unnamed: 0,user_id,eligible_for_classic
0,1000,True
1,10000,True
2,100000,True
3,100002,True
4,100019,True


In [6]:
eligibility_counts = data['eligible_for_classic'].value_counts()
print(eligibility_counts)

True     8142
False    6094
Name: eligible_for_classic, dtype: int64


# Club Lloyds eligible users

In [7]:
data['user_industry_interaction'] = data['user_industry_interaction']##.apply(ast.literal_eval)

target_industries = ['Cafes', 'Lunch / Light Meal', 'Coffee Beans and Teas', 'Dining', 'TV/Film']

def count_target_interactions(industry_list):
    return sum(industry in target_industries for industry in industry_list)

data['target_industry_interactions'] = data['user_industry_interaction'].apply(count_target_interactions)
threshold = 5

data['eligible_for_clublloyds'] = data['target_industry_interactions'] >= threshold

recommended_users = data[data['eligible_for_clublloyds']]
recommended_users[['user_id', 'eligible_for_clublloyds']].head()

Unnamed: 0,user_id,eligible_for_clublloyds


In [8]:
recommendation_counts = data['eligible_for_clublloyds'].value_counts()
print(recommendation_counts)

False    14236
Name: eligible_for_clublloyds, dtype: int64


# Cafe frequencies

In [9]:
def safe_convert_to_list(list_str):
    try:
        return ast.literal_eval(list_str)
    except (ValueError, SyntaxError):
        return []

    
    
def calculate_cafe_frequency(row):
    industries = row['user_industry_interaction']
    frequencies = row['user_industry_interaction_frequency']
    
    if isinstance(industries, str):
        industries = safe_convert_to_list(industries)
    if isinstance(frequencies, str):
        frequencies = safe_convert_to_list(frequencies)
    
    cafe_freq = sum(frequency for industry, frequency in zip(industries, frequencies) if industry in ['Cafes', 'Coffee Shops'])
    return cafe_freq

data['Cafe_frequency'] = data.apply(calculate_cafe_frequency, axis=1)
print(data[['user_id', 'Cafe_frequency']].head())

   user_id  Cafe_frequency
0     1000             813
1    10000              23
2   100000             828
3   100002             281
4   100019             214


# Dining Frequency

In [10]:
def calculate_dining_frequency(row):
    industries = row['user_industry_interaction']
    frequencies = row['user_industry_interaction_frequency']
    
    if isinstance(industries, str):
        industries = safe_convert_to_list(industries)
    if isinstance(frequencies, str):
        frequencies = safe_convert_to_list(frequencies)
    
    dining_freq = sum(frequency for industry, frequency in zip(industries, frequencies) if industry in ['Dining'])
    return dining_freq

data['Dining_frequency'] = data.apply(calculate_dining_frequency, axis=1)
data[['user_id', 'Dining_frequency']].head()

Unnamed: 0,user_id,Dining_frequency
0,1000,37
1,10000,51
2,100000,58
3,100002,33
4,100019,38


# Apparel shopping frequency

In [11]:
def calculate_apparel_frequency(row):
    industries = row['user_industry_interaction']
    frequencies = row['user_industry_interaction_frequency']
    
    if isinstance(industries, str):
        industries = safe_convert_to_list(industries)
    if isinstance(frequencies, str):
        frequencies = safe_convert_to_list(frequencies)
    
    dining_freq = sum(frequency for industry, frequency in zip(industries, frequencies) if industry in ['Apparel'])
    return dining_freq

data['Apparel_frequency'] = data.apply(calculate_apparel_frequency, axis=1)
data[['user_id', 'Apparel_frequency']].head()

Unnamed: 0,user_id,Apparel_frequency
0,1000,1
1,10000,23
2,100000,1
3,100002,32
4,100019,25


In [12]:
print(len(df['user_id']))


14236
