In [15]:
import pandas as pd

df = pd.read_csv('../separate_dfs/premium_card_data.csv')
df['date'] = pd.to_datetime(df['date'])

df

Unnamed: 0,client_code,name,status,age,city,avg_monthly_balance_KZT,date,data_source,type,category,direction,amount,currency
0,1,,,29.0,,92643.0,2025-06-01 11:40:16,transfer,card_out,,out,9359.56,KZT
1,1,Айгерим,Зарплатный клиент,,Алматы,,2025-06-02 12:10:21,transaction,,Продукты питания,,18848.85,KZT
2,1,,,29.0,,92643.0,2025-06-02 13:10:59,transfer,card_out,,out,17590.68,KZT
3,1,,,29.0,,92643.0,2025-06-02 20:23:02,transfer,card_out,,out,19677.33,KZT
4,1,,,29.0,,92643.0,2025-06-03 09:50:44,transfer,card_out,,out,37686.28,KZT
...,...,...,...,...,...,...,...,...,...,...,...,...,...
16642,60,Ермек,Зарплатный клиент,,Кызылорда,,2025-08-31 10:00:00,transaction,,Кафе и рестораны,,12000.00,KZT
16643,60,Ермек,Зарплатный клиент,,Кызылорда,,2025-08-31 11:20:39,transaction,,Кафе и рестораны,,3489.42,KZT
16644,60,Ермек,Зарплатный клиент,,Кызылорда,,2025-08-31 12:10:15,transaction,,Продукты питания,,8255.46,KZT
16645,60,Ермек,Зарплатный клиент,,Кызылорда,,2025-08-31 17:00:58,transaction,,Кафе и рестораны,,4951.28,KZT


In [16]:
import pandas as pd
import numpy as np

df = pd.read_csv('../separate_dfs/premium_card_data.csv')
df['date'] = pd.to_datetime(df['date'])

# Fill balance data
df['avg_monthly_balance_KZT'] = df.groupby('client_code')['avg_monthly_balance_KZT'].transform('first')

# 1. Premium tier qualification
df['balance_tier'] = pd.cut(df['avg_monthly_balance_KZT'], 
                            bins=[0, 100000, 500000, 1000000, float('inf')],
                            labels=['Standard', 'Gold', 'Platinum', 'Black'])
df['balance_tier_score'] = df['balance_tier'].map({'Standard': 0.25, 'Gold': 0.5, 'Platinum': 0.75, 'Black': 1.0})

# 2. Premium category spending
df['is_premium_category'] = df['category'].isin(['Ювелирные украшения', 'Косметика и Парфюмерия', 'Кафе и рестораны', 'Спа и массаж']).astype(int)

# Calculate spending totals at client level
premium_spending = df[df['is_premium_category']==1].groupby('client_code')['amount'].sum().to_dict()
total_spending = df[df['data_source']=='transaction'].groupby('client_code')['amount'].sum().to_dict()
df['premium_spend_total'] = df['client_code'].map(premium_spending).fillna(0)
df['total_spend'] = df['client_code'].map(total_spending).fillna(0)
df['premium_spend_ratio'] = df['premium_spend_total'] / (df['total_spend'] + 1)

# 3. ATM and transfer metrics
df['is_atm'] = (df['type'] == 'atm_withdrawal').astype(int)
df['is_transfer'] = df['type'].isin(['p2p_out', 'card_out']).astype(int)
df['atm_count'] = df.groupby('client_code')['is_atm'].transform('sum')
df['transfer_count'] = df.groupby('client_code')['is_transfer'].transform('sum')

# Calculate transaction counts per client
transaction_counts = df.groupby('client_code').size().to_dict()
df['transaction_count'] = df['client_code'].map(transaction_counts)
df['fee_activity_score'] = (df['atm_count'] + df['transfer_count']) / df['transaction_count']

# 4. Activity metrics
date_ranges = df.groupby('client_code')['date'].agg(['min', 'max'])
date_ranges['days_active'] = (date_ranges['max'] - date_ranges['min']).dt.days + 1
days_active_dict = date_ranges['days_active'].to_dict()
df['days_active'] = df['client_code'].map(days_active_dict)
df['daily_activity_rate'] = df['transaction_count'] / df['days_active']

# 5. Premium spending consistency
# Calculate monthly spending for premium categories
df['month'] = df['date'].dt.to_period('M')
premium_monthly = df[df['is_premium_category']==1].groupby(['client_code', 'month'])['amount'].sum()
premium_std = premium_monthly.groupby('client_code').std().fillna(0).to_dict()
df['premium_spend_std'] = df['client_code'].map(premium_std).fillna(0)
df['premium_spend_consistency'] = 1 / (1 + df['premium_spend_std'] / (df['premium_spend_total'] / 3 + 1))

# 6. Benefit calculations
df['potential_cashback'] = (df['total_spend'] * 0.02) + (df['premium_spend_total'] * 0.02)
df['potential_fee_savings'] = (df['atm_count'] * 500) + (df['transfer_count'] * 200)
df['total_benefit'] = df['potential_cashback'] + df['potential_fee_savings']

print(f"Created features for {len(df)} rows")
print(f"Sample client spending - Total: {df['total_spend'].iloc[0]:,.0f}, Premium: {df['premium_spend_total'].iloc[0]:,.0f}")

Created features for 16647 rows
Sample client spending - Total: 1,164,025, Premium: 518,758


In [None]:
class PremiumCardScoringFramework:
    def __init__(self):
        self.product_name = 'premium_card'
        self.feature_importance = {}
        self.client_features = None
        self.scores = None
    
    def aggregate_to_client_level(self, df):
        agg_dict = {
            'avg_monthly_balance_KZT': 'first',
            'balance_tier_score': 'first',
            'premium_spend_total': 'first',
            'total_spend': 'first',
            'premium_spend_ratio': 'first',
            'atm_count': 'first',
            'transfer_count': 'first',
            'fee_activity_score': 'first',
            'transaction_count': 'first',
            'daily_activity_rate': 'mean',
            'premium_spend_consistency': 'mean',
            'potential_cashback': 'first',
            'potential_fee_savings': 'first',
            'total_benefit': 'first'
        }
        
        client_df = df.groupby('client_code').agg(agg_dict).reset_index()
        
        client_df['monthly_spend'] = client_df['total_spend'] / 3
        client_df['benefit_to_balance_ratio'] = client_df['total_benefit'] / (client_df['avg_monthly_balance_KZT'] + 1)
        client_df['engagement_score'] = (client_df['daily_activity_rate'] * client_df['fee_activity_score'])
        
        return client_df
    
    def calculate_feature_importance(self, client_df):
        self.feature_importance = {
            'total_spend': 0.25,
            'balance_tier_score': 0.20,
            'premium_spend_ratio': 0.15,
            'fee_activity_score': 0.10,
            'monthly_spend': 0.10,
            'premium_spend_consistency': 0.10,
            'engagement_score': 0.05,
            'benefit_to_balance_ratio': 0.05
        }
        return self.feature_importance
    
    def calculate_client_scores(self, client_df):
        from scipy.stats import percentileofscore
        
        # First, calculate absolute value score based on expected benefit
        max_benefit = client_df['total_benefit'].max()
        client_df['absolute_value_score'] = client_df['total_benefit'] / max_benefit
        
        # Original percentile-based scoring
        score_features = list(self.feature_importance.keys())
        
        for col in score_features:
            if col in client_df.columns:
                if client_df[col].dtype.name == 'category':
                    continue
                if col == 'balance_tier_score':
                    client_df[f'{col}_norm'] = client_df[col]
                    continue
                values = client_df[col].fillna(0).replace([np.inf, -np.inf], 0)
                if len(set(values)) > 1:
                    client_df[f'{col}_norm'] = [percentileofscore(values, x)/100 for x in values]
                else:
                    client_df[f'{col}_norm'] = 0.5
        
        # Calculate percentile-based score
        client_df['percentile_score'] = 0
        for feature, weight in self.feature_importance.items():
            if f'{feature}_norm' in client_df.columns:
                client_df['percentile_score'] += client_df[f'{feature}_norm'] * weight
        
        # Blend absolute value and percentile scores (50/50)
        client_df['composite_score'] = (client_df['percentile_score'] * 0.5) + (client_df['absolute_value_score'] * 0.5)
        
        # Apply tier boost
        client_df.loc[client_df['balance_tier_score'] >= 0.75, 'composite_score'] *= 1.05
        client_df['composite_score'] = client_df['composite_score'].clip(0, 1)
        
        
        client_df['score_percentile'] = client_df['composite_score'].rank(pct=True)
        client_df['recommendation_tier'] = pd.cut(
            client_df['composite_score'],
            bins=[0, 0.3, 0.5, 0.7, 1.0],
            labels=['Low', 'Medium', 'High', 'Very High']
        )
        
        client_df['expected_annual_benefit'] = client_df['total_benefit'] * 4
        
        return client_df
    
    def fit_score(self, df):
        print(f"\nProcessing Premium Card scoring...")
        print(f"Input: {len(df)} transactions from {df['client_code'].nunique()} clients")
        
        client_df = self.aggregate_to_client_level(df)
        print(f"Created {len(client_df)} client profiles")
        
        self.calculate_feature_importance(client_df)
        client_df = self.calculate_client_scores(client_df)
        
        self.client_features = client_df
        self.scores = client_df[['client_code', 'composite_score', 'score_percentile', 
                                 'recommendation_tier', 'expected_annual_benefit']]
        
        return self.scores

# Run scoring
scorer = PremiumCardScoringFramework()
scores = scorer.fit_score(df)

print("\nTop 10 Premium Card Prospects:")
top10 = scores.nlargest(10, 'composite_score')[['client_code', 'composite_score', 'expected_annual_benefit', 'recommendation_tier']]
print(top10)

# Check client 12
if 12 in scores['client_code'].values:
    client_12_score = scores[scores['client_code'] == 12]
    print(f"\nClient 12 Premium Card Score: {client_12_score['composite_score'].values[0]:.3f}")
    print(f"Expected Annual Benefit: {client_12_score['expected_annual_benefit'].values[0]:,.0f} KZT")
    print(f"Tier: {client_12_score['recommendation_tier'].values[0]}")
    
    # Compare with deposit_accumulative score
    print(f"\nComparison:")
    print(f"Deposit Accumulative Score: 0.716")
    print(f"Premium Card Score: {client_12_score['composite_score'].values[0]:.3f}")


Processing Premium Card scoring...
Input: 16647 transactions from 44 clients
Created 44 client profiles

Top 10 Premium Card Prospects:
    client_code  composite_score  expected_annual_benefit recommendation_tier
5             8         0.750000              495203.6344           Very High
28           42         0.709375              468078.0024           Very High
20           32         0.656875              462907.3720                High
22           35         0.620625              450584.5136                High
37           54         0.618750              442883.7112                High
9            12         0.611875              414953.0776                High
40           57         0.611250              442190.1096                High
8            11         0.584375              466533.5928                High
6             9         0.583750              447562.6200                High
25           38         0.578750              437911.2384                High

Clie

In [18]:
# Debug client 12's premium card scoring
client_12_features = scorer.client_features[scorer.client_features['client_code'] == 12]

print("Client 12 Raw Features:")
print(f"Total Spend: {client_12_features['total_spend'].values[0]:,.0f} KZT")
print(f"Premium Spend: {client_12_features['premium_spend_total'].values[0]:,.0f} KZT") 
print(f"Monthly Spend: {client_12_features['monthly_spend'].values[0]:,.0f} KZT")
print(f"Balance: {client_12_features['avg_monthly_balance_KZT'].values[0]:,.0f} KZT")
print(f"Balance Tier Score: {client_12_features['balance_tier_score'].values[0]}")
print(f"Premium Spend Ratio: {client_12_features['premium_spend_ratio'].values[0]:.2%}")

print("\nNormalized Scores (percentiles):")
for feature in scorer.feature_importance.keys():
    norm_col = f'{feature}_norm'
    if norm_col in client_12_features.columns:
        print(f"{feature}: {client_12_features[norm_col].values[0]:.2f}")

# Compare with other clients
print("\nComparison with all clients:")
print(scorer.client_features[['client_code', 'total_spend', 'composite_score']].sort_values('total_spend', ascending=False).head())

Client 12 Raw Features:
Total Spend: 1,533,414 KZT
Premium Spend: 863,500 KZT
Monthly Spend: 511,138 KZT
Balance: 849,046 KZT
Balance Tier Score: 0.75
Premium Spend Ratio: 56.31%

Normalized Scores (percentiles):
total_spend: 0.66
premium_spend_ratio: 0.89
fee_activity_score: 0.50
monthly_spend: 0.66
premium_spend_consistency: 0.86
engagement_score: 0.49
benefit_to_balance_ratio: 0.64

Comparison with all clients:
    client_code  total_spend  composite_score
5             8   2127177.97         0.750000
37           54   2064791.11         0.618750
28           42   2046412.39         0.709375
20           32   1998607.11         0.656875
8            11   1973998.08         0.584375
