## Import Libraries

In [1]:
import pandas as pd
import numpy as np

In [2]:
RANDOM_SEED = 42

## Data Pre-Processing

### Loading Datasets

In [3]:
training_data = pd.read_csv('data_format1/data_format1/train_format1.csv')
testing_data = pd.read_csv('data_format1/data_format1/test_format1.csv')
user_demo = pd.read_csv('data_format1/data_format1/user_info_format1.csv')
user_logs = pd.read_csv('data_format1/data_format1/user_log_format1.csv')
testing_data.drop('prob', axis=1, inplace=True)


In [4]:
training_data['type'] = 'train'
testing_data['type'] = 'test'
df_train = training_data
df = pd.concat([training_data, testing_data], ignore_index=True)

### Data Cleaning

In [5]:
# Check if any column in the tables contains any empty data
print(user_demo.columns[user_demo.isnull().any()].tolist())
print(user_logs.columns[user_logs.isnull().any()].tolist())

['age_range', 'gender']
['brand_id']


In [6]:
user_logs['user_id'] = user_logs['user_id'].astype(np.int32)
user_logs['item_id'] = user_logs['item_id'].astype(np.int32)
user_logs['cat_id'] = user_logs['cat_id'].astype(np.int16)
user_logs['seller_id'] = user_logs['seller_id'].astype(np.int16)
user_logs.rename(columns={'seller_id' : 'merchant_id'}, inplace=True)
user_logs['brand_id'].fillna(0, inplace=True)
user_logs['brand_id'] = user_logs['brand_id'].astype(np.int16)
user_logs['time_stamp'] = (pd.to_datetime(user_logs['time_stamp'], format='%m%d') - pd.to_datetime(user_logs['time_stamp'].min(), format='%m%d')).dt.days
user_logs['time_stamp'] = user_logs['time_stamp'].astype(np.int16)
user_logs['action_type'] = user_logs['action_type'].astype(np.int8)

In [7]:
user_demo

Unnamed: 0,user_id,age_range,gender
0,376517,6.0,1.0
1,234512,5.0,0.0
2,344532,5.0,0.0
3,186135,5.0,0.0
4,30230,5.0,0.0
...,...,...,...
424165,395814,3.0,1.0
424166,245950,0.0,1.0
424167,208016,,
424168,272535,6.0,1.0


In [8]:
user_demo['age_range'].fillna(0, inplace=True)
user_demo['gender'].fillna(2, inplace=True)
user_demo['age_range'] = user_demo['age_range'].astype(np.int8)
user_demo['gender'] = user_demo['gender'].astype(np.int8)

## Feature Engineering

In [9]:
# Grouping for easy aggregration
users = user_logs.groupby('user_id')
merchants = user_logs.groupby('merchant_id')
brands = user_logs.groupby('brand_id')
categories = user_logs.groupby('cat_id')
items = user_logs.groupby('item_id')
users_merchants = user_logs.groupby(['user_id', 'merchant_id'])
users_brands = user_logs.groupby(['user_id', 'brand_id'])
users_categories = user_logs.groupby(['user_id', 'cat_id'])
merchant_brands = user_logs.groupby(['merchant_id', 'brand_id'])
merchant_categories = user_logs.groupby(['merchant_id', 'cat_id'])
user_logs['time_period'] = user_logs['time_stamp'] // 31

In [10]:
seller_group = user_logs.groupby(["merchant_id","action_type"]).count()[["user_id"]].reset_index().rename(columns={'user_id':'count'})
seller_group.head()

Unnamed: 0,merchant_id,action_type,count
0,1,0,308236
1,1,1,444
2,1,2,17705
3,1,3,12755
4,2,0,2030


In [11]:
# One-hot encoding for the age_range column
age_dummies = pd.get_dummies(user_demo[['user_id', 'age_range']], columns=['age_range'], prefix='age')

df = df.merge(age_dummies, on='user_id', how='left')
df_train = df_train.merge(age_dummies, on='user_id', how='left')

In [12]:
df_test = df

### Overall Count and Action Count/Ratio Features

### Overall Count of Unique Values

In [13]:
# Count the number of unique values for other features grouped by user_id
user_unique_features = users.agg({
    'item_id': 'nunique',      # Count unique items per user
    'cat_id': 'nunique',       # Count unique categories per user
    'merchant_id': 'nunique',  # Count unique merchants per user
    'brand_id': 'nunique',     # Count unique brands per user
    'time_stamp': 'nunique',   # Count unique timestamps per user
    'time_period': 'nunique',  # Count unique time periods per user
    'action_type': 'nunique'   # Count unique action types per user
}).reset_index()

# Rename the columns for clarity
user_unique_features.rename(columns={
    'item_id': 'unique_items_user',
    'cat_id': 'unique_categories_user',
    'merchant_id': 'unique_merchants_user',
    'brand_id': 'unique_brands_user',
    'time_stamp': 'unique_dates_user',
    'time_period': 'unique_periods_user',
    'action_type': 'unique_action_types_user'
}, inplace=True)

df_test = df_test.merge(user_unique_features, on='user_id', how='left')

df_test.head()

Unnamed: 0,user_id,merchant_id,label,type,age_0,age_1,age_2,age_3,age_4,age_5,age_6,age_7,age_8,unique_items_user,unique_categories_user,unique_merchants_user,unique_brands_user,unique_dates_user,unique_periods_user,unique_action_types_user
0,34176,3906,0.0,train,False,False,False,False,False,False,True,False,False,256,45,109,108,47,6,3
1,34176,121,0.0,train,False,False,False,False,False,False,True,False,False,256,45,109,108,47,6,3
2,34176,4356,1.0,train,False,False,False,False,False,False,True,False,False,256,45,109,108,47,6,3
3,34176,2217,0.0,train,False,False,False,False,False,False,True,False,False,256,45,109,108,47,6,3
4,230784,4818,0.0,train,True,False,False,False,False,False,False,False,False,31,17,20,19,16,5,2


### Overall Action Count

In [14]:
# Count each action type for each user
user_action_counts = users['action_type'].value_counts().unstack(fill_value=0).reset_index().rename(columns={
    0: 'clicks_user', 
    1: 'carts_user', 
    2: 'purchases_user', 
    3: 'favourites_user'
}, inplace=True)

# Count each action type for each merchant
user_action_counts = users['action_type'].value_counts().unstack(fill_value=0).reset_index().rename(columns={
    0: 'clicks_user', 
    1: 'carts_user', 
    2: 'purchases_user', 
    3: 'favourites_user'
}, inplace=True)

# Count each action type for each brand
user_action_counts = users['action_type'].value_counts().unstack(fill_value=0).reset_index().rename(columns={
    0: 'clicks_user', 
    1: 'carts_user', 
    2: 'purchases_user', 
    3: 'favourites_user'
}, inplace=True)

# Count each action type for each item
user_action_counts = users['action_type'].value_counts().unstack(fill_value=0).reset_index().rename(columns={
    0: 'clicks_user', 
    1: 'carts_user', 
    2: 'purchases_user', 
    3: 'favourites_user'
}, inplace=True)

df.head()

Unnamed: 0,user_id,merchant_id,label,type,age_0,age_1,age_2,age_3,age_4,age_5,age_6,age_7,age_8
0,34176,3906,0.0,train,False,False,False,False,False,False,True,False,False
1,34176,121,0.0,train,False,False,False,False,False,False,True,False,False
2,34176,4356,1.0,train,False,False,False,False,False,False,True,False,False
3,34176,2217,0.0,train,False,False,False,False,False,False,True,False,False
4,230784,4818,0.0,train,True,False,False,False,False,False,False,False,False


In [15]:
# count total number of unique values from each feature for a given user 
to_merge = users.nunique().reset_index().rename(columns={
    'item_id': 'items_user', 
    'cat_id': 'categories_user',
    'merchant_id': 'merchants_user',
    'brand_id': 'brands_user',
    'time_stamp': 'dates_user',
    'time_period': 'periods_user',
    'action_type': 'action_types_user'
    })
df = df.merge(to_merge, on='user_id', how='left')

# count total number of unique values from each feature for a given merchant 
to_merge = merchants.nunique().reset_index().rename(columns={
    'item_id': 'items_merchant', 
    'cat_id': 'categories_merchant',
    'user_id': 'users_merchant',
    'brand_id': 'brands_merchant',
    'time_stamp': 'dates_merchant',
    'time_period': 'periods_merchant',
    'action_type': 'action_types_merchant'
    })
df = df.merge(to_merge, on='merchant_id', how='left')

# count total number of unique values from each feature for a given user and merchant
to_merge = users_merchants.nunique().reset_index().rename(columns={
    'item_id': 'items_user_merchant', 
    'cat_id': 'categories_user_merchant',
    'brand_id': 'brands_user_merchant',
    'time_stamp': 'dates_user_merchant',
    'time_period': 'periods_user_merchant',
    'action_type': 'action_types_user_merchant'
    })
df = df.merge(to_merge, on=['user_id', 'merchant_id'], how='left')

# count total actions by type for a given user
to_merge = users['action_type'].value_counts().unstack(fill_value=0).rename(columns={
    0: 'clicks_user',
    1: 'carts_user',
    2: 'purchases_user',
    3: 'favourites_user'
    })
df = df.merge(to_merge, on='user_id', how='left')

# count total actions by type for a given merchant
to_merge = merchants['action_type'].value_counts().unstack(fill_value=0).rename(columns={
    0: 'clicks_merchant', 
    1: 'carts_merchant',
    2: 'purchases_merchant',
    3: 'favourites_merchant'
    })
df = df.merge(to_merge, on='merchant_id', how='left')

# count total actions by type for a given pair (user, merchant)
to_merge = users_merchants['action_type'].value_counts().unstack(fill_value=0).rename(columns={
    0: 'clicks_user_merchant',
    1: 'carts_user_merchant',
    2: 'purchases_user_merchant',
    3: 'favourites_user_merchant'
    })
df = df.merge(to_merge, on=['user_id', 'merchant_id'], how='left')

### Double 11 Features

Double 11 features are counts of clicks, purchases, add-
to-favourites on the Double 11 day. The ratio of the double
11 counts to the overall counts are also calculated. For the
entity in Figure 2, its Double 11 click count is 1, its Double
11 click ratio is 1/12=0.083 and its Double 11 buy ratio is
1/2=0.5. If a user has a high Double 11 buy ratio, then the
user is more likely to be a one-time deal hunter.

In [16]:
DOUBLE_11= 184 # min date is 511
double_11_log = (user_logs[user_logs['time_stamp'] ==DOUBLE_11]).reset_index(drop=True)

double_11_users= double_11_log.groupby('user_id')
double_11_merchant = double_11_log.groupby('merchant_id')
ouble_11_categories = double_11_log.groupby('cat_id')
double_11_brand = double_11_log.groupby('brand_id')
double_11_item = double_11_log.groupby('item_id')
double_11_user_merchant = double_11_log.groupby(['user_id','merchant_id'])
# counts of clicks, purchase, addto favourite

# Count each action type for each user
# count total number of unique values from each feature for a given user 
# count total number of unique values from each feature for a given user 
to_merge = double_11_users.nunique().reset_index().rename(columns={
    'item_id': 'double_11_items_user', 
    'cat_id': 'double_11_categories_user',
    'merchant_id': 'double_11_merchants_user',
    'brand_id': 'double_11_brands_user',
    'time_stamp': 'double_11_dates_user',
    'time_period': 'double_11_periods_user',
    'action_type': 'double_11_action_types_user'
    })

df = df.merge(to_merge, on='user_id', how='left')

# count total number of unique values from each feature for a given merchant 
to_merge = double_11_merchant.nunique().reset_index().rename(columns={
    'item_id': 'double_11_items_merchant', 
    'cat_id': 'double_11_categories_merchant',
    'user_id': 'double_11_users_merchant',
    'brand_id': 'double_11_brands_merchant',
    'time_stamp': 'double_11_dates_merchant',
    'time_period': 'double_11_periods_merchant',
    'action_type': 'double_11_action_types_merchant'
    })
df = df.merge(to_merge, on='merchant_id', how='left')

# count total number of unique values from each feature for a given user and merchant
to_merge = double_11_user_merchant.nunique().reset_index().rename(columns={
    'item_id': 'double_11_items_user_merchant', 
    'cat_id': 'double_11_categories_user_merchant',
    'brand_id': 'double_11_brands_user_merchant',
    'time_stamp': 'double_11_dates_user_merchant',
    'time_period': 'double_11_periods_user_merchant',
    'action_type': 'double_11_action_types_user_merchant'
    })
df = df.merge(to_merge, on=['user_id', 'merchant_id'], how='left')

# count total actions by type for a given user
to_merge = double_11_users['action_type'].value_counts().unstack(fill_value=0).rename(columns={
    0: 'double_11_clicks_user',
    1: 'double_11_carts_user',
    2: 'double_11_purchases_user',
    3: 'double_11_favourites_user'
    })
df = df.merge(to_merge, on='user_id', how='left')

# count total actions by type for a given merchant
to_merge = double_11_merchant['action_type'].value_counts().unstack(fill_value=0).rename(columns={
    0: 'double_11_clicks_merchant', 
    1: 'double_11_carts_merchant',
    2: 'double_11_purchases_merchant',
    3: 'double_11_favourites_merchant'
    })
df = df.merge(to_merge, on='merchant_id', how='left')

# count total actions by type for a given pair (user, merchant)
to_merge = double_11_user_merchant['action_type'].value_counts().unstack(fill_value=0).rename(columns={
    0: 'double_11_clicks_user_merchant',
    1: 'double_11_carts_user_merchant',
    2: 'double_11_purchases_user_merchant',
    3: 'double_11_favourites_user_merchant'
    })
df = df.merge(to_merge, on=['user_id', 'merchant_id'], how='left')

print(df.head(5))

   user_id  merchant_id  label   type  age_0  age_1  age_2  age_3  age_4   
0    34176         3906    0.0  train  False  False  False  False  False  \
1    34176          121    0.0  train  False  False  False  False  False   
2    34176         4356    1.0  train  False  False  False  False  False   
3    34176         2217    0.0  train  False  False  False  False  False   
4   230784         4818    0.0  train   True  False  False  False  False   

   age_5  ...  double_11_purchases_user  double_11_favourites_user   
0  False  ...                        11                          1  \
1  False  ...                        11                          1   
2  False  ...                        11                          1   
3  False  ...                        11                          1   
4  False  ...                         1                          0   

   double_11_clicks_merchant  double_11_carts_merchant   
0                       1667                         2  \
1     

### Ratio

In [17]:
groups = ['user', 'merchant', 'user_merchant']
action_types = ['clicks', 'carts', 'purchases', 'favourites']

EPSILON = 1e-8

# Compute ratios for features in user, merchant, and user-merchant categories
for group in groups:
    for feature in ['items', 'categories', 'brands', 'dates', 'periods', 'action_types']:
        # Feature ratio for each group relative to itself
        df[f'double_11_{feature}_{group}_ratio'] = df[f'double_11_{feature}_{group}'] / (df[f'double_11_{feature}_{group}'] + EPSILON)
        df[f'double_11_{feature}_{group}_ratio'] = df[f'double_11_{feature}_{group}'] / (df[f'{feature}_{group}'] + EPSILON)

# Compute ratios for actions from the user perspective
for action in action_types:
    # Ratio within double_11 actions
    df[f'double_11_{action}_user_ratio'] = df[f'double_11_{action}_user'] / (
        df['double_11_clicks_user'] + df['double_11_carts_user'] + df['double_11_purchases_user'] + df['double_11_favourites_user'] + EPSILON
    )
    df[f'double_11_{action}_user_ratio'] = df[f'double_11_{action}_user'] / (df[f'{action}_user'] + EPSILON)

# Compute ratios for actions from the merchant perspective
for action in action_types:
    # Ratio within double_11 actions
    df[f'double_11_{action}_merchant_ratio'] = df[f'double_11_{action}_merchant'] / (
        df['double_11_clicks_merchant'] + df['double_11_carts_merchant'] + df['double_11_purchases_merchant'] + df['double_11_favourites_merchant'] + EPSILON
    )
    df[f'double_11_{action}_merchant_ratio'] = df[f'double_11_{action}_merchant'] / (df[f'{action}_merchant'] + EPSILON)

# Compute ratios for actions from the user-merchant perspective
for action in action_types:
    # Ratio within double_11 actions
    df[f'double_11_{action}_user_merchant_ratio'] = df[f'double_11_{action}_user_merchant'] / (
        df['double_11_clicks_user_merchant'] + df['double_11_carts_user_merchant'] + df['double_11_purchases_user_merchant'] + df['double_11_favourites_user_merchant'] + EPSILON
    )
    # Ratio of actions compared to total user-merchant actions
    df[f'double_11_{action}_user_merchant_ratio'] = df[f'double_11_{action}_user_merchant'] / (df[f'{action}_user_merchant'] + EPSILON)

# Ratio of actions in each merchant (user perspective)
for action in action_types:
    df[f'double_11_{action}_in_merchant_ratio_perspective'] = df[f'double_11_{action}_user_merchant'] / (df[f'double_11_{action}_user'] + EPSILON)

# Ratio of actions in each merchant (merchant perspective)
for action in action_types:
    df[f'double_11_{action}_by_user_ratio_perspective'] = df[f'double_11_{action}_user_merchant'] / (df[f'double_11_{action}_merchant'] + EPSILON)

# Ratio of each action type for a given user
for action in action_types:
    df[f'double_11_{action}_user_ratio'] = df[f'double_11_{action}_user'] / (
        df['double_11_clicks_user'] + df['double_11_carts_user'] + df['double_11_purchases_user'] + df['double_11_favourites_user'] + EPSILON
    )

# Ratio of each action type for a given merchant
for action in action_types:
    df[f'double_11_{action}_merchant_ratio'] = df[f'double_11_{action}_merchant'] / (
        df['double_11_clicks_merchant'] + df['double_11_carts_merchant'] + df['double_11_purchases_merchant'] + df['double_11_favourites_merchant'] + EPSILON
    )

# Ratio of each action type for a given pair (user, merchant)
for action in action_types:
    df[f'double_11_{action}_user_merchant_ratio'] = df[f'double_11_{action}_user_merchant'] / (
        df['double_11_clicks_user_merchant'] + df['double_11_carts_user_merchant'] + df['double_11_purchases_user_merchant'] + df['double_11_favourites_user_merchant'] + EPSILON
    )

# Display the first 10 rows to verify


### Latest one week

In [18]:
latest_one_week_log = user_logs[
    (user_logs['time_stamp'] >= DOUBLE_11 - 7) & 
    (user_logs['time_stamp'] < DOUBLE_11 )
].reset_index(drop=True)


latest_one_week_users = latest_one_week_log.groupby('user_id')
latest_one_week_merchant = latest_one_week_log.groupby('merchant_id')
latest_one_week_categories = latest_one_week_log.groupby('cat_id')
latest_one_week_brand = latest_one_week_log.groupby('brand_id')
latest_one_week_item = latest_one_week_log.groupby('item_id')
latest_one_week_user_merchant = latest_one_week_log.groupby(['user_id', 'merchant_id'])

# Count each action type for each user
# Count total number of unique values from each feature for a given user
to_merge = latest_one_week_users.nunique().reset_index().rename(columns={
    'item_id': 'latest_one_week_items_user', 
    'cat_id': 'latest_one_week_categories_user',
    'merchant_id': 'latest_one_week_merchants_user',
    'brand_id': 'latest_one_week_brands_user',
    'time_stamp': 'latest_one_week_dates_user',
    'time_period': 'latest_one_week_periods_user',
    'action_type': 'latest_one_week_action_types_user'
})

df = df.merge(to_merge, on='user_id', how='left')

# Count total number of unique values from each feature for a given merchant
to_merge = latest_one_week_merchant.nunique().reset_index().rename(columns={
    'item_id': 'latest_one_week_items_merchant', 
    'cat_id': 'latest_one_week_categories_merchant',
    'user_id': 'latest_one_week_users_merchant',
    'brand_id': 'latest_one_week_brands_merchant',
    'time_stamp': 'latest_one_week_dates_merchant',
    'time_period': 'latest_one_week_periods_merchant',
    'action_type': 'latest_one_week_action_types_merchant'
})
df = df.merge(to_merge, on='merchant_id', how='left')

# Count total number of unique values from each feature for a given user and merchant
to_merge = latest_one_week_user_merchant.nunique().reset_index().rename(columns={
    'item_id': 'latest_one_week_items_user_merchant', 
    'cat_id': 'latest_one_week_categories_user_merchant',
    'brand_id': 'latest_one_week_brands_user_merchant',
    'time_stamp': 'latest_one_week_dates_user_merchant',
    'time_period': 'latest_one_week_periods_user_merchant',
    'action_type': 'latest_one_week_action_types_user_merchant'
})
df = df.merge(to_merge, on=['user_id', 'merchant_id'], how='left')

# Count total actions by type for a given user
to_merge = latest_one_week_users['action_type'].value_counts().unstack(fill_value=0).rename(columns={
    0: 'latest_one_week_clicks_user',
    1: 'latest_one_week_carts_user',
    2: 'latest_one_week_purchases_user',
    3: 'latest_one_week_favourites_user'
})
df = df.merge(to_merge, on='user_id', how='left')

# Count total actions by type for a given merchant
to_merge = latest_one_week_merchant['action_type'].value_counts().unstack(fill_value=0).rename(columns={
    0: 'latest_one_week_clicks_merchant', 
    1: 'latest_one_week_carts_merchant',
    2: 'latest_one_week_purchases_merchant',
    3: 'latest_one_week_favourites_merchant'
})
df = df.merge(to_merge, on='merchant_id', how='left')

# Count total actions by type for a given pair (user, merchant)
to_merge = latest_one_week_user_merchant['action_type'].value_counts().unstack(fill_value=0).rename(columns={
    0: 'latest_one_week_clicks_user_merchant',
    1: 'latest_one_week_carts_user_merchant',
    2: 'latest_one_week_purchases_user_merchant',
    3: 'latest_one_week_favourites_user_merchant'
})
df = df.merge(to_merge, on=['user_id', 'merchant_id'], how='left')

print(df.head(5))


   user_id  merchant_id  label   type  age_0  age_1  age_2  age_3  age_4   
0    34176         3906    0.0  train  False  False  False  False  False  \
1    34176          121    0.0  train  False  False  False  False  False   
2    34176         4356    1.0  train  False  False  False  False  False   
3    34176         2217    0.0  train  False  False  False  False  False   
4   230784         4818    0.0  train   True  False  False  False  False   

   age_5  ...  latest_one_week_purchases_user   
0  False  ...                             4.0  \
1  False  ...                             4.0   
2  False  ...                             4.0   
3  False  ...                             4.0   
4  False  ...                             0.0   

   latest_one_week_favourites_user  latest_one_week_clicks_merchant   
0                              0.0                             1808  \
1                              0.0                            15572   
2                              0.0 

In [19]:
groups = ['user', 'merchant', 'user_merchant']
action_types = ['clicks', 'carts', 'purchases', 'favourites']

EPSILON = 1e-8

# Compute ratios for features in user, merchant, and user-merchant categories
for group in groups:
    for feature in ['items', 'categories', 'brands', 'dates', 'periods', 'action_types']:
        # Feature ratio for each group relative to itself
        df[f'latest_one_week_{feature}_{group}_ratio'] = df[f'latest_one_week_{feature}_{group}'] / (df[f'latest_one_week_{feature}_{group}'] + EPSILON)
        df[f'latest_one_week_{feature}_{group}_ratio'] = df[f'latest_one_week_{feature}_{group}'] / (df[f'{feature}_{group}'] + EPSILON)

# Compute ratios for actions from the user perspective
for action in action_types:
    # Ratio within latest_one_week actions
    df[f'latest_one_week_{action}_user_ratio'] = df[f'latest_one_week_{action}_user'] / (
        df['latest_one_week_clicks_user'] + df['latest_one_week_carts_user'] + df['latest_one_week_purchases_user'] + df['latest_one_week_favourites_user'] + EPSILON
    )
    df[f'latest_one_week_{action}_user_ratio'] = df[f'latest_one_week_{action}_user'] / (df[f'{action}_user'] + EPSILON)

# Compute ratios for actions from the merchant perspective
for action in action_types:
    # Ratio within latest_one_week actions
    df[f'latest_one_week_{action}_merchant_ratio'] = df[f'latest_one_week_{action}_merchant'] / (
        df['latest_one_week_clicks_merchant'] + df['latest_one_week_carts_merchant'] + df['latest_one_week_purchases_merchant'] + df['latest_one_week_favourites_merchant'] + EPSILON
    )
    df[f'latest_one_week_{action}_merchant_ratio'] = df[f'latest_one_week_{action}_merchant'] / (df[f'{action}_merchant'] + EPSILON)

# Compute ratios for actions from the user-merchant perspective
for action in action_types:
    # Ratio within latest_one_week actions
    df[f'latest_one_week_{action}_user_merchant_ratio'] = df[f'latest_one_week_{action}_user_merchant'] / (
        df['latest_one_week_clicks_user_merchant'] + df['latest_one_week_carts_user_merchant'] + df['latest_one_week_purchases_user_merchant'] + df['latest_one_week_favourites_user_merchant'] + EPSILON
    )
    # Ratio of actions compared to total user-merchant actions
    df[f'latest_one_week_{action}_user_merchant_ratio'] = df[f'latest_one_week_{action}_user_merchant'] / (df[f'{action}_user_merchant'] + EPSILON)

# Ratio of actions in each merchant (user perspective)
for action in action_types:
    df[f'latest_one_week_{action}_in_merchant_ratio_perspective'] = df[f'latest_one_week_{action}_user_merchant'] / (df[f'latest_one_week_{action}_user'] + EPSILON)

# Ratio of actions in each merchant (merchant perspective)
for action in action_types:
    df[f'latest_one_week_{action}_by_user_ratio_perspective'] = df[f'latest_one_week_{action}_user_merchant'] / (df[f'latest_one_week_{action}_merchant'] + EPSILON)

# Ratio of each action type for a given user
for action in action_types:
    df[f'latest_one_week_{action}_user_ratio'] = df[f'latest_one_week_{action}_user'] / (
        df['latest_one_week_clicks_user'] + df['latest_one_week_carts_user'] + df['latest_one_week_purchases_user'] + df['latest_one_week_favourites_user'] + EPSILON
    )

# Ratio of each action type for a given merchant
for action in action_types:
    df[f'latest_one_week_{action}_merchant_ratio'] = df[f'latest_one_week_{action}_merchant'] / (
        df['latest_one_week_clicks_merchant'] + df['latest_one_week_carts_merchant'] + df['latest_one_week_purchases_merchant'] + df['latest_one_week_favourites_merchant'] + EPSILON
    )

# Ratio of each action type for a given pair (user, merchant)
for action in action_types:
    df[f'latest_one_week_{action}_user_merchant_ratio'] = df[f'latest_one_week_{action}_user_merchant'] / (
        df['latest_one_week_clicks_user_merchant'] + df['latest_one_week_carts_user_merchant'] + df['latest_one_week_purchases_user_merchant'] + df['latest_one_week_favourites_user_merchant'] + EPSILON
    )
df.fillna(0, inplace=True)
# Display the first 10 rows to verify
df.head(5)


Unnamed: 0,user_id,merchant_id,label,type,age_0,age_1,age_2,age_3,age_4,age_5,...,latest_one_week_purchases_user_merchant_ratio,latest_one_week_favourites_user_merchant_ratio,latest_one_week_clicks_in_merchant_ratio_perspective,latest_one_week_carts_in_merchant_ratio_perspective,latest_one_week_purchases_in_merchant_ratio_perspective,latest_one_week_favourites_in_merchant_ratio_perspective,latest_one_week_clicks_by_user_ratio_perspective,latest_one_week_carts_by_user_ratio_perspective,latest_one_week_purchases_by_user_ratio_perspective,latest_one_week_favourites_by_user_ratio_perspective
0,34176,3906,0.0,train,False,False,False,False,False,False,...,0.0,0.0,0.073171,0.0,0.0,0.0,0.003319,0.0,0.0,0.0
1,34176,121,0.0,train,False,False,False,False,False,False,...,0.0,0.0,0.097561,0.0,0.0,0.0,0.000514,0.0,0.0,0.0
2,34176,4356,1.0,train,False,False,False,False,False,False,...,0.0,0.0,0.073171,0.0,0.0,0.0,0.015345,0.0,0.0,0.0
3,34176,2217,0.0,train,False,False,False,False,False,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,230784,4818,0.0,train,True,False,False,False,False,False,...,0.0,0.0,0.6,0.0,0.0,0.0,0.000527,0.0,0.0,0.0
