In [1]:
import os
import sys

# root path
ROOT = os.path.abspath(os.path.join(os.getcwd(), '..'))

# Add the project root to the Python path
if ROOT not in sys.path:
    sys.path.append(ROOT)

In [2]:
# Import necessary libraries
import pandas as pd
import numpy as np
import json
import os

# Query 1

In [18]:
from src.data.data_processing import pickelup_card_data, pickelup_transactions_data, pickelup_client_data

In [7]:
# Query 1: Find the card_id with the latest expiry date and the lowest credit limit amount.

card_df = pickelup_card_data(file_path="../data/processed/card_df.pkl")
card_df.head()

Unnamed: 0,card_id,client_id,card_brand,card_type,expires,has_chip,num_cards_issued,credit_limit,acct_open_date,year_pin_last_changed,card_on_dark_web
0,4524,825,Visa,Debit,2022-12-01,YES,2,24295,2002-09-01,2008,No
1,2731,825,Visa,Debit,2020-12-01,YES,2,21968,2014-04-01,2014,No
2,3701,825,Visa,Debit,2024-02-01,YES,2,46414,2003-07-01,2004,No
3,42,825,Visa,Credit,2024-08-01,NO,1,12400,2003-01-01,2012,No
4,4659,825,Mastercard,Debit (Prepaid),2009-03-01,YES,1,28,2008-09-01,2009,No


In [9]:
# Find the latest expiry date
latest_expiry = card_df['expires'].max()
cards_with_latest_expiry = card_df[card_df['expires'] == latest_expiry]

In [11]:
# From these cards, find the one with the lowest credit limit
min_credit_limit = cards_with_latest_expiry['credit_limit'].min()
card_with_min_credit_limit = cards_with_latest_expiry[cards_with_latest_expiry['credit_limit'] == min_credit_limit]

In [56]:
# Retrieve the card_id
query_1_card_id = int(card_with_min_credit_limit['card_id'].values[0])
print(query_1_card_id)

4137


# Query 2

In [22]:
# Query 2: Find the client_id that will retire within a year, has the lowest credit score, and highest debt.

client_df = pickelup_client_data(file_path="../data/processed/client_df.pkl")

Unnamed: 0,client_id,current_age,retirement_age,birth_year,birth_month,gender,address,latitude,longitude,per_capita_income,yearly_income,total_debt,credit_score,num_credit_cards
0,825,53,66,1966,11,Female,462 Rose Lane,34.0,-118.0,29278,59696,127613,787,5
1,1746,53,68,1966,12,Female,3606 Federal Boulevard,41.0,-74.0,37891,77254,191349,701,5
2,1718,81,67,1938,11,Female,766 Third Drive,34.0,-118.0,22681,33483,196,698,5
3,708,63,63,1957,1,Female,3 Madison Street,41.0,-74.0,163145,249925,202328,722,4
4,1164,43,70,1976,9,Male,9620 Valley Stream Drive,38.0,-122.0,53797,109687,183855,675,1


In [25]:
# Filter clients retiring within a year
client_df['years_to_retirement'] = client_df['retirement_age'] - client_df['current_age']
retiring_clients = client_df[client_df['years_to_retirement'] <= 1]

In [26]:
# Find the client with the lowest credit score
min_credit_score = retiring_clients['credit_score'].min()
clients_with_min_score = retiring_clients[retiring_clients['credit_score'] == min_credit_score]

In [55]:
# From these clients, find the one with the highest debt
max_debt = clients_with_min_score['total_debt'].max()
client_with_max_debt = clients_with_min_score[clients_with_min_score['total_debt'] == max_debt]

# Retrieve the client_id
query_2_client_id = int(client_with_max_debt['client_id'].values[0])

print(query_2_client_id)

1987


# Query 3

In [46]:
# Query 3: Find the transaction_id of an online purchase on December 31st with the highest absolute amount.

transactions_df = pickelup_transactions_data(file_path="../data/processed/transactions_df.pkl")
transactions_df = transactions_df[(transactions_df['date'].dt.month == 12) & (transactions_df['date'].dt.day == 31)]
transactions_df.set_index('id', inplace=True)

In [47]:
for name in transactions_df['merchant_city'].unique():
    if 'online' in name.lower():
        print(name)

ONLINE


In [52]:
# Filter online purchases
online_purchases = transactions_df[transactions_df['merchant_city'] == 'ONLINE']

# Find the transaction with the highest absolute amount
max_abs_amount = np.abs(online_purchases['amount']).max()

transaction_with_max_amount = online_purchases[online_purchases['amount'] == max_abs_amount]

In [54]:
# Retrieve the transaction_id
query_3_transaction_id = int(transaction_with_max_amount.index.values[0])
print(query_3_transaction_id)

10534178


# Query 4

In [81]:
# Query 4: Which client over the age of 40 made the most transactions with a Visa card in February 2016?
# Return the client_id, the card_id involved, and the total number of transactions.

client_df = pickelup_client_data(file_path="../data/processed/client_df.pkl")
clients_40 = client_df[client_df['current_age'] > 40]

transactions_df = pickelup_transactions_data(file_path="../data/processed/transactions_df.pkl")
trans_feb_2016 = transactions_df[(transactions_df['date'].dt.month == 2) & (transactions_df['date'].dt.year == 2016)]

card_df = pickelup_card_data(file_path="../data/processed/card_df.pkl")
visa_cards = card_df[card_df['card_brand'] == 'Visa']

In [82]:
selected_transactions = (
    trans_feb_2016[trans_feb_2016['client_id']
                   .isin(clients_40['client_id']) & 
                   (trans_feb_2016['card_id'].isin(visa_cards['card_id']))]
                         )

In [83]:
# Group by 'client_id' and 'card_id' and count transactions
transaction_counts = selected_transactions.groupby(['client_id', 'card_id']).size().reset_index(name='transaction_count')

  transaction_counts = selected_transactions.groupby(['client_id', 'card_id']).size().reset_index(name='transaction_count')


In [84]:
# Find the maximum number of transactions
max_transactions = transaction_counts['transaction_count'].max()

# Get the client(s) with the maximum transactions
top_client = transaction_counts[transaction_counts['transaction_count'] == max_transactions]

In [88]:
# Extract client_id, card_id, and total number of transactions
query_4_client_id = int(top_client['client_id'].values[0])
query_4_card_id = int(top_client['card_id'].values[0])
query_4_total_transactions = int(top_client['transaction_count'].values[0])

# Output

In [89]:
# Prepare the output dictionary
output = {
    "target": {
        "query_1": {
            "card_id": query_1_card_id
        },
        "query_2": {
            "client_id": query_2_client_id
        },
        "query_3": {
            "transaction_id": query_3_transaction_id
        },
        "query_4": {
            "client_id": query_4_client_id,
            "card_id": query_4_card_id,
            "number_transactions": query_4_total_transactions
        }
    }
}

# Save to predictions folder
with open('../predictions/predictions_1.json', 'w') as f:
    json.dump(output, f, indent=4)