In [2]:
import os
import json
import pickle
from bisect import bisect_left, bisect_right
from datetime import datetime, timedelta
from collections import defaultdict, Counter

import numpy as np
import pandas as pd
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

# GPU hack if you need
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Data

Columns
- `party_rk` – client unique identifier
- `account_rk` – client account unique identifier
- `financial_account_type_cd` – debit/credit card flag
- `transaction_dttm` – operation datetime
- `transaction_type_desc` – purchase/payment/...
- `transaction_amt_rur` – transaction price
- `merchant_type` - DUTY FREE STORES/FUEL DEALERS/RESTAURANTS/ etc
- `merchant_group_rk` - McDonald's/Wildberries/ etc

It's important that table is already sorted by `transaction_dttm` column!!!

In [11]:
DATADIR = "./hackathon_data" # "./data"
transactions_path = f"{DATADIR}/avk_hackathon_data_transactions.csv"
data = pd.read_csv(f"{DATADIR}/avk_hackathon_data_transactions.csv")

In [15]:
data_balance = pd.read_csv(f"{DATADIR}/avk_hackathon_data_account_x_balance.csv")

In [5]:
data_soc = pd.read_csv(f"{DATADIR}/avk_hackathon_data_party_x_socdem.csv")

## Mappings
~1 min

In [None]:
# Prepare & save mappings
mappings = defaultdict(dict)
unk_token = "<UNK>"

def create_mapping(values):
    mapping = {unk_token: 0}
    for v in values:
        if not pd.isna(v):
            mapping[str(v)] = len(mapping)

    return mapping


for col in tqdm(
    [
        "transaction_type_desc",
        "merchant_rk",
        "merchant_type",
        "merchant_group_rk",
        "category",
        "financial_account_type_cd",
    ]
):

    col_values = (
        pd.read_csv(transactions_path, usecols=[col])[col]
        .fillna(unk_token)
        .astype(str)
    )
    mappings[col] = create_mapping(col_values.unique())
    del col_values


with open(f"{DATADIR}/mappings.json", "w") as f:
    json.dump(mappings, f)

In [12]:
# load mappings
unk_token = "<UNK>"
with open(f"{DATADIR}/mappings.json", 'r') as f:
     mappings = json.load(f)

## Parse transactions by users
~ 40 min

In [None]:
party2cat = defaultdict(list)
usecols = [
    "party_rk",
    "category",
]

for chunk in tqdm(
    pd.read_csv(transactions_path, usecols=usecols, chunksize=100_000w)
):
    chunk["category"] = (
        chunk["category"].fillna(unk_token).astype(str)
    )
    print(chunk["category"])  
    for i, row in chunk.iterrows():
        party2cat[row.party_rk].append(
            mappings["category"][row.category]
        )

    del chunk
        
pickle.dump(party2cat, open(f"{DATADIR}/party2cat.pkl", "wb"))

In [None]:
# Prepare & save client data
party2dates = defaultdict(list)  # for each party save a series of the transaction dates 
party2sum = defaultdict(list)  # for each party save a series of the transaction costs 
party2merchant_type = defaultdict(list)  # for each party save a series of the transaction_type 
party2trans_type = defaultdict(list)  # for each party save a series of the transaction merchant_type

usecols = [
    "party_rk",
    "transaction_dttm",
    "transaction_amt_rur",
    "merchant_type",
    "transaction_type_desc",
]

for chunk in tqdm(
    pd.read_csv(transactions_path, usecols=usecols, chunksize=100_000)
):

    chunk["merchant_type"] = (
        chunk["merchant_type"].fillna(unk_token).astype(str)
    )
    chunk["transaction_type_desc"] = (
        chunk["transaction_type_desc"].fillna(unk_token).astype(str)
    )
    chunk["transaction_amt_rur"] = chunk["transaction_amt_rur"].fillna(0)

    for i, row in chunk.iterrows():
        party2dates[row.party_rk].append(row.transaction_dttm)
        party2sum[row.party_rk].append(row.transaction_amt_rur)
        party2merchant_type[row.party_rk].append(
            mappings["merchant_type"][row.merchant_type]
        )
        party2trans_type[row.party_rk].append(
            mappings["transaction_type_desc"][row.transaction_type_desc]
        )

    del chunk

pickle.dump(party2dates, open(f"{DATADIR}/party2dates.pkl", "wb"))
pickle.dump(party2sum, open(f"{DATADIR}/party2sum.pkl", "wb"))
pickle.dump(party2merchant_type, open(f"{DATADIR}/party2merchant_type.pkl", "wb"))
pickle.dump(party2trans_type, open(f"{DATADIR}/party2trans_type.pkl", "wb"))

In [13]:
# load client data
party2dates = pickle.load(open(f"{DATADIR}/party2dates.pkl", 'rb'))
party2sum = pickle.load(open(f"{DATADIR}/party2sum.pkl", 'rb'))
party2merchant_type = pickle.load(open(f"{DATADIR}/party2merchant_type.pkl", 'rb'))
party2trans_type = pickle.load(open(f"{DATADIR}/party2trans_type.pkl", 'rb'))
party2cat = pickle.load(open(f"{DATADIR}/party2cat.pkl", 'rb'))

# Decision process

### Idea

As we had predictions of transactions, which can be performed by a particular person. So we decided to predict income/outcome of a person in that months in order to make recommendation. Based on that information we can suggest to him/her to participate in different kinds of challenges and quests.

### Investement suggestions
either to make an investement and buy shares, create an "automatic savings plan" or to make a personal plan of spending money, make a suggestion of how to spend less.

### Entertainment suggestions
Get quests and challenges for your favourite time spending. Cinemas, concerts and restaurants. 

In [6]:
mappings_cat = defaultdict(dict)
unk_token = "<UNK>"

for name in mappings['merchant_type'].keys():
    if name == unk_token:
        continue
    else:
        mappings_cat[mappings['merchant_type'][name]] = list()

col_values = (
        pd.read_csv(transactions_path, usecols=["merchant_type", 'category'])
        .fillna(unk_token)
        .astype(str)
    )

for i, row in tqdm(
    col_values.iterrows()
    ):
    
    if mappings["category"][row.category] not in mappings_cat[mappings['merchant_type'][row.merchant_type]]:
        mappings_cat[mappings['merchant_type'][row.merchant_type]].append(mappings["category"][row.category])
        
with open(f"{DATADIR}/mappings_cat_merchant_type.json", "w") as f:
    json.dump(mappings_cat, f)

11987617it [15:59, 12488.79it/s]


In [45]:
# load mappings
unk_token = "<UNK>"
with open(f"{DATADIR}/mappings_cat_merchant_type.json", 'r') as f:
     mappings_cat = json.load(f)

In [46]:
# if person is interested in money increasing/saving strategies
money_recommendations_pack = {
    'Capital saving strategy': {
        'Expenses decrease suggestion.': ' https://journal.tinkoff.ru/selected/budget/  ',
        'Card limits setting.': '  https://help.tinkoff.ru/drive-dc/limit-add-card/add-card-set-limits/  ',
    },
    'Safe capital increase strategies': {
        'Deposits suggestion.': '  https://www.tinkoff.ru/deposit/  ',
        'Charity suggestion.': {
            'Moscow': '  https://www.tinkoff.ru/payments/categories/blagotvoritelnost/  ',
            'All': '  https://www.tinkoff.ru/about/news/04082020-tinkoff-launched-charity-service/  ',
        },
        'Automatic savings.': '  https://www.tinkoff.ru/eng/media/news/08062020-investments-box-eng/  ',
    },
    'Capital increase strategies': {
        'Investement suggestion.': '  https://www.tinkoff.ru/invest/ ',
        'Charity suggestion.': {
            'Moscow': '  https://www.tinkoff.ru/payments/categories/blagotvoritelnost/ ',
            'All': '  https://www.tinkoff.ru/about/news/04082020-tinkoff-launched-charity-service/ ',
        },
    },
}

# if person is inetrested in entertainment
entertainment_recommendation_pack = {
    'Cinemas': {
        'Moscow': 'Visit three cinemas in Moscow. Get cashback for buying tickets at Tinkoff. https://www.tinkoff.ru/entertainment/moskva/movies/  ',
        "All": 'Visit three the oldest cinemas. Get cashback for buying tickets at Tinkoff.  https://help.tinkoff.ru/junior/earn/cashback-for-movie/ ',
    },
    'Restaurants': 'Visit three the most interesting Georgian restaurants.  https://www.tinkoff.ru/entertainment/:city/restaurants/ ',
    'Music': 'Visit three coolest rock-concerts in three months.  https://www.tinkoff.ru/entertainment/concerts/ ',
}

In [53]:
date_months = '2019-06'
user_num = 33943

def get_data_party(user_num, date_months):
    data_party = dict()
    
    data_user = data[data['party_rk'] == user_num]
    months = [item[:-3] for item in data_user['transaction_dttm']]
    data_user['month'] = months
    data_user = data_user[data_user['month'] == date_months]['merchant_type'].dropna()

    data_user_balance = data_balance[data_balance['party_rk'] == user_num]
    months = [item[:-3] for item in data_user_balance['cur_month']]
    data_user_balance['month'] = months
    data_user_balance = sum(data_user_balance[data_user_balance['month'] == date_months]['balance_chng'])

    city = data_soc[data_soc['party_rk'] == user_num]['region_flg']
    
    data_party["merchant_type"] = list(data_user)
    data_party["balance"] = data_user_balance
    data_party["city"] = np.int(city)
    
    return data_party

def get_key(d, value):
    for k, v in d.items():
        if v == value:
            return k
        

data_party = get_data_party(user_num, date_months)

In [58]:
def give_advise_category_based(cat, balance, city):
    
    # financial advises
    if balance < 0:
        strategy = 'Capital saving strategy'
        if cat in ['Развлечения', 'Рестораны']:
            return money_recommendations_pack[strategy]['Card limits setting.']
        else:
            return money_recommendations_pack[strategy]['Expenses decrease suggestion.']
    else:
        if balance >= 10000:
            strategy = 'Safe capital increase strategies'
            
            if city == 0: 
                city_name = 'Moscow'
                link = money_recommendations_pack[strategy]['Charity suggestion.'][city_name]
            else:
                city_name = 'All'
                link = money_recommendations_pack[strategy]['Charity suggestion.'][city_name]
                
            if cat in ['Развлечения', 'Кино']:
                link += '  '
                link += entertainment_recommendation_pack['Cinemas'][city_name]
        else:
            strategy = 'Capital increase strategies'
            return money_recommendations_pack[strategy]['Investement suggestion.']
        
    return link

In [54]:
def recommend_quest_for_user(data_party):
    
    categories_transac = np.zeros(len(mappings['category']))

    for name in data_party['merchant_type']:
        # get categories of transactions
        categories_transac[mappings_cat[str(mappings['merchant_type'][str(name)])][0]] += 1

    max_ind_category_1 = categories_transac.argmax()
    priority_cat_1 = get_key(mappings["category"], max_ind_category_1)
    categories_transac[max_ind_category_1] = 0

    max_ind_category_2 = categories_transac.argmax()
    priority_cat_2 = get_key(mappings["category"], max_ind_category_2)
    categories_transac[max_ind_category_2] = 0

    max_ind_category_3 = categories_transac.argmax()
    priority_cat_3 = get_key(mappings["category"], max_ind_category_3)
    categories_transac[max_ind_category_3] = 0

    max_ind_category_4 = categories_transac.argmax()
    priority_cat_4 = get_key(mappings["category"], max_ind_category_4)
    categories_transac[max_ind_category_3] = 0

    priorities = [priority_cat_1, priority_cat_2, priority_cat_3, priority_cat_4]
    
    
    return advises

In [60]:
def get_advise(data_party):
    categories_transac = np.zeros(len(mappings['category']))

    for name in data_party['merchant_type']:
        # get categories of transactions
        categories_transac[mappings_cat[str(mappings['merchant_type'][str(name)])][0]] += 1

    max_ind_category_1 = categories_transac.argmax()
    priority_cat_1 = get_key(mappings["category"], max_ind_category_1)
    categories_transac[max_ind_category_1] = 0

    max_ind_category_2 = categories_transac.argmax()
    priority_cat_2 = get_key(mappings["category"], max_ind_category_2)
    categories_transac[max_ind_category_2] = 0

    max_ind_category_3 = categories_transac.argmax()
    priority_cat_3 = get_key(mappings["category"], max_ind_category_3)
    categories_transac[max_ind_category_3] = 0

    max_ind_category_4 = categories_transac.argmax()
    priority_cat_4 = get_key(mappings["category"], max_ind_category_4)
    categories_transac[max_ind_category_3] = 0

    priorities = [priority_cat_1, priority_cat_2, priority_cat_3, priority_cat_4]

    advise_1 = give_advise_category_based(priority_cat_1, data_party['balance'], data_party['city'])
    return advise_1

## Result
The model can give an advise what can person do with their financies in the next month or can plan activities.

In [62]:
get_advise(data_party)

'  https://www.tinkoff.ru/payments/categories/blagotvoritelnost/  '