# Personalized Recommendation

In [None]:
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt

In [None]:
%pylab inline

## Load the Tables

In [None]:
# User table; gather_user_preferences() reads its USER_ID_hash column.
us = pd.read_csv('data/user_list.csv')
# Trailing expression: shows the first row as the cell output.
us[:1]

In [None]:
# Training coupon list; supplies the coupon attributes (CAPSULE_TEXT,
# GENRE_NAME, area names, prices) that gather_user_preferences() counts.
cls = pd.read_csv('data/coupon_list_train_translated.csv')
# Trailing expression: shows the first five rows as the cell output.
cls[:5]

In [None]:
# Visit log; gather_user_preferences() reads USER_ID_hash,
# VIEW_COUPON_ID_hash and PURCHASE_FLG from each row.
cvs = pd.read_csv('data/coupon_visit_train.csv')
# Trailing expression: shows the first twenty rows as the cell output.
cvs[:20]

In [None]:
# Coupon detail table; loaded for inspection only, nothing else in the
# visible notebook reads `cds`.
cds = pd.read_csv('data/coupon_detail_train.csv')
# Trailing expression: shows the first row as the cell output.
cds[:1]

In [None]:
# Coupon area table; loaded for inspection only, nothing else in the
# visible notebook reads `cas`.
cas = pd.read_csv('data/coupon_area_train.csv')
# Trailing expression: shows the first row as the cell output.
cas[:1]

In [None]:
# Prefecture locations table; loaded for inspection only, nothing else in
# the visible notebook reads `ls`.
ls = pd.read_csv('data/prefecture_locations.csv')
# Trailing expression: shows the first row as the cell output.
ls[:1]

## Gather User Preferences

In [None]:
import collections
import json
import pickle
import traceback

def prate_to_cat(prate):
    """Map a PRICE_RATE value to the bucket label 'low', 'medium' or 'high'.

    Values below 50 are 'low', values below 60 are 'medium', and everything
    else (including values that compare false against both bounds, such as
    NaN) is 'high'.
    """
    for upper_bound, label in ((50, 'low'), (60, 'medium')):
        if prate < upper_bound:
            return label
    return 'high'
    
def cprice_to_cat(cprice):
    """Map a CATALOG_PRICE value to the bucket label 'low', 'medium' or 'high'.

    Values below 4480 are 'low', values below 10500 are 'medium', and
    everything else (including NaN, which compares false against both
    bounds) is 'high'.
    """
    for upper_bound, label in ((4480, 'low'), (10500, 'medium')):
        if cprice < upper_bound:
            return label
    return 'high'

def dprice_to_cat(dprice):
    """Map a DISCOUNT_PRICE value to the bucket label 'low', 'medium' or 'high'.

    Values below 1980 are 'low', values below 3880 are 'medium', and
    everything else (including NaN, which compares false against both
    bounds) is 'high'.
    """
    for upper_bound, label in ((1980, 'low'), (3880, 'medium')):
        if dprice < upper_bound:
            return label
    return 'high'

def _new_user_profile(feature_names):
    """Return a zeroed profile: two totals plus a view and a buy counter per feature."""
    profile = {'view_cnt': 0, 'buy_cnt': 0}
    for name in feature_names:
        profile[name + '_view_cnt'] = collections.defaultdict(int)
        profile[name + '_buy_cnt'] = collections.defaultdict(int)
    return profile


def gather_user_preferences(max_users=10, max_visits=1413):
    """Count, per user, which coupon attributes they viewed and bought.

    Reads the notebook globals `us` (users), `cvs` (visit log) and `cls`
    (training coupons). Each visit row is attributed to its user; rows with
    PURCHASE_FLG == 1 feed the ``*_buy_cnt`` counters, all other rows feed
    the ``*_view_cnt`` counters.

    Args:
        max_users: number of leading rows of `us` to build profiles for;
            ``None`` means every user. Defaults to the original debug
            limit of 10.
        max_visits: number of leading rows of `cvs` to process; ``None``
            means every visit. Defaults to the original debug limit of 1413.

    Returns:
        dict mapping USER_ID_hash to a profile dict with the keys
        ``view_cnt``, ``buy_cnt`` and ``<feature>_view_cnt`` /
        ``<feature>_buy_cnt`` (defaultdict(int) keyed by attribute value)
        for capsule_text, genre, large_area, ken, small_area, prate, cprice
        and dprice.

    Side effects:
        Prints the index label of every visit whose label is a multiple of
        10000, and finally the number of visits that were skipped.
    """
    feature_names = (
        'capsule_text', 'genre', 'large_area', 'ken', 'small_area',
        'prate', 'cprice', 'dprice',
    )

    ups = {}
    for uh in us.USER_ID_hash.tolist()[:max_users]:
        ups[uh] = _new_user_profile(feature_names)

    # Resolve every coupon's attribute values once instead of filtering the
    # whole coupon table for each visit. drop_duplicates keeps the first row
    # per hash, matching the original `.head(1)` lookup.
    coupons = cls.drop_duplicates('COUPON_ID_hash')
    coupon_values = dict(zip(
        coupons.COUPON_ID_hash.tolist(),
        zip(
            coupons.CAPSULE_TEXT.tolist(),
            coupons.GENRE_NAME.tolist(),
            coupons.large_area_name.tolist(),
            coupons.ken_name.tolist(),
            coupons.small_area_name.tolist(),
            [prate_to_cat(v) for v in coupons.PRICE_RATE.tolist()],
            [cprice_to_cat(v) for v in coupons.CATALOG_PRICE.tolist()],
            [dprice_to_cat(v) for v in coupons.DISCOUNT_PRICE.tolist()],
        ),
    ))

    visits = cvs[:max_visits]
    exception_count = 0
    for j, uh, ch, purchased in zip(visits.index.tolist(),
                                    visits.USER_ID_hash.tolist(),
                                    visits.VIEW_COUPON_ID_hash.tolist(),
                                    visits.PURCHASE_FLG.tolist()):
        if j % 10000 == 0:
            print(j)
        try:
            values = coupon_values[ch]
            profile = ups[uh]
        except KeyError:
            # Expected misses only: the coupon is not in the training list
            # or the user has no profile. Anything else should surface.
            exception_count += 1
            continue

        event = 'buy' if purchased == 1 else 'view'
        profile[event + '_cnt'] += 1
        for name, value in zip(feature_names, values):
            profile[name + '_' + event + '_cnt'][value] += 1
    print("exception count: {0}".format(exception_count))

    return ups
# Build the per-user profiles and report how many users were initialised.
ups = gather_user_preferences()
# Single-argument print() emits the same text on Python 2 and Python 3.
print("ups size: {0}".format(len(ups)))
#print ups.keys()[0]
#print json.dumps(ups[ups.keys()[0]], indent=2)
# output = open('ups.pkl', 'wb')
# pickle.dump(ups, output)
# output.close()

In [None]:
# Restore previously computed profiles instead of rebuilding them.
# Pickle files must be opened in binary mode, and the handle is closed as
# soon as loading finishes.
# NOTE(review): unpickling can execute arbitrary code; only load files that
# were produced locally.
with open('tmp/ups3.pkl', 'rb') as pkl_file:
    ups = pickle.load(pkl_file)
# Trailing expression: shows the number of loaded profiles as the cell output.
len(ups)

In [None]:
# Inspect one profile: the first key in the dict's iteration order.
# next(iter(...)) works on both Python 2 and 3, unlike ups.keys()[0].
first_user = next(iter(ups))
print(first_user)
print(json.dumps(ups[first_user], indent=2))

## Scoring Function

In [None]:
# Test coupon list; score() and recommended_coupons() look coupons up here.
clt = pd.read_csv('data/coupon_list_test_translated.csv')
# Trailing expression: shows rows 20-39 as the cell output.
clt[20:40]

In [None]:
# Weights used by score(): every term there has the form
# share * <attribute weight> * <event weight>.

# Event weights: applied to purchase shares and view shares respectively.
BUY = 1.0
VIEW = 1.0

# Attribute weights for the coupon's capsule text and genre.
CAPSULE = 1.0
GENRE = 1.0

# Attribute weights for the coupon's large area, ken and small area.
LAREA = 1.0
KEN = 1.0
SAREA = 1.0

# Attribute weights for the price-rate, catalog-price and discount-price buckets.
PRATE = 1.0
CPRICE = 1.0
DPRICE = 1.0

def prate_to_cat(prate):
    """Return the bucket label for a PRICE_RATE value.

    'low' below 50, 'medium' below 60, otherwise 'high' (NaN compares false
    against both bounds and therefore also yields 'high').
    """
    return 'low' if prate < 50 else 'medium' if prate < 60 else 'high'
    
def cprice_to_cat(cprice):
    """Return the bucket label for a CATALOG_PRICE value.

    'low' below 4480, 'medium' below 10500, otherwise 'high' (NaN compares
    false against both bounds and therefore also yields 'high').
    """
    return 'low' if cprice < 4480 else 'medium' if cprice < 10500 else 'high'

def dprice_to_cat(dprice):
    """Return the bucket label for a DISCOUNT_PRICE value.

    'low' below 1980, 'medium' below 3880, otherwise 'high' (NaN compares
    false against both bounds and therefore also yields 'high').
    """
    return 'low' if dprice < 1980 else 'medium' if dprice < 3880 else 'high'

def score(uh, ch):
    """Score how well test coupon `ch` matches user `uh`'s recorded history.

    For each coupon attribute (capsule text, genre, small area, ken, large
    area, and the price-rate / catalog-price / discount-price buckets) this
    computes the share of the user's views and the share of the user's
    purchases that carried the same value as the coupon, multiplies each
    share by the attribute weight and the VIEW / BUY event weight, and sums
    the sixteen terms.

    Args:
        uh: USER_ID_hash, a key of the global `ups` profile dict.
        ch: COUPON_ID_hash, looked up in the global `clt` table.

    Returns:
        The weighted sum of the view and buy shares.

    Raises:
        KeyError: if `uh` has no profile in `ups`.
        IndexError: if `ch` does not occur in `clt`.

    Side effects:
        Prints the two denominators and every intermediate share.
    """
    u = ups[uh]
    c = clt[clt.COUPON_ID_hash == ch].head(1)

    def show(label, value):
        # Single-argument print() emits the same text on Python 2 and 3.
        print("{0} {1}".format(label, value))

    def share(counter_key, value, total):
        # .get() also works on plain dicts and, unlike indexing a
        # defaultdict, does not insert a zero entry into the profile.
        return u[counter_key].get(value, 0) / total

    # A user with no recorded events has all-zero counters, so any non-zero
    # denominator yields a share of 0 and avoids dividing by zero.
    view_cnt = float(u['view_cnt'])
    if view_cnt == 0:
        view_cnt = 1000
    show("view_cnt: ", view_cnt)

    buy_cnt = float(u['buy_cnt'])
    if buy_cnt == 0:
        buy_cnt = 1000
    show("buy_cnt: ", buy_cnt)

    # (printed label, profile key stem, coupon value, attribute weight)
    features = (
        ('capsule', 'capsule_text', c.CAPSULE_TEXT.tolist()[0], CAPSULE),
        ('genre', 'genre', c.GENRE_NAME.tolist()[0], GENRE),
        ('sarea', 'small_area', c.small_area_name.tolist()[0], SAREA),
        ('ken', 'ken', c.ken_name.tolist()[0], KEN),
        ('larea', 'large_area', c.large_area_name.tolist()[0], LAREA),
        ('prate', 'prate',
         prate_to_cat(int(c.PRICE_RATE.tolist()[0])), PRATE),
        ('cprice', 'cprice',
         cprice_to_cat(int(c.CATALOG_PRICE.tolist()[0])), CPRICE),
        ('dprice', 'dprice',
         dprice_to_cat(int(c.DISCOUNT_PRICE.tolist()[0])), DPRICE),
    )

    s = 0
    for label, stem, value, weight in features:
        # The view share always reads the *_view_cnt counter; the original
        # read dprice_buy_cnt for the discount-price view share.
        view_share = share(stem + '_view_cnt', value, view_cnt)
        show(label + "_view: ", view_share)
        buy_share = share(stem + '_buy_cnt', value, buy_cnt)
        show(label + "_buy: ", buy_share)
        # Added one term at a time to keep the original summation order.
        s += view_share * weight * VIEW
        s += buy_share * weight * BUY
    return s
# Example: score one coupon for one user (hard-coded hashes); the result is
# shown as the cell output.
score('17f0f6675b8a3f2d9b2f2035cf9b6c57', '1124ba8cedc687ac5e0b51916006d58d')

## Recommended Coupons for a User

In [None]:
import datetime
# Number of recommended_coupons() calls so far; that function increments it
# and prints a timestamped progress line on every 100th call.
counter = 0

In [None]:
def recommended_coupons(uh, top_n=3):
    """Return the best-scoring test coupons for user `uh`.

    Every coupon in the global `clt` table is scored with score(uh, ch);
    coupons are ranked by score, ties broken by CATALOG_PRICE, both
    descending.

    Args:
        uh: USER_ID_hash of the user to recommend for.
        top_n: how many coupons to return; defaults to the original 3.

    Returns:
        The COUPON_ID_hash values of the `top_n` best coupons, joined by
        single spaces.

    Side effects:
        Increments the global `counter` and prints a timestamp plus the
        counter on every 100th call.
    """
    global counter
    if counter % 100 == 0:
        # Single-argument print() emits the same text on Python 2 and 3.
        print("{0} {1}".format(datetime.datetime.now(), counter))
    counter += 1

    # Work on a copy so the SCORE column does not leak into the shared table.
    cs = clt.copy()
    cs['SCORE'] = cs.COUPON_ID_hash.apply(lambda ch: score(uh, ch))

    sort_keys = ['SCORE', 'CATALOG_PRICE']
    if hasattr(cs, 'sort_values'):
        cs = cs.sort_values(by=sort_keys, ascending=False)
    else:
        # Older pandas releases only provide DataFrame.sort.
        cs = cs.sort(columns=sort_keys, ascending=False)
    return " ".join(cs.COUPON_ID_hash.head(top_n).tolist())
# Example: recommendations for one hard-coded user, shown as the cell output.
recommended_coupons('17f0f6675b8a3f2d9b2f2035cf9b6c57')

## Submit

In [None]:
# Sample submission table; the commented-out cell below would fill its
# PURCHASED_COUPONS column from USER_ID_hash.
s = pd.read_csv('data/sample_submission.csv')
# Full-range slice keeps every row; presumably a hook for narrowing to a
# subset while testing -- TODO confirm.
s = s[:]
# Not the last expression of the cell, so only len(s) is shown as output.
s[:5]
len(s)

In [None]:
#s.PURCHASED_COUPONS = s.USER_ID_hash.apply(lambda uh: recommended_coupons(uh))