# Personalized Recommendation

In [None]:
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt

In [None]:
%pylab inline

## Load the Tables

In [None]:
# User table; its USER_ID_hash column keys the per-user preference dict
# built by gather_user_preferences().
us = pd.read_csv('data/user_list.csv')
us[:1]  # preview the first row

In [None]:
# Training coupon list. gather_user_preferences() looks coupons up here by
# COUPON_ID_hash and reads their capsule text, genre, area and price columns.
cls = pd.read_csv('data/coupon_list_train_translated.csv')
cls[:5]  # preview the first five rows

In [None]:
# Coupon visit log. Each row supplies USER_ID_hash, VIEW_COUPON_ID_hash and
# PURCHASE_FLG to gather_user_preferences().
cvs = pd.read_csv('data/coupon_visit_train.csv')
cvs[:1]  # preview the first row

In [None]:
# Coupon detail table. Loaded for inspection only; nothing else in the
# visible part of this notebook reads it.
cds = pd.read_csv('data/coupon_detail_train.csv')
cds[:1]  # preview the first row

In [None]:
# Coupon area table. Loaded for inspection only; nothing else in the
# visible part of this notebook reads it.
cas = pd.read_csv('data/coupon_area_train.csv')
cas[:1]  # preview the first row

In [None]:
# Prefecture location table. Loaded for inspection only; nothing else in the
# visible part of this notebook reads it.
ls = pd.read_csv('data/prefecture_locations.csv')
ls[:1]  # preview the first row

## Gather User Preferences

In [None]:
import collections
import json
import pickle
import traceback

def prate_to_cat(prate):
    """Bucket a price rate into 'low', 'medium' or 'high'.

    Rates below 50 are 'low', rates below 60 are 'medium', and everything
    else (including values that compare false against both bounds, such as
    NaN) is 'high'.
    """
    # Exclusive upper bounds, checked in ascending order.
    for upper_bound, label in ((50, 'low'), (60, 'medium')):
        if prate < upper_bound:
            return label
    return 'high'
    
def cprice_to_cat(cprice):
    """Bucket a catalog price into 'low', 'medium' or 'high'.

    Prices below 4480 are 'low', prices below 10500 are 'medium', and
    anything else is 'high'.
    """
    return (
        'low' if cprice < 4480
        else 'medium' if cprice < 10500
        else 'high'
    )

def dprice_to_cat(dprice):
    """Bucket a discount price into 'low', 'medium' or 'high'.

    Prices below 1980 are 'low', prices below 3880 are 'medium', and
    anything else is 'high'.
    """
    is_low = dprice < 1980
    if is_low:
        return 'low'
    is_medium = dprice < 3880
    if is_medium:
        return 'medium'
    return 'high'

def gather_user_preferences(max_users=10, max_visits=1413):
    """Tally, per user, how often each coupon attribute was viewed or bought.

    Reads the module-level tables ``us`` (users), ``cvs`` (visit log) and
    ``cls`` (training coupons).  For every visit whose user and coupon are
    both known, the matching ``*_view_cnt`` or ``*_buy_cnt`` counters are
    incremented (``PURCHASE_FLG == 1`` counts as a buy, anything else as a
    view).  Price rate, catalog price and discount price are bucketed with
    ``prate_to_cat`` / ``cprice_to_cat`` / ``dprice_to_cat`` first.

    Args:
        max_users: number of leading rows of ``us`` to create entries for;
            ``None`` means every user.  The default reproduces the sample
            size that used to be hard-coded here.
        max_visits: number of leading rows of ``cvs`` to process; ``None``
            means every visit.  The default reproduces the sample size that
            used to be hard-coded here.

    Returns:
        dict mapping USER_ID_hash to a dict with ``view_cnt``, ``buy_cnt``
        and, for each attribute, ``<attr>_view_cnt`` / ``<attr>_buy_cnt``
        ``defaultdict(int)`` counters keyed by attribute value.

    Side effects:
        Prints the visit index every 10000 rows and, at the end, the number
        of visits skipped because the user or the coupon was unknown.
    """
    # (counter prefix, coupon column, optional bucketing function)
    features = [
        ('capsule_text', 'CAPSULE_TEXT', None),
        ('genre', 'GENRE_NAME', None),
        ('large_area', 'large_area_name', None),
        ('ken', 'ken_name', None),
        ('small_area', 'small_area_name', None),
        ('prate', 'PRICE_RATE', prate_to_cat),
        ('cprice', 'CATALOG_PRICE', cprice_to_cat),
        ('dprice', 'DISCOUNT_PRICE', dprice_to_cat),
    ]

    ups = {}
    for uh in us.USER_ID_hash.iloc[:max_users]:
        prefs = {'view_cnt': 0, 'buy_cnt': 0}
        for name, _, _ in features:
            prefs[name + '_view_cnt'] = collections.defaultdict(int)
            prefs[name + '_buy_cnt'] = collections.defaultdict(int)
        ups[uh] = prefs

    # Index the coupons once instead of scanning ``cls`` for every visit.
    # setdefault keeps the first row per hash, matching a ``head(1)`` lookup.
    columns = [column for _, column, _ in features]
    coupons = {}
    for row in cls[['COUPON_ID_hash'] + columns].itertuples(index=False):
        coupons.setdefault(row[0], row[1:])

    visits = cvs.iloc[:max_visits]
    exception_count = 0
    for j, uh, ch, purchase_flg in zip(visits.index,
                                       visits.USER_ID_hash,
                                       visits.VIEW_COUPON_ID_hash,
                                       visits.PURCHASE_FLG):
        if j % 10000 == 0:
            print(j)

        prefs = ups.get(uh)
        coupon = coupons.get(ch)
        if prefs is None or coupon is None:
            # Unknown user (outside the sampled users) or a coupon that is
            # not in the training list: skip the visit, but keep count.
            exception_count += 1
            continue

        kind = 'buy' if purchase_flg == 1 else 'view'
        prefs[kind + '_cnt'] += 1
        for (name, _, to_cat), value in zip(features, coupon):
            key = value if to_cat is None else to_cat(value)
            prefs['{0}_{1}_cnt'.format(name, kind)][key] += 1
    print("exception count: {0}".format(exception_count))

    return ups
# Build the preference table and report how many users it covers.
# The print call takes a single argument so it behaves the same on
# Python 2 and Python 3.
ups = gather_user_preferences()
print("ups size: {0}".format(len(ups)))
# Disabled helpers: inspect one user's preferences / persist the table.
#print ups.keys()[0]
#print json.dumps(ups[ups.keys()[0]], indent=2)
# output = open('ups.pkl', 'wb')
# pickle.dump(ups, output)
# output.close()

In [None]:
# Replace the freshly built sample with a previously pickled preference table.
# Pickles must be read in binary mode, and the context manager closes the
# file handle, which the bare open() call never did.
# NOTE(review): the disabled dump code in the previous cell writes 'ups.pkl',
# while this reads 'tmp/ups.pkl' -- confirm the file was moved there.
# Only unpickle files you produced yourself; pickle can execute code on load.
with open('tmp/ups.pkl', 'rb') as f:
    ups = pickle.load(f)
len(ups)

In [None]:
# Show one user's hash and stored preferences. next(iter(...)) yields the
# same first key as keys()[0] without building the key list, and also works
# where dict.keys() is a view rather than a list.
first_uh = next(iter(ups))
print(first_uh)
print(json.dumps(ups[first_uh], indent=2))

## Scoring Function

In [None]:
# Test coupon list: the candidates that score() and recommended_coupons()
# look up by COUPON_ID_hash.
clt = pd.read_csv('data/coupon_list_test_translated.csv')
clt[20:23]  # preview rows 20-22

In [None]:
def score(uh, ch):
    """Score how well test coupon ``ch`` matches user ``uh``'s history.

    The score is a weighted sum of how many times the user viewed and bought
    coupons sharing this coupon's genre, large area, prefecture (ken) and
    small area.  Each attribute is looked up under its own value; previously
    the three area counters were all indexed with the genre name, so they
    contributed nothing meaningful.

    Args:
        uh: USER_ID_hash, a key of the module-level ``ups`` dict.
        ch: COUPON_ID_hash of a row in the module-level ``clt`` table.

    Returns:
        The weighted score as a float (0.0 when nothing matches).

    Raises:
        KeyError: if ``uh`` is not in ``ups``.
        IndexError: if no row of ``clt`` has COUPON_ID_hash ``ch``.
    """
    # (counter prefix, coupon column, view weight, buy weight)
    weighted_features = (
        ('genre', 'GENRE_NAME', 0.5, 3.0),
        ('large_area', 'large_area_name', 0.5, 1.0),
        ('ken', 'ken_name', 0.5, 1.0),
        ('small_area', 'small_area_name', 0.5, 2.0),
    )

    u = ups[uh]
    c = clt[clt.COUPON_ID_hash == ch].head(1)

    s = 0.0
    for prefix, column, view_weight, buy_weight in weighted_features:
        value = c[column].tolist()[0]
        # .get() instead of indexing: the counters are defaultdicts, and
        # indexing would insert a zero entry for every coupon ever scored.
        s += u[prefix + '_view_cnt'].get(value, 0) * view_weight
        s += u[prefix + '_buy_cnt'].get(value, 0) * buy_weight
    return s
# Spot-check score() on one hard-coded user hash / coupon hash pair.
score('17f0f6675b8a3f2d9b2f2035cf9b6c57', 'd846fc2343223e41914aa4e81ddea668')

## Recommended Coupons for a User

In [None]:
import datetime
# Number of recommended_coupons() calls so far; that function increments it
# and prints a progress line on every 100th call.
counter = 0

In [None]:
def recommended_coupons(uh):
    """Return the ten best-scoring test coupons for a user as one string.

    Every coupon in the module-level ``clt`` table is scored with
    ``score(uh, ch)``; coupons are ranked by score and then by
    CATALOG_PRICE, both descending.

    Args:
        uh: USER_ID_hash of the user to recommend for.

    Returns:
        Up to ten COUPON_ID_hash values joined by single spaces.

    Side effects:
        Increments the module-level ``counter`` and prints a timestamped
        progress line on every 100th call.  ``clt`` itself is left
        untouched: scoring happens on a copy, whereas the earlier version
        aliased ``clt`` and wrote a SCORE column into it.
    """
    global counter
    if counter % 100 == 0:
        # Single formatted argument so the output is identical whether
        # print is a statement (Python 2) or a function (Python 3).
        print("{0} {1}".format(datetime.datetime.now(), counter))
    counter += 1

    cs = clt.copy()
    cs['SCORE'] = cs.COUPON_ID_hash.apply(lambda ch: score(uh, ch))
    # sort_values replaces DataFrame.sort(columns=...), which pandas removed.
    cs = cs.sort_values(by=['SCORE', 'CATALOG_PRICE'], ascending=False)
    return " ".join(cs.COUPON_ID_hash.head(10).tolist())
# Spot-check recommended_coupons() on one hard-coded user hash.
recommended_coupons('17f0f6675b8a3f2d9b2f2035cf9b6c57')

## Submit

In [None]:
# Load the sample submission; the disabled line in the next cell would fill
# its PURCHASED_COUPONS column from USER_ID_hash.
s = pd.read_csv('data/sample_submission.csv')
# Full-range slice: keeps every row. Presumably a leftover hook for
# restricting the run to a subset (e.g. s[:100]) -- TODO confirm.
s = s[:]
# NOTE(review): this expression is not the last one in the cell, so a
# notebook will most likely not display it.
s[:5]
len(s)  # number of rows in the submission

In [None]:
#s.PURCHASED_COUPONS = s.USER_ID_hash.apply(lambda uh: recommended_coupons(uh))