In [1]:
import pandas as pd
import snow.snow as sf
import numpy as np

from model import ImplicitFactorizationModel

In [2]:
# Load the interactions table from the `dixons_wh` warehouse using the
# loader's default role/database/schema.
interactions_df = sf.from_snow(wh="dixons_wh")

Using all_data_viewer...
Established connection.
Querying dixons_wh.all_data.postgres
Interactions data:  ('mark.ngoi.92@gmail.com', 436356, '2019-09-30', None, None, None, Decimal('7.200000'), 'No', 'Melbourne', 965, 'Gotcha!', 1443, '2019-09-30', Decimal('7.200000000000')) 

Connection succesfully closed.


In [3]:
# Run a custom query against the `geo_clusters` schema to fetch the Sydney rows.
coordinates_data = sf.from_snow(
    role="zans_role",
    wh="zans_wh",
    db="zans_db",
    schema="geo_clusters",
    query="select * from sydney",
)

# Name the five positional fields of each returned row.
# NOTE(review): l/m/s/t look like cluster ids at four granularities -- confirm.
coordinates_df = pd.DataFrame(
    coordinates_data,
    columns=["branchId", "l", "m", "s", "t"],
)

Using zans_role...
Established connection.
Querying zans_wh.zans_db.geo_clusters
Custom data:  (1872, 27, 74, 169, 568) 

Connection succesfully closed.


In [4]:
# Attach the cluster columns to every interaction. `branchId` is the only
# column the two frames share, so it is spelled out as the join key; rows
# without a match on either side are dropped (inner join).
sydney_columns = ['date', 'branchId', 'merchant', 'userId', 'email', 'l', 'm', 's', 't']
sydney_df = coordinates_df.merge(interactions_df, on='branchId', how='inner')[sydney_columns]
sydney_df.shape

(347209, 9)

### Information retrieval

In [5]:
from collections import defaultdict

def build_retrieval_dicts(df):
    """Build the lookup tables that translate between raw ids, labels and model ids.

    Parameters
    ----------
    df : pandas.DataFrame
        Interaction rows. Must contain the columns ``branchId``, ``merchant``,
        ``t``, ``userId`` and ``email``.

    Returns
    -------
    tuple of dict
        ``(bid_to_merchant, uid_to_email, email_to_uid, bid_features,
        internal_uid, internal_bid, external_uid, external_bid)`` where

        * ``bid_to_merchant`` -- ``int(branchId)`` -> merchant of the first row
          seen for that branch.
        * ``uid_to_email`` -- ``int(userId)`` -> email of the first row seen
          for that user.
        * ``email_to_uid`` -- email -> ``int(userId)``; if several users share
          an email the last one processed wins.
        * ``bid_features`` -- ``int(branchId)`` -> ``t`` value of the first row
          seen for that branch.
        * ``internal_uid`` / ``internal_bid`` -- raw id -> contiguous 0-based
          index, assigned in order of first appearance in ``df``.
        * ``external_uid`` / ``external_bid`` -- the inverse mappings.

    Side effects
    ------------
    Prints ``'Items processing complete.'`` once the branch tables are built.
    """
    bid_to_merchant = {}
    uid_to_email = {}
    email_to_uid = {}
    bid_features = {}
    internal_uid = {}
    internal_bid = {}
    external_uid = {}
    external_bid = {}

    # Keep the first row of every branch (in order of first appearance) in a
    # single pass, instead of re-filtering the whole frame once per branch.
    branches = df[['branchId', 'merchant', 't']].drop_duplicates(subset='branchId')
    branch_rows = zip(
        branches['branchId'].to_numpy(),
        branches['merchant'].to_numpy(),
        branches['t'].to_numpy(),
    )

    for i, (bid, merchant, feature) in enumerate(branch_rows):
        bid_to_merchant[int(bid)] = merchant
        bid_features[int(bid)] = feature

        internal_bid[bid] = i
        external_bid[i] = bid

    print('Items processing complete.')

    users = df[['userId', 'email']].drop_duplicates(subset='userId')
    user_rows = zip(users['userId'].tolist(), users['email'].tolist())

    # Number users with enumerate(), not with the DataFrame index: after
    # drop_duplicates the surviving index labels are sparse, which would make
    # the internal user ids non-contiguous (unlike the branch ids above).
    for i, (uid, email) in enumerate(user_rows):
        internal_uid[uid] = i
        external_uid[i] = uid

        uid_to_email[int(uid)] = email
        email_to_uid[email] = int(uid)

    return bid_to_merchant, uid_to_email, email_to_uid, bid_features, internal_uid, internal_bid, external_uid, external_bid

In [6]:
# Build every lookup table once from the merged Sydney interactions.
(
    bid_to_merchant,
    uid_to_email,
    email_to_uid,
    bid_features,
    internal_uid,
    internal_bid,
    external_uid,
    external_bid,
) = build_retrieval_dicts(sydney_df)

Items processing complete.


In [7]:
def past_transactions(email=None, df=None, uid=None):
    """Return one user's interactions, ordered by date (oldest first).

    Parameters
    ----------
    email : str, optional
        Email to look up. Takes precedence over ``uid`` when truthy.
    df : pandas.DataFrame, optional
        Frame holding the columns ``date``, ``userId``, ``email``,
        ``merchant`` and ``t``. Defaults to the notebook-level ``sydney_df``,
        looked up when the function is called.
    uid : optional
        ``userId`` to look up; used only when ``email`` is not given.

    Returns
    -------
    pandas.DataFrame or None
        The matching rows restricted to the five columns above and sorted by
        ``date``; ``None`` when neither ``email`` nor ``uid`` is supplied.
    """
    if df is None:
        # Resolve the default at call time so that re-creating `sydney_df` in
        # an earlier cell is picked up without re-defining this function.
        df = sydney_df

    interactions = df[['date', 'userId', 'email', 'merchant', 't']]

    if email:
        selected = interactions[interactions.email == email]
    elif uid is not None:
        # `is not None` (rather than truthiness) so that a user id of 0 works.
        selected = interactions[interactions.userId == uid]
    else:
        return None

    return selected.sort_values(by='date')

In [8]:
# Display the five latest transactions for a single user.
# NOTE(review): the rendered output contains a real email address -- clear or
# redact it before sharing the notebook.
(
    past_transactions('zan.thomson@gmail.com')
    .sort_values(by='date')
    .tail()
)

Unnamed: 0,date,userId,email,merchant,t
146208,2019-09-02,227214,zan.thomson@gmail.com,Crafty Cuts,435
35529,2019-09-12,227214,zan.thomson@gmail.com,Pablo and Rusty's,436
4934,2019-09-19,227214,zan.thomson@gmail.com,Industry Beans,439
4937,2019-09-20,227214,zan.thomson@gmail.com,Industry Beans,439
4935,2019-09-26,227214,zan.thomson@gmail.com,Industry Beans,439


In [9]:
# Peek at the first five raw interaction rows to check the loaded columns.
interactions_df.head(n=5)

Unnamed: 0,email,userId,funnel1,funnel2,funnel3,funnel10,amount,is_inorganic,city,merchantId,merchant,branchId,date,aov
0,mark.ngoi.92@gmail.com,436356,2019-09-30,,,,7.2,No,Melbourne,965.0,Gotcha!,1443.0,2019-09-30,7.2
1,kocep@live.com,447478,2019-09-30,,,,7.1,No,Melbourne,1063.0,Happy Lemon,1604.0,2019-09-30,7.1
2,sj_litwiniuk@internode.on.net,388519,2019-09-30,,,,6.5,No,Melbourne,1017.0,CoCo Fresh Tea & Juice,1522.0,2019-09-30,6.5
3,komon_z@163.com,441183,2019-09-30,,,,18.0,No,Melbourne,802.0,8Bit,1135.0,2019-09-30,18.0
4,sine.metu.op125@gmail.com,447521,2019-09-30,,,,36.5,No,Sydney,1037.0,New Shanghai,1562.0,2019-09-30,36.5


### Set up interactions dataset

In [10]:
from interactions import Interactions

In [11]:
# Shuffle the interactions. The seed is fixed so that Restart & Run All
# reproduces the same row order (and therefore the same training run).
df = sydney_df.sample(frac=1, random_state=42)

# Translate raw user/branch ids into the internal indices built earlier.
# NOTE(review): dtype=object is kept from the original; confirm that
# `Interactions` does not expect integer-typed id arrays.
uids = np.array([internal_uid[raw_uid] for raw_uid in df.userId.values], dtype=object)
iids = np.array([internal_bid[raw_bid] for raw_bid in df.branchId.values], dtype=object)

# One feature value per internal item id, in internal-id order. The length is
# taken from the id mapping itself so row k always describes internal item k.
features = np.array([bid_features[external_bid[k]] for k in range(len(external_bid))])

inters = Interactions(uids, iids, timestamps=df.date.values, item_features=features)
             

In [12]:
# Configure the factorization model and train it on the interactions above.
# NOTE(review): the recorded run of this cell ended in
# "NameError: name 'context_net' is not defined"; the failing reference is not
# in this notebook, so it has to be fixed in the imported model code.
model = ImplicitFactorizationModel(
    loss='adaptive_hinge',
    representation='nonlinear_mixture',
)
model.fit(inters)

NameError: name 'context_net' is not defined