In [None]:
import sys
import csv
import time
import itertools
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook
from sklearn import datasets
import matplotlib.pyplot as plt 
from sklearn.metrics import f1_score
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV
from sklearn.model_selection import train_test_split
from sklearn import model_selection
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, confusion_matrix

from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import Normalizer

from sklearn.metrics import roc_curve, auc

from random import randint
import random
import copy

In [None]:
df=pd.read_csv('/Users/amirasarbaev/Downloads/Internship/data/test/5_time_line.csv',header=0,sep=',')
df['week']=df.loc[:,"week_click":"week_undisp"].min(axis=1)

In [None]:
col=[col for col in df.columns if col not in ["week_click","week_book","week_reply","week_undisp"]]
df=df[col]

# Dataset Split based on displayed offers for target users and timeline

In [None]:
user_list=np.unique(df.user_id)

In [None]:
test=[]
train=[]
for i in tqdm_notebook(range(len(user_list))):
    df_split = np.array_split(df[df.user_id==user_list[i]].sort_values(by='week'), 3)
    train.append(list(df_split[0].index))
    train.append(list(df_split[1].index))
    test.append(list(df_split[2].index))
    
    

In [None]:
train=list(itertools.chain(*train))
test=list(itertools.chain(*test))

X_train=df[df.index.isin(train)]
X_test=df[df.index.isin(test)]

X_train.set_index(['user_id','item_id'], inplace=True)
X_test.set_index(['user_id','item_id'], inplace=True)

Y_train_click=X_train.click
Y_train_book=X_train.book
Y_train_reply=X_train.reply
Y_test_click=X_test.click
Y_test_book=X_test.book
Y_test_reply=X_test.reply
col=[col for col in df.columns if col not in ['displayed','user_id','item_id',"click","book","reply","week"]]
X_train=X_train[col]
X_test=X_test[col]

# Dataset Normilization

In [None]:
X_train = X_train.astype(np.float64)
X_test = X_test.astype(np.float64)

min_max_scaler = preprocessing.MinMaxScaler()
X_train_minmax = min_max_scaler.fit_transform(X_train.iloc[:,15:])
X_test_minmax = min_max_scaler.transform(X_test.iloc[:,15:])

X_train_minmax = pd.DataFrame(X_train_minmax)
X_test_minmax = pd.DataFrame(X_test_minmax)
X_train_minmax.columns = X_train.iloc[:,15:].columns
X_test_minmax.columns = X_test.iloc[:,15:].columns
X_train_minmax.index = X_train.iloc[:,15:].index
X_test_minmax.index = X_test.iloc[:,15:].index
X_test_minmax

In [None]:
sc=StandardScaler()
sc.fit(X_train_minmax)
X_train_std=sc.transform(X_train_minmax)
X_test_std=sc.transform(X_test_minmax)

X_train_std = pd.DataFrame(X_train_std)
X_test_std = pd.DataFrame(X_test_std)
X_train_std.columns = X_train.iloc[:,15:].columns
X_test_std.columns = X_test.iloc[:,15:].columns
X_train_std.index = X_train.iloc[:,15:].index
X_test_std.index = X_test.iloc[:,15:].index
X_test_std

# Union of extracted features + normolized contextual

In [None]:
U=np.unique(X_train_std.reset_index().user_id)
I=np.unique(X_train_std.reset_index().item_id)

In [None]:
from random import randint
import random
import copy
def sample_floats(low, high, k=1):
    """ Return a k-length list of unique random floats
        in the range of low <= x <= high
    """
    result = []
    seen = set()
    for i in range(k):
        x = random.uniform(low, high)
        while x in seen:
            x = random.uniform(low, high)
        seen.add(x)
        result.append(x)
    return result

In [None]:
U_repr = {U[i]:[v for v in sample_floats(0,1,k=X_train.shape[1])] for i in range (len(U))}
I_repr= {I[j]:[w for w in sample_floats(0,1,k=X_train.shape[1])] for j in range (len(I))}


In [None]:
Y_train_click=Y_train_click.reset_index()
Y_train_book=Y_train_book.reset_index()
Y_train_reply=Y_train_reply.reset_index()
X_train_std=X_train_std.reset_index()
X_test_std=X_test_std.reset_index()
Y_test_click=Y_test_click.reset_index()
Y_test_book=Y_test_book.reset_index()
Y_test_reply=Y_test_reply.reset_index()

In [None]:
X_train_dict={}
for i in tqdm_notebook(xrange(len(X_train_std))):
    X_train_dict[X_train_std.user_id[i],X_train_std.item_id[i]]=X_train.iloc[i,:15].values.tolist()+\
    X_train_std.iloc[i,2:].values.tolist()
#     +U_repr[X_train_std.user_id[i]]+I_repr[X_train_std.item_id[i]]

In [None]:
Y_train_dict_click={}
Y_train_dict_book={}
Y_train_dict_reply={}
for i in tqdm_notebook(xrange(len(Y_train_click))):
    Y_train_dict_click[Y_train_click.user_id[i],Y_train_click.item_id[i]]=Y_train_click.iloc[i,2]
    Y_train_dict_book[Y_train_book.user_id[i],Y_train_book.item_id[i]]=Y_train_book.iloc[i,2]
    Y_train_dict_reply[Y_train_reply.user_id[i],Y_train_reply.item_id[i]]=Y_train_reply.iloc[i,2]
    

# Stochastic gradient descent

In [16]:
indx_dict={}
for i in range(len(U)):
    indx_dict[U[i]]=X_train_std[X_train_std.user_id==U[i]].iloc[:,:2].values.tolist()

In [20]:
#Inputs:
eta=0.0001
eps=1e-3
lamb=1e-3
nb_runs = 600000
#Initialization:

global_loss = [1,0]
nb=1
while abs(global_loss[nb-1]-global_loss[nb])>eps:
    loss_local=0
    for j in tqdm_notebook(range(nb_runs)):
        u = randint(0,len(U)-1)
        
        target_samples=indx_dict[U[u]]
        target_lables=[Y_train_dict_click[tuple(elem)] for elem in target_samples]
        positive=[target_samples[m] for m in [l for l, e in enumerate(target_lables) if e != 0]]
        negative=[target_samples[m] for m in [l for l, e in enumerate(target_lables) if e == 0]]

        if (len(positive)!=0 and len(negative)!=0): 

            p=randint(0, len(positive)-1)
            n=randint(0, len(negative)-1)

            user=U_repr[U[u]]
            item_plus = I_repr[positive[p][1]]
            item_minus = I_repr[negative[n][1]]
            
            diff=(np.array(item_minus) - np.array(item_plus)).tolist()
            ex=np.exp(np.dot(user,diff))
            dl_4_user =  diff / ( 1 + (1. / ex ) )
            dl_4_item_plus = ( (-1) * np.array( user ) ) / ( 1 + (1. / ex ) )
            dl_4_item_minus = ( user ) / ( 1 + (1. / ex ) )

                
            user = user - eta * ( ( dl_4_user ) + ( 2 * lamb * np.sum( user ) ) ) 

            item_plus = item_plus - eta * ( ( dl_4_item_plus ) + ( 2 * lamb * np.sum( item_plus ) ) )

            item_minus = item_minus - eta * ( ( dl_4_item_minus ) + ( 2 * lamb *np.sum( item_minus ) ) )

            loss_local=loss_local+(np.log( 1. + ex))+\
            lamb*(np.linalg.norm(user,2)**2+np.linalg.norm(item_plus,2)**2+np.linalg.norm(item_minus,2)**2)

            U_repr[U[u]]=user.tolist()
            I_repr[positive[p][1]]=item_plus.tolist()
            I_repr[negative[n][1]]=item_minus.tolist()

    global_loss.append(loss_local/nb_runs)
    nb+=1
    print(global_loss)

        
        


A Jupyter Widget


[1, 0, 0.85461669883173952]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185, 0.73934713664312213]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185, 0.73934713664312213, 0.72930045976647007]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185, 0.73934713664312213, 0.72930045976647007, 0.71952552293855443]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185, 0.73934713664312213, 0.72930045976647007, 0.71952552293855443, 0.70950796700208763]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185, 0.73934713664312213, 0.72930045976647007, 0.71952552293855443, 0.70950796700208763, 0.70169630638808567]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185, 0.73934713664312213, 0.72930045976647007, 0.71952552293855443, 0.70950796700208763, 0.70169630638808567, 0.69430614497850052]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185, 0.73934713664312213, 0.72930045976647007, 0.71952552293855443, 0.70950796700208763, 0.70169630638808567, 0.69430614497850052, 0.68768133329795633]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185, 0.73934713664312213, 0.72930045976647007, 0.71952552293855443, 0.70950796700208763, 0.70169630638808567, 0.69430614497850052, 0.68768133329795633, 0.68056815260658388]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185, 0.73934713664312213, 0.72930045976647007, 0.71952552293855443, 0.70950796700208763, 0.70169630638808567, 0.69430614497850052, 0.68768133329795633, 0.68056815260658388, 0.67609299324083594]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185, 0.73934713664312213, 0.72930045976647007, 0.71952552293855443, 0.70950796700208763, 0.70169630638808567, 0.69430614497850052, 0.68768133329795633, 0.68056815260658388, 0.67609299324083594, 0.66841071586718503]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185, 0.73934713664312213, 0.72930045976647007, 0.71952552293855443, 0.70950796700208763, 0.70169630638808567, 0.69430614497850052, 0.68768133329795633, 0.68056815260658388, 0.67609299324083594, 0.66841071586718503, 0.66333500543062807]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185, 0.73934713664312213, 0.72930045976647007, 0.71952552293855443, 0.70950796700208763, 0.70169630638808567, 0.69430614497850052, 0.68768133329795633, 0.68056815260658388, 0.67609299324083594, 0.66841071586718503, 0.66333500543062807, 0.65718641639994946]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185, 0.73934713664312213, 0.72930045976647007, 0.71952552293855443, 0.70950796700208763, 0.70169630638808567, 0.69430614497850052, 0.68768133329795633, 0.68056815260658388, 0.67609299324083594, 0.66841071586718503, 0.66333500543062807, 0.65718641639994946, 0.65196804609793546]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185, 0.73934713664312213, 0.72930045976647007, 0.71952552293855443, 0.70950796700208763, 0.70169630638808567, 0.69430614497850052, 0.68768133329795633, 0.68056815260658388, 0.67609299324083594, 0.66841071586718503, 0.66333500543062807, 0.65718641639994946, 0.65196804609793546, 0.6462554107509485]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185, 0.73934713664312213, 0.72930045976647007, 0.71952552293855443, 0.70950796700208763, 0.70169630638808567, 0.69430614497850052, 0.68768133329795633, 0.68056815260658388, 0.67609299324083594, 0.66841071586718503, 0.66333500543062807, 0.65718641639994946, 0.65196804609793546, 0.6462554107509485, 0.64151847154766095]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185, 0.73934713664312213, 0.72930045976647007, 0.71952552293855443, 0.70950796700208763, 0.70169630638808567, 0.69430614497850052, 0.68768133329795633, 0.68056815260658388, 0.67609299324083594, 0.66841071586718503, 0.66333500543062807, 0.65718641639994946, 0.65196804609793546, 0.6462554107509485, 0.64151847154766095, 0.63756396812294636]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185, 0.73934713664312213, 0.72930045976647007, 0.71952552293855443, 0.70950796700208763, 0.70169630638808567, 0.69430614497850052, 0.68768133329795633, 0.68056815260658388, 0.67609299324083594, 0.66841071586718503, 0.66333500543062807, 0.65718641639994946, 0.65196804609793546, 0.6462554107509485, 0.64151847154766095, 0.63756396812294636, 0.63158944245303206]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185, 0.73934713664312213, 0.72930045976647007, 0.71952552293855443, 0.70950796700208763, 0.70169630638808567, 0.69430614497850052, 0.68768133329795633, 0.68056815260658388, 0.67609299324083594, 0.66841071586718503, 0.66333500543062807, 0.65718641639994946, 0.65196804609793546, 0.6462554107509485, 0.64151847154766095, 0.63756396812294636, 0.63158944245303206, 0.62901966653068231]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185, 0.73934713664312213, 0.72930045976647007, 0.71952552293855443, 0.70950796700208763, 0.70169630638808567, 0.69430614497850052, 0.68768133329795633, 0.68056815260658388, 0.67609299324083594, 0.66841071586718503, 0.66333500543062807, 0.65718641639994946, 0.65196804609793546, 0.6462554107509485, 0.64151847154766095, 0.63756396812294636, 0.63158944245303206, 0.62901966653068231, 0.62587722356579611]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185, 0.73934713664312213, 0.72930045976647007, 0.71952552293855443, 0.70950796700208763, 0.70169630638808567, 0.69430614497850052, 0.68768133329795633, 0.68056815260658388, 0.67609299324083594, 0.66841071586718503, 0.66333500543062807, 0.65718641639994946, 0.65196804609793546, 0.6462554107509485, 0.64151847154766095, 0.63756396812294636, 0.63158944245303206, 0.62901966653068231, 0.62587722356579611, 0.62258124094222989]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185, 0.73934713664312213, 0.72930045976647007, 0.71952552293855443, 0.70950796700208763, 0.70169630638808567, 0.69430614497850052, 0.68768133329795633, 0.68056815260658388, 0.67609299324083594, 0.66841071586718503, 0.66333500543062807, 0.65718641639994946, 0.65196804609793546, 0.6462554107509485, 0.64151847154766095, 0.63756396812294636, 0.63158944245303206, 0.62901966653068231, 0.62587722356579611, 0.62258124094222989, 0.61709815573054283]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185, 0.73934713664312213, 0.72930045976647007, 0.71952552293855443, 0.70950796700208763, 0.70169630638808567, 0.69430614497850052, 0.68768133329795633, 0.68056815260658388, 0.67609299324083594, 0.66841071586718503, 0.66333500543062807, 0.65718641639994946, 0.65196804609793546, 0.6462554107509485, 0.64151847154766095, 0.63756396812294636, 0.63158944245303206, 0.62901966653068231, 0.62587722356579611, 0.62258124094222989, 0.61709815573054283, 0.61298050676264049]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185, 0.73934713664312213, 0.72930045976647007, 0.71952552293855443, 0.70950796700208763, 0.70169630638808567, 0.69430614497850052, 0.68768133329795633, 0.68056815260658388, 0.67609299324083594, 0.66841071586718503, 0.66333500543062807, 0.65718641639994946, 0.65196804609793546, 0.6462554107509485, 0.64151847154766095, 0.63756396812294636, 0.63158944245303206, 0.62901966653068231, 0.62587722356579611, 0.62258124094222989, 0.61709815573054283, 0.61298050676264049, 0.61107353307596068]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185, 0.73934713664312213, 0.72930045976647007, 0.71952552293855443, 0.70950796700208763, 0.70169630638808567, 0.69430614497850052, 0.68768133329795633, 0.68056815260658388, 0.67609299324083594, 0.66841071586718503, 0.66333500543062807, 0.65718641639994946, 0.65196804609793546, 0.6462554107509485, 0.64151847154766095, 0.63756396812294636, 0.63158944245303206, 0.62901966653068231, 0.62587722356579611, 0.62258124094222989, 0.61709815573054283, 0.61298050676264049, 0.61107353307596068, 0.60796481725011842]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185, 0.73934713664312213, 0.72930045976647007, 0.71952552293855443, 0.70950796700208763, 0.70169630638808567, 0.69430614497850052, 0.68768133329795633, 0.68056815260658388, 0.67609299324083594, 0.66841071586718503, 0.66333500543062807, 0.65718641639994946, 0.65196804609793546, 0.6462554107509485, 0.64151847154766095, 0.63756396812294636, 0.63158944245303206, 0.62901966653068231, 0.62587722356579611, 0.62258124094222989, 0.61709815573054283, 0.61298050676264049, 0.61107353307596068, 0.60796481725011842, 0.60267716063347465]


A Jupyter Widget


[1, 0, 0.85461669883173952, 0.82340544674461991, 0.79871025106486171, 0.78091310852991402, 0.76417443244412597, 0.75204183165150185, 0.73934713664312213, 0.72930045976647007, 0.71952552293855443, 0.70950796700208763, 0.70169630638808567, 0.69430614497850052, 0.68768133329795633, 0.68056815260658388, 0.67609299324083594, 0.66841071586718503, 0.66333500543062807, 0.65718641639994946, 0.65196804609793546, 0.6462554107509485, 0.64151847154766095, 0.63756396812294636, 0.63158944245303206, 0.62901966653068231, 0.62587722356579611, 0.62258124094222989, 0.61709815573054283, 0.61298050676264049, 0.61107353307596068, 0.60796481725011842, 0.60267716063347465, 0.60184513962184438]


# Ranking

In [21]:
U_pandas=pd.DataFrame(U_repr.items())
I_pandas=pd.DataFrame(I_repr.items())

In [22]:
U_pandas=U_pandas.set_index(0)
U_pandas.index.name="user_id"
I_pandas=I_pandas.set_index(0)
I_pandas.index.name="item_id"

In [23]:
U_pandas=U_pandas[1].apply(pd.Series)
cols=['U_'+str(i) for i in range (31)]
U_pandas.columns=cols

In [24]:
I_pandas=I_pandas[1].apply(pd.Series)
cols=['I_'+str(i) for i in range (31)]
I_pandas.columns=cols

In [25]:
X_final=X_test_std

In [26]:
I_test=np.unique(X_test_std.item_id)
main_list = np.setdiff1d(I_test,I)
random_items={}
for i in range(len (main_list)):
    random_items[main_list[i]]=[v for v in sample_floats(0,1,k=31)]
    
    
    

In [27]:
pd_scores=[]
for i in tqdm_notebook(range(len(X_final))):
    try:
        pd_scores.append(np.dot(U_pandas[U_pandas.index==X_final.iloc[i,0]].values.tolist()[0],\
                    I_pandas[I_pandas.index==X_final.iloc[i,1]].values.tolist()[0]))
    except:
        pd_scores.append(np.dot(U_pandas[U_pandas.index==X_final.iloc[i,0]].values.tolist()[0],\
                    random_items[X_final.iloc[i,1]]))

A Jupyter Widget




In [28]:
X_final['scores']=pd_scores
X_final=pd.merge(X_final,Y_test_click,how='left',on=['user_id','item_id'])

In [37]:
k=5
total_map=[]
for i in tqdm_notebook(range (len(U))):
    target_list=X_final[X_final.user_id==U[i]].sort_values(by='scores',ascending=False)
    #     target_list=target_list[target_list.Y_test==1]
    up=1
    down=1
    precision=[]
    for j,score in enumerate(target_list.click[:k].values.tolist()):
        if score!=0:
            precision.append(float(up)/float(down+j))
            up+=1
        else:
            precision.append(0)
    total_map.append(np.sum(precision[0:k])/k)
np.mean(total_map)

A Jupyter Widget




0.35400403429147759

# Classification

In [30]:
Class_train={}
for i in tqdm_notebook(range(len(X_train_std))):
        Class_train[X_train_std.user_id[i],X_train_std.item_id[i]]=U_repr[X_train_std.user_id[i]]+\
        I_repr[X_train_std.item_id[i]]
#         +list(Y_train_click[(Y_train_click.user_id==X_train_std.user_id[i])&(Y_train_click.item_id==X_train_std.item_id[i])].click)


A Jupyter Widget




In [31]:
Class_train=pd.DataFrame(Class_train.items())
col=["U_repr_"+str(i) for i in range(31)]+["I_repr_"+str(i) for i in range(31)]

In [32]:
Class_train[['user_id','item_id']] = pd.DataFrame(Class_train[0].values.tolist(), index= Class_train.index)

In [33]:
Class_train[['user_id','item_id']] = pd.DataFrame(Class_train[0].values.tolist(), index= Class_train.index)
Class_train[col] = pd.DataFrame(Class_train[1].values.tolist(), index= Class_train.index)
Class_train.iloc[:,4:]

Unnamed: 0,U_repr_0,U_repr_1,U_repr_2,U_repr_3,U_repr_4,U_repr_5,U_repr_6,U_repr_7,U_repr_8,U_repr_9,...,I_repr_21,I_repr_22,I_repr_23,I_repr_24,I_repr_25,I_repr_26,I_repr_27,I_repr_28,I_repr_29,I_repr_30
0,0.285530,0.451673,0.415204,0.794095,0.198871,0.281770,0.813397,0.973086,0.050642,0.776217,...,0.519292,0.051649,0.607856,0.688086,0.181325,0.039791,0.537033,0.635758,0.373915,0.784999
1,0.895104,0.280017,0.527172,0.145700,0.620958,0.744842,0.943090,0.353378,0.718098,0.909792,...,0.715712,0.272394,0.702153,0.448264,-0.025882,0.203838,0.724286,0.531448,0.034788,0.644715
2,0.987992,0.891092,0.311056,0.378921,0.573891,0.409485,0.934201,0.901214,0.780850,0.206379,...,0.175539,0.980366,0.067079,0.221175,0.780759,0.858320,0.645577,0.628580,0.570290,0.923085
3,0.564819,0.768071,0.655653,0.887160,0.396261,0.694480,0.246989,0.467233,0.795600,0.707143,...,0.099362,0.324966,0.342550,0.183293,0.860795,0.355197,0.818201,0.642858,0.734898,0.806569
4,0.703309,0.300677,0.472421,0.121662,0.847579,0.137306,0.450844,0.920108,0.425148,0.685081,...,0.587688,0.230815,0.343582,0.875288,0.379667,-0.033253,0.603276,0.749696,-0.019381,0.844270
5,0.579626,0.164982,0.826982,0.611158,0.105044,0.542491,0.532569,0.142549,0.251473,0.853334,...,0.435556,0.666825,0.239913,0.293641,0.293746,0.511353,0.326978,0.618164,0.376432,0.695856
6,0.955736,0.203920,0.760478,0.561193,0.647169,0.633946,0.594385,0.512058,0.296685,0.468960,...,0.557436,0.879699,0.358299,0.118096,0.785606,0.945393,0.126177,0.061572,0.506879,0.558122
7,0.084902,0.102463,0.174672,0.743350,0.843741,0.659644,0.349264,0.693726,0.377027,0.673582,...,0.779821,0.702709,-0.020214,0.580304,0.874335,0.651586,-0.050605,0.350931,0.263674,0.331525
8,0.781435,0.348019,0.048853,0.901837,0.786121,0.470513,0.703930,0.435168,0.951768,0.736976,...,0.517617,0.105384,0.631562,0.919217,0.153353,0.367011,0.752438,0.980166,0.084884,0.075941
9,0.369227,0.599645,0.382662,0.419308,0.197116,0.236051,0.518794,0.118112,0.936949,0.344386,...,0.245170,0.083046,0.334397,0.985642,0.563998,0.699906,0.226575,0.211909,0.874451,0.945193


In [34]:
Class_train=pd.merge(Class_train,Y_train_click,how="left",on=["user_id","item_id"])

In [35]:
lr = LogisticRegressionCV(penalty='l2',class_weight='balanced')
lr.fit(Class_train.iloc[:,4:-1], Class_train.iloc[:,-1]) 

LogisticRegressionCV(Cs=10, class_weight='balanced', cv=None, dual=False,
           fit_intercept=True, intercept_scaling=1.0, max_iter=100,
           multi_class='ovr', n_jobs=1, penalty='l2', random_state=None,
           refit=True, scoring=None, solver='lbfgs', tol=0.0001, verbose=0)

In [46]:
Y_test_click_dic={}
for i in tqdm_notebook(xrange(len(Y_test_click))):
    Y_test_click_dic[Y_test_click.user_id[i],Y_test_click.item_id[i]]=Y_test_click.iloc[i,2]
    

A Jupyter Widget




In [49]:
X_final=X_test_std

In [51]:
Class={}
for i in tqdm_notebook(range(len(X_final))):
    try:
        Class[X_final.user_id[i],X_final.item_id[i]]=U_repr[X_final.user_id[i]]+\
        I_repr[X_final.item_id[i]]+\
        [Y_test_click_dic[X_final.user_id[i],X_final.item_id[i]]]
    except:
        Class[X_final.user_id[i],X_final.item_id[i]]=U_repr[X_final.user_id[i]]+\
        random_items[X_final.item_id.values.tolist()[i]]+\
        [Y_test_click_dic[X_final.user_id[i],X_final.item_id[i]]]
       

A Jupyter Widget




Exception in thread Thread-44:
Traceback (most recent call last):
  File "/Users/amirasarbaev/anaconda2/lib/python2.7/threading.py", line 801, in __bootstrap_inner
    self.run()
  File "/Users/amirasarbaev/anaconda2/lib/python2.7/site-packages/tqdm/_monitor.py", line 63, in run
    for instance in self.tqdm_cls._instances:
  File "/Users/amirasarbaev/anaconda2/lib/python2.7/_weakrefset.py", line 60, in __iter__
    for itemref in self.data:
RuntimeError: Set changed size during iteration



In [None]:
col=col+['Y_test_click']

In [None]:
Class_final=pd.DataFrame(Class.items())
Class_final[['user_id','item_id']] = pd.DataFrame(Class_final[0].values.tolist(), index= Class_final.index)
Class_final[col] = pd.DataFrame(Class_final[1].values.tolist(), index= Class_final.index)
Class_final

In [None]:
Y_pred=lr.predict(Class_final.iloc[:,4:-1])
print(classification_report(Class_final.iloc[:,-1], Y_pred))
confusion_matrix(Class_final.iloc[:,-1], Y_pred)

In [None]:
false_positive_rate, recall, thresholds = roc_curve(Y_pred,Class_final.iloc[:,-1])
roc_auc = auc(false_positive_rate, recall)
plt.title('Receiver Operating Characteristic')
plt.plot(false_positive_rate, recall, 'b', label='AUC = %0.2f' % roc_auc)
plt.legend(loc='lower right')
plt.plot([0, 1], [0, 1], 'r--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.ylabel('Recall')
plt.xlabel('Fall-out')
plt.show()