<a href="https://colab.research.google.com/github/ZeyadSabbah/TrivagoRecommenderSystem/blob/master/EvaluatingModels.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Evaluating Models
## Mounting Drive

In [1]:
# Mount Google Drive into the Colab runtime at /content/drive so the dataset
# and model paths used below resolve.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
# Switch the working directory to the project folder under the mounted Drive.
%cd /content/drive/My Drive/Trivago/Project/TrivagoRecommenderSystem

/content/drive/My Drive/Trivago/Project/TrivagoRecommenderSystem


## Loading Libraries & Datasets

In [0]:
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta
import math
import matplotlib.pyplot as plt
from datetime import datetime
import re
import random
import joblib
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)

In [0]:
# CSV locations of the train / validation / test splits (clean_data folder).
TrainDataFilepath, valFilepath, testFilepath = (
    '/content/drive/My Drive/Trivago/Datasets/clean_data/TrainData.csv',
    '/content/drive/My Drive/Trivago/Datasets/clean_data/val.csv',
    '/content/drive/My Drive/Trivago/Datasets/clean_data/test.csv',
)

# Read each split into its own DataFrame, in the same order as the paths above.
TrainData, valData, testData = [
    pd.read_csv(csv_path)
    for csv_path in (TrainDataFilepath, valFilepath, testFilepath)
]

In [0]:
# Load the per-item table and drop its 'Unnamed: 0' and 'properties' columns.
GlobalPath = '/content/drive/My Drive/Trivago/Datasets/clean_data/item_global.csv'
GlobalData = pd.read_csv(GlobalPath).drop(columns=['Unnamed: 0', 'properties'])

## Validation & Test sets Scaling

In [0]:
# Column names used as model inputs, followed by the target column.
features = [
    'price',
    'item_rank',
    'price_rank',
    'session_duration',
    'item_duration',
    'item_session_duration',
    'item_interactions',
    'maximum_step',
    'top_list',
    'NumberOfProperties',
    'NumberInImpressions',
    'NumberInReferences',
    'NumberAsClickout',
    'NumberAsFinalClickout',
    'FClickoutToImpressions',
    'FClickoutToReferences',
    'FClickoutToClickout',
    'MeanPrice',
    'AveragePriceRank',
]
label = ['clickout']

# Split the training frame into the feature matrix and the label frame.
X_train = TrainData.loc[:, features]
y_train = TrainData.loc[:, label]

In [0]:
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Numeric preprocessing: median imputation followed by standardisation.
num_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy="median")),
    ('std_scaler', StandardScaler()),
])

# Route every column of X_train through the numeric pipeline.
full_pipeline = ColumnTransformer(
    transformers=[("num", num_pipeline, list(X_train.columns))]
)

# Fit the preprocessing on the training features and keep the scaled matrix.
X_train_scaled = full_pipeline.fit_transform(X_train)

In [0]:
def get_data_clickout(data):
  """Return the last 'clickout item' row of every session.

  Rows whose action_type equals 'clickout item' are kept and grouped by
  session_id; the final row of each group (in the frame's existing row
  order) is returned with its original index.
  """
  is_clickout = data['action_type'] == 'clickout item'
  clickout_rows = data.loc[is_clickout]
  return clickout_rows.groupby('session_id').tail(1)

def get_item_id(data_clickout):
  """Expand each clickout row into one row per impressed item.

  Args:
    data_clickout: frame with a 'session_id' column and an 'impressions'
      column holding '|'-separated item ids.

  Returns:
    A new frame with columns 'session_id' and 'item_id' (one row per
    impression, in list order) and a fresh 0..n-1 index. The input frame
    is not modified.
  """
  # Work on an explicit copy: assigning onto a column-subset slice of the
  # caller's frame triggers pandas' SettingWithCopyWarning.
  item_id = data_clickout[['session_id', 'impressions']].copy()
  item_id['impressions'] = item_id['impressions'].str.split('|')
  item_id = (
      item_id.explode('impressions')
      .rename(columns={'impressions': 'item_id'})
      .reset_index(drop=True)
  )
  return item_id

def get_price(data_clickout):
  """Expand each clickout row into one row per displayed price.

  Args:
    data_clickout: frame with a 'session_id' column and a 'prices' column
      holding '|'-separated integer prices.

  Returns:
    A new frame with columns 'session_id' and 'price' (int64, one row per
    price in list order) and a fresh 0..n-1 index. The input frame is not
    modified.
  """
  # Explicit copy avoids assigning onto a slice of the caller's frame.
  price = data_clickout[['session_id', 'prices']].copy()
  price['prices'] = price['prices'].str.split('|')
  price = (
      price.explode('prices')
      .rename(columns={'prices': 'price'})
      .reset_index(drop=True)
  )
  # explode() leaves string elements in an object column; cast them once.
  price['price'] = price['price'].astype('int64')
  return price

def get_item_rank(data_clickout):
  """Give every impressed item its 1-based position in the impression list.

  Args:
    data_clickout: frame with a 'session_id' column and an 'impressions'
      column holding '|'-separated item ids.

  Returns:
    A new frame with columns 'session_id' and 'item_rank' (int64; 1 for the
    first impression, 2 for the second, ...) and a fresh 0..n-1 index. The
    input frame is not modified.
  """
  # Explicit copy avoids assigning onto a slice of the caller's frame.
  item_rank = data_clickout[['session_id', 'impressions']].copy()
  # Replace each impression string by the list [1, 2, ..., number of items].
  item_rank['impressions'] = item_rank['impressions'].apply(
      lambda impressions: list(range(1, len(impressions.split('|')) + 1)))
  item_rank = (
      item_rank.explode('impressions')
      .rename(columns={'impressions': 'item_rank'})
      .reset_index(drop=True)
  )
  # explode() yields an object column; store the ranks as real integers.
  item_rank['item_rank'] = item_rank['item_rank'].astype('int64')
  return item_rank

def get_price_rank(data):
  """Per session, compute np.argsort of the prices and return one row per value.

  `data` needs 'session_id' and 'price' columns. Sessions keep their order
  of first appearance. The result has columns 'session_id' and 'price_rank'
  and a fresh 0..n-1 index.

  NOTE(review): np.argsort returns the positions that would sort the prices
  in ascending order, which is not the same as each item's ordinal price
  rank. Left as is because code outside this view may rely on it - confirm.
  """
  prices_by_session = data.groupby('session_id', sort=False)['price']
  price_rank = (
      prices_by_session.apply(lambda session_prices: session_prices.values)
      .to_frame()
      .reset_index()
      .rename(columns={'price': 'price_rank'})
  )
  price_rank['price_rank'] = price_rank['price_rank'].apply(
      lambda price_values: np.argsort(price_values))
  return price_rank.explode('price_rank').reset_index(drop=True)

def get_clickout(data_clickout, item_id):
  clickout = data_clickout[['session_id','reference']]
  clickout = item_id.merge(clickout, on='session_id', how='left')
  clickout['clickout'] = clickout.apply(lambda x: 1 if x['item_id'] == x['reference'] else 0, axis=1)
  clickout.drop(columns='reference', inplace=True)
  clickout = clickout.reset_index(drop=True)
  return clickout

def get_session_duration(data, item_id):
  """Attach each session's timestamp span (max - min) to every item row.

  `data` needs 'session_id' and 'timestamp' columns; `item_id` needs
  'session_id' and 'item_id'. The result has one row per row of `item_id`
  with columns 'session_id' and 'session_duration' and a fresh index.
  """
  timestamps = data.groupby('session_id', sort=False).timestamp
  span = (timestamps.max() - timestamps.min()).to_frame()
  span = span.rename(columns={'timestamp': 'session_duration'})
  merged = item_id.merge(span, on='session_id', how='left')
  merged = merged.drop(columns='item_id')
  return merged.reset_index(drop=True)

def get_item_duration(data, item_id):
  """Timestamp span (max - min) of each (session, reference) pair, per item row.

  `data` needs 'session_id', 'reference' and 'timestamp' columns; `item_id`
  needs 'session_id' and 'item_id'. Item rows with no matching reference in
  their session get 0. The result has columns 'session_id', 'item_id' and
  'item_duration' and a fresh index.
  """
  timestamps = data.groupby(['session_id', 'reference'], sort=False).timestamp
  per_item = (timestamps.max() - timestamps.min()).reset_index()
  per_item = per_item.rename(columns={'reference': 'item_id', 'timestamp': 'item_duration'})
  merged = item_id.merge(per_item, on=['session_id', 'item_id'], how='left')
  return merged.fillna(0).reset_index(drop=True)

def get_item_session_duration(item_duration, session_duration):
  """Share of the session's duration spent on each item.

  Args:
    item_duration: frame with 'session_id', 'item_id' and 'item_duration'.
    session_duration: frame with a 'session_duration' column, aligned with
      `item_duration` by index.

  Returns:
    A new frame with columns 'session_id', 'item_id' and
    'item_session_duration' (item_duration / session_duration, with missing
    results such as 0/0 replaced by 0) and a fresh 0..n-1 index. Neither
    input frame is modified.
  """
  ratio = item_duration['item_duration'] / session_duration['session_duration']
  # assign() builds a new frame, so the caller's item_duration frame does not
  # gain an extra 'item_session_duration' column as a side effect.
  item_session_duration = item_duration[['session_id', 'item_id']].assign(
      item_session_duration=ratio)
  item_session_duration = item_session_duration.fillna(0).reset_index(drop=True)
  return item_session_duration

def get_item_interactions(data, item_id):
  """Count, per item row, the non-null 'step' rows of its (session, reference).

  `data` needs 'session_id', 'reference' and 'step' columns; `item_id` needs
  'session_id' and 'item_id'. Items never referenced in their session get 0.
  The result has columns 'session_id', 'item_id' and 'item_interactions' and
  a fresh index.
  """
  counts = data.groupby(['session_id', 'reference'])['step'].count()
  counts = counts.reset_index(name='item_interactions')
  counts = counts.rename(columns={'reference': 'item_id'})
  merged = item_id.merge(counts, on=['session_id', 'item_id'], how='left')
  return merged.fillna(0).reset_index(drop=True)

def get_maximum_step(data, item_id):
  """Attach each session's largest 'step' value to every item row.

  `data` needs 'session_id' and 'step' columns; `item_id` needs
  'session_id'. The result keeps the columns of `item_id`, adds
  'maximum_step', and has a fresh index.
  """
  per_session_max = data.groupby('session_id', sort=False)['step'].max()
  per_session_max = per_session_max.reset_index(name='maximum_step')
  merged = item_id.merge(per_session_max, on='session_id', how='left')
  return merged.reset_index(drop=True)

def get_top_list(item_rank, top_n=5):
  """Flag items displayed within the first `top_n` positions.

  Args:
    item_rank: frame with 'session_id' and 'item_rank' columns, where
      item_rank is the 1-based integer display position.
    top_n: number of leading positions that count as the top of the list.
      The default of 5 reproduces the previous hard-coded `item_rank < 6`.

  Returns:
    A new frame with columns 'session_id', 'item_rank' and 'top_list'
    (int64: 1 if item_rank <= top_n, otherwise 0) and a fresh 0..n-1 index.
    The input frame is not modified.
  """
  # Explicit copy avoids assigning onto a slice of the caller's frame.
  top_list = item_rank[['session_id', 'item_rank']].copy()
  # Vectorised comparison instead of a row-wise apply.
  top_list['top_list'] = (top_list['item_rank'] <= top_n).astype('int64')
  top_list = top_list.reset_index(drop=True)
  return top_list

In [0]:
def transform_data(data, global_data=None):
  """Build the per-(session, impressed item) feature table from raw session logs.

  Args:
    data: raw interaction log. The helpers called here read its
      'session_id', 'action_type', 'impressions', 'prices', 'reference',
      'timestamp' and 'step' columns.
    global_data: optional per-item frame with an 'item_id' column. When
      omitted, the table is read from the item_global.csv path used
      elsewhere in this notebook, which is the previous behaviour. Passing
      a preloaded frame avoids re-reading the CSV on every call; the frame
      passed in is not modified.

  Returns:
    One row per impressed item of each session's final clickout, holding the
    local features computed by the helpers, left-joined with the per-item
    columns of `global_data` on 'item_id'.
  """
  FinalClickoutDF = get_data_clickout(data)
  item_id = get_item_id(FinalClickoutDF)
  price = get_price(FinalClickoutDF)
  item_rank = get_item_rank(FinalClickoutDF)
  price_rank = get_price_rank(price)
  clickout = get_clickout(FinalClickoutDF, item_id)
  session_duration = get_session_duration(data, item_id)
  item_duration = get_item_duration(data, item_id)
  item_session_duration = get_item_session_duration(item_duration, session_duration)
  item_interactions = get_item_interactions(data, item_id)
  maximum_step = get_maximum_step(data, item_id)
  top_list = get_top_list(item_rank)

  # Every helper returns its frame with a fresh 0..n-1 index, so the columns
  # below are combined by index alignment with item_id.
  local_data = item_id.copy()
  local_data['price'] = price.price
  local_data['item_rank'] = item_rank.item_rank
  local_data['price_rank'] = price_rank.price_rank
  local_data['clickout'] = clickout.clickout
  local_data['session_duration'] = session_duration.session_duration
  local_data['item_duration'] = item_duration.item_duration
  local_data['item_session_duration'] = item_session_duration.item_session_duration
  local_data['item_interactions'] = item_interactions.item_interactions
  local_data['maximum_step'] = maximum_step.maximum_step
  local_data['top_list'] = top_list.top_list

  if global_data is None:
    global_data = pd.read_csv('/content/drive/My Drive/Trivago/Datasets/clean_data/item_global.csv')
  # item_id values produced by splitting the impressions string are strings,
  # so the join key of the item table is converted to str as well. assign()
  # returns a new frame and leaves the caller's frame untouched.
  global_data = global_data.assign(item_id=global_data['item_id'].astype(str))

  return local_data.merge(global_data, on='item_id', how='left')

In [0]:
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# NOTE(review): this cell rebinds num_pipeline / full_pipeline to new, unfitted
# objects; a fit performed on a previously created full_pipeline is not carried
# over to the object defined here.
imputer_step = ('imputer', SimpleImputer(strategy="median"))
scaler_step = ('std_scaler', StandardScaler())
num_pipeline = Pipeline([imputer_step, scaler_step])

# Send every column of X_train through the numeric pipeline.
full_pipeline = ColumnTransformer([("num", num_pipeline, X_train.columns.tolist())])

In [0]:
#validation set transformation and scaling
valData = transform_data(valData)
valData_sessions_item = valData[['session_id', 'item_id']]
X_val = valData[features]
y_val = valData[label]

# Scale the validation features with imputation / scaling statistics learned
# from the training features. Calling fit_transform on X_val would re-learn the
# medians, means and variances from the validation data itself, so the inputs
# would no longer be on the scale of X_train_scaled. The fit is done here so
# this cell does not depend on what full_pipeline was last fitted on.
full_pipeline.fit(X_train)
X_val_scaled = full_pipeline.transform(X_val)
X_val_scaled

In [0]:
#test set transformation and scaling
testData = transform_data(testData)
testData_sessions_item = testData[['session_id', 'item_id']]
X_test = testData[features]
y_test  = testData[label]

# Scale the *test* features (the previous code passed X_val here, so
# X_test_scaled was just a copy of the scaled validation set and did not line
# up with y_test). As for any held-out data, the imputation / scaling
# statistics come from the training features; the fit is done here so this
# cell does not depend on what full_pipeline was last fitted on.
full_pipeline.fit(X_train)
X_test_scaled = full_pipeline.transform(X_test)
X_test_scaled

## Mean Reciprocal Rank
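Mean Reciprocal Rank (MRR) evaluates systems that return a ranked list of answers to queries. For each query, take the reciprocal of the rank of the first relevant answer (0 if no answer is relevant), then average over all queries: $\mathrm{MRR} = \frac{1}{|Q|}\sum_{i=1}^{|Q|}\frac{1}{\mathrm{rank}_i}$.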

In [0]:
# The ranking-metric functions in this cell are taken from https://gist.github.com/bwhite/3726239
def mean_reciprocal_rank(rs):
    """Average of 1 / (position of the first relevant item) over all rankings.

    Positions are 1-based and relevance is binary (any nonzero entry is
    relevant).  A ranking without a relevant item contributes 0.
    Example from http://en.wikipedia.org/wiki/Mean_reciprocal_rank
    >>> rs = [[0, 0, 1], [0, 1, 0], [1, 0, 0]]
    >>> mean_reciprocal_rank(rs)
    0.61111111111111105
    >>> rs = np.array([[0, 0, 0], [0, 1, 0], [1, 0, 0]])
    >>> mean_reciprocal_rank(rs)
    0.5
    >>> rs = [[0, 0, 0, 1], [1, 0, 0], [1, 0, 0]]
    >>> mean_reciprocal_rank(rs)
    0.75
    Args:
        rs: Iterable of relevance-score sequences (list or numpy), each in
            rank order (first element is the first item)
    Returns:
        Mean reciprocal rank
    """
    reciprocal_ranks = []
    for relevance in rs:
        hit_positions = np.asarray(relevance).nonzero()[0]
        if hit_positions.size:
            reciprocal_ranks.append(1. / (hit_positions[0] + 1))
        else:
            reciprocal_ranks.append(0.)
    return np.mean(reciprocal_ranks)


def r_precision(r):
    """Precision measured at the position of the last relevant item.

    Relevance is binary (nonzero is relevant).  Returns 0 when nothing is
    relevant.
    >>> r = [0, 0, 1]
    >>> r_precision(r)
    0.33333333333333331
    >>> r = [0, 1, 0]
    >>> r_precision(r)
    0.5
    >>> r = [1, 0, 0]
    >>> r_precision(r)
    1.0
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
    Returns:
        R Precision
    """
    is_relevant = np.asarray(r) != 0
    relevant_positions = is_relevant.nonzero()[0]
    if relevant_positions.size == 0:
        return 0.
    last_relevant = relevant_positions[-1]
    return np.mean(is_relevant[:last_relevant + 1])


def precision_at_k(r, k):
    """Fraction of relevant items among the first k results.

    Relevance is binary (nonzero is relevant).
    >>> r = [0, 0, 1]
    >>> precision_at_k(r, 1)
    0.0
    >>> precision_at_k(r, 2)
    0.0
    >>> precision_at_k(r, 3)
    0.33333333333333331
    >>> precision_at_k(r, 4)
    Traceback (most recent call last):
        File "<stdin>", line 1, in ?
    ValueError: Relevance score length < k
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of leading results to look at (must be >= 1)
    Returns:
        Precision @ k
    Raises:
        ValueError: len(r) must be >= k
    """
    assert k >= 1
    head = np.asarray(r)[:k]
    head_is_relevant = head != 0
    # Fewer than k scores available: the cutoff cannot be evaluated.
    if head_is_relevant.size != k:
        raise ValueError('Relevance score length < k')
    return np.mean(head_is_relevant)


def average_precision(r):
    """Mean of precision@k taken at every position k that holds a relevant item.

    Relevance is binary (nonzero is relevant).  Returns 0 when nothing is
    relevant.
    >>> r = [1, 1, 0, 1, 0, 1, 0, 0, 0, 1]
    >>> delta_r = 1. / sum(r)
    >>> sum([sum(r[:x + 1]) / (x + 1.) * delta_r for x, y in enumerate(r) if y])
    0.7833333333333333
    >>> average_precision(r)
    0.78333333333333333
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
    Returns:
        Average precision
    """
    r = np.asarray(r) != 0
    precisions = []
    for position in range(r.size):
        if r[position]:
            precisions.append(precision_at_k(r, position + 1))
    if len(precisions) == 0:
        return 0.
    return np.mean(precisions)


def mean_average_precision(rs):
    """Average of average_precision over a collection of rankings.

    Relevance is binary (nonzero is relevant).
    >>> rs = [[1, 1, 0, 1, 0, 1, 0, 0, 0, 1]]
    >>> mean_average_precision(rs)
    0.78333333333333333
    >>> rs = [[1, 1, 0, 1, 0, 1, 0, 0, 0, 1], [0]]
    >>> mean_average_precision(rs)
    0.39166666666666666
    Args:
        rs: Iterable of relevance-score sequences (list or numpy), each in
            rank order (first element is the first item)
    Returns:
        Mean average precision
    """
    per_ranking = []
    for relevance in rs:
        per_ranking.append(average_precision(relevance))
    return np.mean(per_ranking)


def dcg_at_k(r, k, method=0):
    """Score is discounted cumulative gain (dcg)
    Relevance is positive real values.  Can use binary
    as the previous methods.
    Example from
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
    >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
    >>> float(dcg_at_k(r, 1))
    3.0
    >>> float(dcg_at_k(r, 1, method=1))
    3.0
    >>> float(dcg_at_k(r, 2))
    5.0
    >>> round(float(dcg_at_k(r, 2, method=1)), 4)
    4.2619
    >>> round(float(dcg_at_k(r, 10)), 4)
    9.6051
    >>> round(float(dcg_at_k(r, 11)), 4)
    9.6051
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
    Returns:
        Discounted cumulative gain (0. when no scores are given)
    Raises:
        ValueError: method is neither 0 nor 1 (only checked when at least
            one score is present)
    """
    # np.asfarray was removed in NumPy 2.0; asarray with dtype=float is the
    # equivalent conversion and works on old and new NumPy alike.
    r = np.asarray(r, dtype=float)[:k]
    if r.size:
        if method == 0:
            # First two positions are undiscounted, then 1 / log2(position).
            return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1)))
        elif method == 1:
            # Every position is discounted by 1 / log2(position + 1).
            return np.sum(r / np.log2(np.arange(2, r.size + 2)))
        else:
            raise ValueError('method must be 0 or 1.')
    return 0.


def ndcg_at_k(r, k, method=0):
    """DCG of the ranking divided by the DCG of its ideal (descending) ordering.

    Relevance is positive real values.  Can use binary
    as the previous methods.  Returns 0 when the ideal DCG is 0.
    Example from
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
    >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
    >>> ndcg_at_k(r, 1)
    1.0
    >>> r = [2, 1, 2, 0]
    >>> ndcg_at_k(r, 4)
    0.9203032077642922
    >>> ndcg_at_k(r, 4, method=1)
    0.96519546960144276
    >>> ndcg_at_k([0], 1)
    0.0
    >>> ndcg_at_k([1], 2)
    1.0
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
    Returns:
        Normalized discounted cumulative gain
    """
    best_ordering = sorted(r, reverse=True)
    ideal_dcg = dcg_at_k(best_ordering, k, method)
    if ideal_dcg:
        return dcg_at_k(r, k, method) / ideal_dcg
    return 0.


# When this code runs as the main module, execute the doctest examples found in
# the docstrings of the current module.
if __name__ == "__main__":
    import doctest
    doctest.testmod()

## Evaluating Models

### Without Resampling

#### Logistic Regression

In [0]:
# Load the saved model from the project folder.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load model files from a trusted source.
LR_model = joblib.load('/content/drive/My Drive/Trivago/Project/TrivagoRecommenderSystem/LR_model.pkl')
# Model predictions for the scaled validation features.
Predictions = LR_model.predict(X_val_scaled)
# Output of predict_proba for the same rows (presumably one column per class - confirm).
Probabilities = LR_model.predict_proba(X_val_scaled)