# Effectiveness analysis 
This notebook comprises multiple effectiveness (performance) analyses.

It follows these steps:
    1. Load models and test datasets 
    2. Create metrics
    3. Effectiveness analysis:
        - Model performance
        - Tree-wise performance
        - Tree-wise average contribution
        - Query-wise performance
        - Query class performance
        - Document graded-relevance performance
        - Rank confusion matrix
        

#### Essential imports

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt

import os
import numpy as np
import pandas as pd
import xarray as xr

# Useful to reload the module without having to restart the notebook kernel
import rankeval.analysis.effectiveness
import rankeval.visualization.effectiveness
import rankeval.core.metrics

#### Data file setup

In [None]:
# Root directory of the QuickRank test data. This is a machine-specific path:
# change it here and every path below follows.
data_dir = '/Users/muntean/Documents/workspace/quickranktestdata'

# Dataset MSN with 2 models
data_file_msn_train = os.path.join(data_dir, "msn1/msn1.fold1.train.5k.txt")
data_file_msn_valid = os.path.join(data_dir, "msn1/msn1.fold1.vali.5k.txt")
data_file_msn_test = os.path.join(data_dir, "msn1/msn1.fold1.test.5k.txt")
model_file_50 = os.path.join(data_dir, "new.mart.50.xml")
model_file_100 = os.path.join(data_dir, "new.lmart.100.xml")

# Dataset Istella with one model.
# Derived from data_dir (instead of repeating the absolute path) so the
# Istella files cannot drift out of sync with the MSN ones.
data_file_istella = os.path.join(data_dir, "tiscali/tiscali.sample.txt")
model_file_500 = os.path.join(data_dir, "tiscali/lambdamart-500-alberi-50-leaves.xml")

# Loading models and datasets from files

In [None]:
# Loading Models
from rankeval.core.model import RTEnsemble

# Every ensemble is read from a file in QuickRank format; `name` is the label
# given to the model (presumably the one shown in tables and plots — confirm).
msn_mart_50 = RTEnsemble(model_file_50, name="MART-50", format="QuickRank")
# This model comes from "new.lmart.100.xml", so it is labelled LMART-100;
# the previous "MART-100" label did not match the file or the variable name.
msn_lmart_100 = RTEnsemble(model_file_100, name="LMART-100", format="QuickRank")
istella_lmart_500 = RTEnsemble(model_file_500, name="LMART-500", format="QuickRank")

In [None]:
# Loading Datasets (all files are in svmlight format)
from rankeval.core.dataset import Dataset

# Istella: a single test sample
istella_test = Dataset.load(
    data_file_istella,
    name="Istella-S Test",
    format="svmlight",
)

# MSN: train / validation / test splits, loaded in that order.
# A generator expression is unpacked so that no loop variable is left behind
# in the notebook namespace.
msn_train, msn_validation, msn_test = (
    Dataset.load(split_file, name=split_label, format="svmlight")
    for split_file, split_label in (
        (data_file_msn_train, "MSN Train"),
        (data_file_msn_valid, "MSN Valid"),
        (data_file_msn_test, "MSN Test"),
    )
)

### Create metrics 

In [None]:
from rankeval.core.metrics import NDCG, Precision, Recall

# The three metrics reused by every analysis below, all with cutoff 10.
ndcg_10 = NDCG(
    cutoff=10,
    no_relevant_results=0.5,
    implementation='exp',
)
recall_10 = Recall(cutoff=10)
precision_10 = Precision(cutoff=10)

# Effectiveness analysis
Evaluate the effectiveness of a set of models over several datasets and using a set of metrics

##  1. Model performance

- Compute the model performance analysis
- Display results
- Plot results

#### -> Computes the model performance analysis

In [None]:
from rankeval.analysis.effectiveness import model_performance

# NOTE: print is called with exactly one parenthesised argument so that this
# cell behaves the same under Python 2 and Python 3.

# Istella: one model, three metrics, on the Istella test set
istella_model_perf = model_performance(
    datasets=[istella_test],
    models=[istella_lmart_500],
    metrics=[precision_10, recall_10, ndcg_10],
)
print(istella_model_perf)
# Blank separator line; `print()` would output "()" under Python 2.
print("")

# MSN5k: two models, three metrics, on the MSN test set
msn_model_perf = model_performance(
    datasets=[msn_test],
    models=[msn_mart_50, msn_lmart_100],
    metrics=[precision_10, recall_10, ndcg_10],
)
print(msn_model_perf)

Each analysis returns an **xarray.DataArray** data structure. xarray is an extension of the famous pandas package offering similar features and capabilities but supporting multi-dimensional data structures (with a number of dimensions potentially much greater than 2).

#### -> Display results in a tabular view

In [None]:
from rankeval.visualization.effectiveness import pretty_print_model_performance

# Tabular view of the model_performance results: Istella first, then MSN.
pretty_print_model_performance(istella_model_perf)
pretty_print_model_performance(msn_model_perf)

#### -> Plot results

**plot_model_performance** takes as input:
    - the model_performance xarray (object) for given combinations of: dataset(s), model(s) and metric(s)
    - compare: str
        - the allowed values are: "models" and "metrics"
        - it allows the user to compare models or metrics
    - show_values: bool
        - shows the actual values on the plotted bars

In [None]:
from rankeval.visualization.effectiveness import plot_model_performance

# MSN: once comparing models, once comparing metrics, with values on the bars
plot_model_performance(msn_model_perf, compare="models",  show_values=True)
plot_model_performance(msn_model_perf, compare="metrics", show_values=True)

# Istella
# The first call passes neither `compare` nor `show_values`, so the function's
# defaults apply — TODO confirm what those defaults are.
plot_model_performance(istella_model_perf) 
plot_model_performance(istella_model_perf, compare="metrics", show_values=True) 


##  2. Tree-Wise Performance

- Compute the tree-wise model performance analysis
- Plot results

#### -> Computes tree-wise performance analysis

In [None]:
from rankeval.analysis.effectiveness import tree_wise_performance

# NOTE: print is called with exactly one parenthesised argument so that this
# cell behaves the same under Python 2 and Python 3.

# Istella: one model on the Istella test set
istella_tree_wise_perf = tree_wise_performance(
    datasets=[istella_test],
    models=[istella_lmart_500],
    metrics=[precision_10, recall_10, ndcg_10],
    step=10,
)
print(istella_tree_wise_perf)

# MSN: two models on the MSN test set
msn_tree_wise_perf = tree_wise_performance(
    datasets=[msn_test],
    models=[msn_mart_50, msn_lmart_100],
    metrics=[precision_10, recall_10, ndcg_10],
    step=10,
)
print(msn_tree_wise_perf)

# NOTE: test this later — an error was observed when more than one dataset
# is passed (here: MSN validation + MSN test with a single model).
msn_valid_tree_wise_perf = tree_wise_performance(
    datasets=[msn_validation, msn_test],
    models=[msn_mart_50],
    metrics=[precision_10, recall_10, ndcg_10],
    step=10,
)
print(msn_valid_tree_wise_perf)

#### -> Plot results

**plot_tree_wise_model_performance** takes as input:
    - the tree_wise_performance xarray (object) for given combinations of: dataset(s), model(s) and metric(s)
    - compare: str
        - the allowed values are: "models" and "metrics"
        - it allows the user to compare models or metrics
    

In [None]:
from rankeval.visualization.effectiveness import plot_tree_wise_model_performance

# Istella (single model)
plot_tree_wise_model_performance(istella_tree_wise_perf, compare="models")
plot_tree_wise_model_performance(istella_tree_wise_perf, compare="metrics")

# MSN (two models)
plot_tree_wise_model_performance(msn_tree_wise_perf, compare="models")
plot_tree_wise_model_performance(msn_tree_wise_perf, compare="metrics")

# Multi-dataset run (MSN validation + test).
# TRY this later, after the fix from Salvo for multiple datasets.
plot_tree_wise_model_performance(msn_valid_tree_wise_perf, compare="models")
plot_tree_wise_model_performance(msn_valid_tree_wise_perf, compare="datasets")

## 3. Tree-Wise Average Contribution

#### -> Computes tree average contribution analysis

In [None]:
# Import the function this cell actually calls. The cell previously imported
# tree_wise_performance, leaving tree_wise_average_contribution undefined.
from rankeval.analysis.effectiveness import tree_wise_average_contribution

# NOTE: print is called with exactly one parenthesised argument so that this
# cell behaves the same under Python 2 and Python 3.

# Istella: one model on the Istella test set (this analysis takes no metrics)
istella_tree_contrib = tree_wise_average_contribution(
    datasets=[istella_test],
    models=[istella_lmart_500],
)
print(istella_tree_contrib)

# MSN: two models on the MSN test set
msn_tree_contrib = tree_wise_average_contribution(
    datasets=[msn_test],
    models=[msn_mart_50, msn_lmart_100],
)
print(msn_tree_contrib)

#### -> Plot results

**plot_tree_wise_average_contribution** takes as input:
    - the tree_wise_average_contribution xarray (object) for given combinations of: dataset(s) and model(s)

In [None]:
from rankeval.visualization.effectiveness import plot_tree_wise_average_contribution

# Plot the tree-wise average contribution results: Istella first, then MSN.
plot_tree_wise_average_contribution(istella_tree_contrib)
plot_tree_wise_average_contribution(msn_tree_contrib)

## 4. Query-Wise Performance

#### -> Computes query wise performance analysis

In [None]:
from rankeval.analysis.effectiveness import query_wise_performance

# MSN: two models, three metrics, on the MSN test set, with bins=50
msn_query_wise_perf = query_wise_performance(
    datasets=[msn_test],
    models=[msn_mart_50, msn_lmart_100],
    metrics=[precision_10, recall_10, ndcg_10],
    bins=50,
)
# Single parenthesised argument: identical output under Python 2 and Python 3.
print(msn_query_wise_perf)

#### -> Plot results

**plot_query_wise_performance** takes as input:
    - the query_wise_performance xarray (object) for given combinations of: dataset(s), model(s) and metric(s)
    - compare: str
        - the allowed values are: "models" and "metrics"
        - it allows the user to compare models or metrics

In [None]:
from rankeval.visualization.effectiveness import plot_query_wise_performance

# MSN query-wise results, once comparing models and once comparing metrics.
plot_query_wise_performance(msn_query_wise_perf, compare="models")
plot_query_wise_performance(msn_query_wise_perf, compare="metrics")

## 5. Document Graded Relevance

#### -> Computes document graded relevance analysis

In [None]:
from rankeval.analysis.effectiveness import document_graded_relevance

# NOTE: print is called with exactly one parenthesised argument so that this
# cell behaves the same under Python 2 and Python 3.

# Istella: one model on the Istella test set, with bins=50
istella_doc_graded_rel = document_graded_relevance(
    datasets=[istella_test],
    models=[istella_lmart_500],
    bins=50,
)
print(istella_doc_graded_rel)

# MSN: two models on the MSN test set, with bins=50
msn_doc_graded_rel = document_graded_relevance(
    datasets=[msn_test],
    models=[msn_mart_50, msn_lmart_100],
    bins=50,
)
print(msn_doc_graded_rel)

#### -> Plot results

**plot_document_graded_relevance** takes as input:
    - the document_graded_relevance xarray (object) for given combinations of: dataset(s) and model(s)

In [None]:
from rankeval.visualization.effectiveness import plot_document_graded_relevance

# Plot the document graded-relevance results: Istella first, then MSN.
plot_document_graded_relevance(istella_doc_graded_rel) 
plot_document_graded_relevance(msn_doc_graded_rel)

## 6. Rank-Confusion Matrix

#### -> Computes the rank confusion matrix analysis

In [None]:
from rankeval.analysis.effectiveness import rank_confusion_matrix

# NOTE: print is called with exactly one parenthesised argument so that this
# cell behaves the same under Python 2 and Python 3.

# Istella: one model on the Istella test set
istella_confusion_matrix = rank_confusion_matrix(
    datasets=[istella_test],
    models=[istella_lmart_500],
)
print(istella_confusion_matrix)

# MSN: two models on the MSN test set
msn_confusion_matrix = rank_confusion_matrix(
    datasets=[msn_test],
    models=[msn_mart_50, msn_lmart_100],
)
print(msn_confusion_matrix)

#### -> Plot results

**plot_rank_confusion_matrix** takes as input:
    - the rank_confusion_matrix xarray (object) for given combinations of: dataset(s) and model(s)

In [None]:
from rankeval.visualization.effectiveness import plot_rank_confusion_matrix

# Plot the matrices computed by rank_confusion_matrix in the previous cell.
# The names `df` / `df_msn` used here before are not defined anywhere in this
# notebook and raised a NameError.
plot_rank_confusion_matrix(istella_confusion_matrix)
plot_rank_confusion_matrix(msn_confusion_matrix)

## 7. Query-Class Performance

#### -> Computes the query class performance analysis

In [None]:
from rankeval.analysis.effectiveness import query_class_performance

# Synthetic class labels: this demo assigns every query a random class.
classes = ['A', 'B', 'C', 'D', 'E']


def random_query_classes(n_queries):
    """Return a (1, n_queries) array holding one random class label per query.

    The single outer row matches the single dataset passed in each call below
    (presumably query_classes holds one list of labels per dataset — confirm
    against the query_class_performance documentation).
    """
    return np.array([[classes[np.random.randint(len(classes))]
                      for _ in range(n_queries)]])


# Labels must be sized after the dataset they describe. The previous code
# referenced an undefined `istella` object and reused the Istella-sized labels
# for MSN, which is a different dataset.
query_classes = random_query_classes(istella_test.n_queries)
msn_query_classes = random_query_classes(msn_test.n_queries)

# NOTE: print is called with exactly one parenthesised argument so that this
# cell behaves the same under Python 2 and Python 3.

# Istella: one model, three metrics
istella_query_class_perf = query_class_performance(
    datasets=[istella_test],
    models=[istella_lmart_500],
    metrics=[precision_10, recall_10, ndcg_10],
    query_classes=query_classes,
)
print(istella_query_class_perf)

# MSN: two models, three metrics
msn_query_class_perf = query_class_performance(
    datasets=[msn_test],
    models=[msn_mart_50, msn_lmart_100],
    metrics=[precision_10, recall_10, ndcg_10],
    query_classes=msn_query_classes,
)
print(msn_query_class_perf)


#### -> Plot results

**plot_query_class_performance** takes as input:
    - the query_class_performance xarray (object) for given combinations of: dataset(s), model(s) and metric(s)
    - compare: str
        - the allowed values are: "models" and "metrics"
        - it allows the user to compare models or metrics
    - show_values: bool
        - shows the actual values on the plotted bars

In [None]:
from rankeval.visualization.effectiveness import plot_query_class_performance

# Plot the query_class_performance results computed in the previous cell.
# The name `df` used here before is not defined anywhere in this notebook and
# raised a NameError.

# Istella
plot_query_class_performance(istella_query_class_perf, compare="metrics", show_values=True)
plot_query_class_performance(istella_query_class_perf, compare="models", show_values=True)

# MSN
plot_query_class_performance(msn_query_class_perf, compare="metrics", show_values=True)
plot_query_class_performance(msn_query_class_perf, compare="models", show_values=True)