# Deep Detractor Detection

Solve the binary classification problem of deciding whether a customer is a deep detractor. 
A deep detractor is someone who replied with a recommendation score of 2 or less in an NPS survey.

We work on a ground-truth graph, i.e. a graph on which the signal was observed for every node.
We sample a small subset of the observed signal and try to reconstruct the signal on the entire graph from these samples.
Then we compute precision-recall curves from the reconstructed signal.

We compare the performance for increasing sample sizes.
The experiment for a single sample size can be run with `deep_detractor_detection.py`.

## Setup

In [None]:
import sys
PROJECT_PATH = "/home/christopher_orlowicz1_vodafone_c/gershgorin"
sys.path.append(PROJECT_PATH)
%load_ext autoreload
%autoreload 2

In [None]:
%cd $PROJECT_PATH

In [None]:
#!pip install -q -r requirements.txt
#!pip install --force-reinstall faiss-gpu

In [None]:
import math
import time
from tqdm import tqdm

from google.cloud import bigquery
import matplotlib.pyplot as plt
%config InlineBackend.figure_format = "retina"
import networkx as nx
import numpy as np
import pandas as pd
import pandas_gbq
import scipy
from scipy import sparse
from sklearn.neighbors import kneighbors_graph

import numpy as np
import pandas as pd
from sklearn import metrics

import src.utils.plotting as plt_util
from src.gershgorin.bs_gda import bs_gda
from src.graph.graph import Graph
from src.gsp import reconstruction
from src.utils import data_handler
from src.utils.yaml_reader import YamlReader

## Config

In [None]:
# root directory of the precomputed artifacts for the analysed month
DIR = "out/customer_analytics/2023-01"
# sub-directory name selecting the graph/signal variant
# (presumably the number of nodes -- TODO confirm)
size = "1000"

# the k-NN graph is stored on disk as a sparse adjacency matrix
adj_matrix_path = f"{DIR}/graph/knn/{size}/adj_matrix.npz"
adj_matrix = sparse.load_npz(adj_matrix_path)
graph = Graph(adj_matrix)

# the NPS signal is flattened into a 1-D vector after loading
signal_path = f"{DIR}/signal/{size}/nps.npy"
s = np.load(signal_path).flatten()

In [None]:
# sample sizes to evaluate: 9 evenly spaced budgets from 100 to 900
sampling_budgets = np.linspace(start=100, stop=900, num=9).astype(int)
# keyword arguments forwarded to the sampling set selection (bs_gda)
config = dict(
    mu=0.01,  # regularization strength of smoothness prior
    eps=1e-5,  # precision
    p_hops=3,  # number of hops to take in the node neighborhood
    parallel=True,  # whether to parallelize the algorithm where possible
)

## Run classification

In [None]:
def is_deep_detractor(s: np.ndarray) -> np.ndarray:
    """
    Flags the customers that count as deep detractors.
    A customer is a deep detractor if the recommendation score is at most 2.
    :param s: NPS signal vector (one recommendation score per customer)
    :return: boolean array, True where the score is <= 2
    """
    # highest recommendation score that still counts as a deep detractor
    max_detractor_score = 2
    detractor_mask = s <= max_detractor_score
    return detractor_mask

In [None]:
def plot_precision_recall_curve(precision_vals: np.ndarray, recall_vals: np.ndarray, label: str):
    """
    Draws a precision-recall curve with recall on the x-axis and precision on the y-axis.
    The curve is added via the pyplot interface and both axes are labelled.
    :param precision_vals: Precision values such that element i is the precision of predictions with score >= thresholds[i] and the last element is 1.
    :param recall_vals: Decreasing recall values such that element i is the recall of predictions with score >= thresholds[i] and the last element is 0.
    :param label: label of the curve (shown in the legend)
    """
    # use the `label` argument rather than a global loop variable, so the
    # function also works (and labels correctly) when called outside the loop
    plt.plot(recall_vals, precision_vals, label=label)
    plt.xlabel("Recall")
    plt.ylabel("Precision")

In [None]:
# One precision-recall curve per sampling budget, all drawn into the same figure.
plt.figure(figsize=(6,4))
plt.title("Precision-recall curve\nfor the detection of deep detractors")

# the ground-truth labels do not depend on the sample size, so compute them once
y_true = is_deep_detractor(s)

# reconstructed signals, one per sampling budget (in the order of sampling_budgets)
reconstructions = list()
for k in tqdm(sampling_budgets):
    print("Selecting sampling set...")
    sampling_set, _ = bs_gda(graph, k, **config)

    print("Reconstructing signal...")
    # only the signal values on the sampled nodes are passed to the reconstruction
    s_rec = reconstruction.reconstruct_signal(graph.laplacian(), sampling_set, s[sampling_set])
    reconstructions.append(s_rec)

    print("Detecting deep detractors...")
    # scikit-learn treats larger scores as "more positive" (y_score >= thresh),
    # whereas a deep detractor has a LOW recommendation score. Negating the
    # reconstruction reverses the order for every real value; a reciprocal would
    # only do so for strictly positive values and misrank reconstructions <= 0.
    y_score = -s_rec
    # the thresholds are not needed for plotting
    precision_vals, recall_vals, _ = metrics.precision_recall_curve(y_true, y_score, pos_label=1)
    plot_precision_recall_curve(precision_vals, recall_vals, label=f"{k}")

plt.legend(title="Sample size $k$", loc='center left', bbox_to_anchor=(1, 0.5))
# the legend is placed outside the axes; a tight bounding box keeps it from
# being cut off in the saved file
plt.savefig("out/dd_detection_increasing_sample_size.pdf", bbox_inches="tight")