# User Experience Analytics

## Imports

In [1]:
import pickle
import numpy as np
import pandas as pd
from math import floor
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy.stats import zscore
from sklearn import metrics
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler, normalize
from scipy.spatial.distance import cdist
import matplotlib.pyplot as plt

In [2]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
pd.set_option("expand_frame_repr", False)
pd.set_option('display.float_format', '{:.2f}'.format)

In [3]:
sys.path.append(os.path.abspath(os.path.join('../scripts')))
from df_selector import DfSelector
from df_outlier import DfOutlier
from vis_seaborn import *
from vis_plotly import *

2021-07-17 03:40:26,782 — DfSelector — DEBUG — Loaded successfully!
2021-07-17 03:40:26,788 — DfOutlier — DEBUG — Loaded successfully!


## Data reading

In [4]:
df = pd.read_csv("../data/clean_data.csv")
user_experiance = pd.read_csv("../data/user_experiance.csv")
user_experiance.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 105714 entries, 0 to 105713
Data columns (total 4 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   cluster        105714 non-null  int64  
 1   total_avg_rtt  105714 non-null  float64
 2   total_avg_tp   105714 non-null  float64
 3   total_avg_tcp  105714 non-null  float64
dtypes: float64(3), int64(1)
memory usage: 3.2 MB


In [5]:
user_engagement = pd.read_csv("../data/user_engagement.csv")
user_engagement.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 105714 entries, 0 to 105713
Data columns (total 4 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   cluster            105714 non-null  int64  
 1   sessions           105714 non-null  float64
 2   duration           105714 non-null  float64
 3   total_data_volume  105714 non-null  float64
dtypes: float64(3), int64(1)
memory usage: 3.2 MB


## Task 4.1

engagement score to each user. Consider the engagement score as the Euclidean distance between the user data point & the less engaged cluster(use the first clustering for this)(Euclidean Distance)


In [6]:
with open("../models/user_engagement.pkl", "rb") as f:
    kmeans1 = pickle.load(f)

In [7]:
less_engagement = 1

Calculate and assign the distance between the centroid and samples

In [8]:
distance = kmeans1.fit_transform(user_engagement)
distance_from_less_engagement = list(
    map(lambda x: x[less_engagement], distance))
user_engagement['enga_score'] = distance_from_less_engagement
user_engagement.head(5)

Unnamed: 0,cluster,sessions,duration,total_data_volume,enga_score
0,3,1.0,116720.0,878690574.0,579384568.44
1,2,1.0,181230.0,156859643.0,1301215488.72
2,2,1.0,134969.0,595966483.0,862108653.3
3,0,1.0,49878.0,422320698.0,1035754448.94
4,1,2.0,37104.0,1457410944.0,692616.94


experience score to each user. Consider the experience score as the Euclidean distance between the user data point & the worst experience’s cluster.


In [9]:
with open("../models/user_experiance.pkl", "rb") as f:
    kmeans2 = pickle.load(f)

In [10]:
worst_experiance = 1

In [11]:
distance = kmeans2.fit_transform(user_experiance)
distance_from_worest_experiance = list(
    map(lambda x: x[worst_experiance], distance))
user_experiance['expe_score'] = distance_from_worest_experiance
user_experiance.head(5)


Unnamed: 0,cluster,total_avg_rtt,total_avg_tp,total_avg_tcp,expe_score
0,0,46.0,76.0,729692.0,7816408.21
1,0,31.0,99.0,15743.0,8530354.49
2,2,50.0,97.0,4032874.25,4513249.16
3,0,84.0,248.0,5056.0,8541041.06
4,1,119.0,43204.5,8980965.88,435395.44


## Task 4.2

Consider the average of both engagement & experience scores as the satisfaction score & report the top 10 satisfied customer


In [None]:
customer_id_engagement = user_engagement.index
customer_id_experiance = user_experiance.index
customers_intersection = list(
    set(customer_id_engagement).intersection(customer_id_experiance))

data_engagement2 = user_engagement.reset_index()
data_engagement2 = data_engagement2[data_engagement2['MSISDN/Number'].isin(
    customers_intersection)]

data_experiance2 = user_experiance.reset_index()
data_experiance2 = data_experiance2[data_experiance2['MSISDN/Number'].isin(
    customers_intersection)]

data_users = pd.merge(data_engagement2, data_experiance2, on='MSISDN/Number')

data_users['sat_score'] = (
    data_users['enga_score'] + data_users['expe_score'])/2


## Task 4.3


Build a regression model of your choice to predict the satisfaction score of a customer.


## Task 4.4


Run a k-means(k=2) on the engagement & the experience score .


## Task 4.5


Aggregate the average satisfaction & experience score per cluster.


## Task 4.6


Export your final table containing all user id + engagement, experience & satisfaction scores in your local MySQL database. Report a screenshot of a select query output on the exported table.


## Task 4.7


Model deployment tracking - deploy the model and monitor your model. Here you can use MlOps tools which can help you to track your model’s change.  Your model tracking report includes code version, start and end time, source, parameters, metrics(loss convergence) and artifacts or any output file regarding each specific run. (CSV file, screenshot)
