## User Experience Analytics

### Import Libraries

In [1]:
import pickle
import numpy as np
import pandas as pd
from math import floor
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy.stats import zscore
from sklearn import metrics
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler, normalize
from scipy.spatial.distance import cdist
import matplotlib.pyplot as plt
import sys, os

In [None]:
# Notebook-wide pandas display settings: no cap on the number of columns or
# on cell width, no wrapping of wide frames, floats printed with two decimals.
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.max_colwidth', None),
    ("expand_frame_repr", False),
    ('display.float_format', '{:.2f}'.format),
):
    pd.set_option(option_name, option_value)

In [None]:
# Make the helper modules under ../scripts importable. The relative path is
# resolved against the current working directory.
scripts_dir = os.path.abspath(os.path.join('../scripts'))
sys.path.append(scripts_dir)

from df_outlier import DfOutlier
from vis_seaborn import *
# Imported last: on a name clash, vis_plotly's definition is the one in scope.
from vis_plotly import *

### Loading Data

In [None]:
# Read ../data/my_clean_data.csv and print its column / dtype / non-null summary.
df1 = pd.read_csv(filepath_or_buffer="../data/my_clean_data.csv")
df1.info()

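1. Aggregate the experience metrics (RTT, throughput, TCP retransmission) and the handset type per user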

In [None]:
# Columns needed for the experience analysis: the user identifier, the DL/UL
# pairs for RTT, bearer throughput and TCP retransmission volume, and the
# handset type.
experience_columns = [
    "MSISDN/Number",
    "Avg RTT DL (ms)",
    "Avg RTT UL (ms)",
    "Avg Bearer TP DL (kbps)",
    "Avg Bearer TP UL (kbps)",
    "TCP DL Retrans. Vol (Bytes)",
    "TCP UL Retrans. Vol (Bytes)",
    "Handset Type",
]
# Work on an independent copy so later column additions do not touch df1.
user_experience = df1.loc[:, experience_columns].copy()

In [None]:
# Add one "total" column per metric as downlink + uplink.
for total_column, (dl_column, ul_column) in {
    'total_avg_rtt': ('Avg RTT DL (ms)', 'Avg RTT UL (ms)'),
    'total_avg_tp': ('Avg Bearer TP DL (kbps)', 'Avg Bearer TP UL (kbps)'),
    'total_avg_tcp': ('TCP DL Retrans. Vol (Bytes)', 'TCP UL Retrans. Vol (Bytes)'),
}.items():
    user_experience[total_column] = (
        user_experience[dl_column] + user_experience[ul_column])
user_experience.info()

In [None]:
def _first_mode(values):
    """Return the most frequent value of *values*, or NaN when there is none.

    ``Series.mode`` drops missing values by default, so a group whose values
    are all missing yields an empty result; indexing that with ``[0]`` raises.
    """
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else np.nan


# Collapse to one row per MSISDN/Number: the three totals are summed and the
# handset is the most frequent one for that number.
# The handset aggregator stays a lambda inside a list so the resulting column
# label remains ('Handset Type', '<lambda>').
_user_experience = user_experience.groupby('MSISDN/Number').agg({
    'total_avg_rtt': 'sum',
    'total_avg_tp': 'sum',
    'total_avg_tcp': 'sum',
    'Handset Type': [lambda x: _first_mode(x)]})

# Flatten the two-level column labels into a plain frame that keeps the
# MSISDN/Number index of the grouped result.
user_experience = pd.DataFrame({
    "total_avg_rtt": _user_experience["total_avg_rtt"]['sum'],
    "total_avg_tp": _user_experience["total_avg_tp"]['sum'],
    "total_avg_tcp": _user_experience["total_avg_tcp"]['sum'],
    "Handset Type": _user_experience["Handset Type"]['<lambda>'],
})
user_experience.head()

In [None]:
2. 

In [None]:
# Users ordered by total TCP retransmission volume, largest first.
sorted_by_tcp = user_experience.sort_values(by='total_avg_tcp', ascending=False)
# First ten and last ten values of that ordering.
top_10 = sorted_by_tcp['total_avg_tcp'].head(10)
last_10 = sorted_by_tcp['total_avg_tcp'].tail(10)
# The ten values that occur most often across users.
most_10 = user_experience['total_avg_tcp'].value_counts().head(10)


In [None]:
# Plot the top, bottom and most frequent total TCP values with the
# star-imported project helper mult_hist.
# NOTE(review): the arguments 1 and 3 are presumably the subplot grid's
# rows and columns — confirm in the helper.
mult_hist([top_10, last_10, most_10], 1,
          3, "TCP values in the dataset", ['Top 10', 'Last 10', 'Most 10'])

In [None]:
# Users ordered by total RTT, largest first.
sorted_by_rtt = user_experience.sort_values(by='total_avg_rtt', ascending=False)
# First ten and last ten values of that ordering.
top_10 = sorted_by_rtt['total_avg_rtt'].head(10)
last_10 = sorted_by_rtt['total_avg_rtt'].tail(10)
# The ten values that occur most often across users.
most_10 = user_experience['total_avg_rtt'].value_counts().head(10)

In [None]:

# Plot the top, bottom and most frequent total RTT values. Exactly one series
# is passed per subplot title, and the figure title names the metric being
# shown (RTT), matching the series computed in the previous cell.
mult_hist([top_10, last_10, most_10], 1,
          3, "RTT values in the dataset", ['Top 10', 'Last 10', 'Most 10'])

In [None]:
# Users ordered by total throughput, largest first.
sorted_by_tp = user_experience.sort_values(by='total_avg_tp', ascending=False)
# First ten and last ten values of that ordering.
top_10 = sorted_by_tp['total_avg_tp'].head(10)
last_10 = sorted_by_tp['total_avg_tp'].tail(10)
# The ten values that occur most often across users.
most_10 = user_experience['total_avg_tp'].value_counts().head(10)

In [None]:
# Plot the top, bottom and most frequent total throughput values. Exactly one
# series is passed per subplot title, and the figure title names the metric
# being shown (TP), matching the series computed in the previous cell.
mult_hist([top_10, last_10, most_10], 1,
          3, "TP values in the dataset", ['Top 10', 'Last 10', 'Most 10'])

In [None]:
3. 

In [None]:
# Mean total throughput and mean total TCP retransmission per handset type.
handset_type_df = (
    user_experience
    .groupby('Handset Type')[['total_avg_tp', 'total_avg_tcp']]
    .mean()
)
handset_type_df.head()

In [None]:
# Handset types ordered by mean total throughput, highest first.
sorted_by_tp = handset_type_df.sort_values(by='total_avg_tp', ascending=False)
top_tp = sorted_by_tp.loc[:, 'total_avg_tp']

In [None]:
# Plot mean total throughput for every handset type (sorted, highest first)
# using the star-imported project helper `hist`, with its interactive flag on.
hist(top_tp, interactive=True)


In [None]:
# Same plot limited to the 20 handset types with the highest mean total throughput.
hist(top_tp.head(20))

In [None]:
# Handset types ordered by mean total TCP retransmission volume, highest first.
sorted_by_tcp = handset_type_df.sort_values(by='total_avg_tcp', ascending=False)
top_tcp = sorted_by_tcp.loc[:, 'total_avg_tcp']

In [None]:
# Plot mean total TCP retransmission for every handset type (sorted, highest
# first) using the star-imported project helper `hist`, interactive flag on.
hist(top_tcp, interactive=True)

In [None]:
# Same plot limited to the 20 handset types with the highest mean total TCP retransmission.
hist(top_tcp.head(20))

4. Cluster users into three experience groups with k-means

In [None]:
# Numeric experience metrics only (handset type left out), on an independent
# copy so outlier handling and cluster labels do not alter user_experience.
metric_columns = ["total_avg_rtt", "total_avg_tp", "total_avg_tcp"]
experiance_metric_df = user_experience.loc[:, metric_columns].copy()
experiance_metric_df.head()

In [None]:
# Box plot of the three metrics before any outlier handling.
experiance_metric_df.plot(kind='box')

In [None]:
# Wrap the metrics frame in the project's DfOutlier helper (imported from
# df_outlier) and display its overview.
# NOTE(review): what getOverview reports is defined in that module, not here.
df_outliers = DfOutlier(experiance_metric_df)
df_outliers.getOverview()

In [None]:
# Apply the helper's IQR-based outlier replacement to all three metrics.
outlier_columns = ["total_avg_rtt", "total_avg_tp", "total_avg_tcp"]
df_outliers.replace_outliers_with_iqr(outlier_columns)

# Wrap the frame again and show the overview once more.
# NOTE(review): this only reflects the replacement if DfOutlier modifies the
# frame it was given in place rather than a private copy — confirm in
# df_outlier.
df_outliers = DfOutlier(experiance_metric_df)
df_outliers.getOverview()

In [None]:
# Box plot of the three metrics again, after the outlier-replacement step.
experiance_metric_df.plot(kind='box')

In [None]:
# Standardise every column of the outlier-handled frame (zero mean, unit
# variance per column) and preview the first rows of the scaled values.
scaler = StandardScaler()
scaled_array = scaler.fit_transform(df_outliers.df)
pd.DataFrame(scaled_array).head(5)

In [None]:
# Rescale each row (sample) to unit L2 norm; these are normalize()'s defaults,
# written out for clarity.
data_normalized = normalize(scaled_array, norm='l2', axis=1)
pd.DataFrame(data_normalized).head()

In [None]:
# Fit a 3-cluster k-means model on the normalised metrics; the fixed
# random_state keeps the centroid initialisation reproducible.
kmeans = KMeans(n_clusters=3, random_state=0)
kmeans.fit(data_normalized)
kmeans.labels_

In [None]:
# Attach each row's k-means label as the first column. Plain assignment is
# used when the column already exists so the cell can be re-run:
# DataFrame.insert raises ValueError for a column name that is already present.
if 'cluster' in experiance_metric_df.columns:
    experiance_metric_df['cluster'] = kmeans.labels_
else:
    experiance_metric_df.insert(0, 'cluster', kmeans.labels_)
experiance_metric_df

In [None]:
# Number of rows assigned to each cluster label.
experiance_metric_df['cluster'].value_counts()

In [None]:
# 2-D view of the clusters: total RTT against total throughput, marker colour
# taken from the cluster label and marker size from total TCP retransmission.
fig = px.scatter(
    data_frame=experiance_metric_df,
    x='total_avg_rtt',
    y="total_avg_tp",
    size='total_avg_tcp',
    color='cluster',
)
fig.show()

In [None]:
# 3-D view of the three experience metrics using the star-imported project
# helper scatter3D, with the cluster column passed as `c`.
# NOTE(review): `c` is presumably the colour column and `rotation` the view
# orientation — confirm against the helper's definition.
scatter3D(experiance_metric_df, x="total_avg_tcp", y="total_avg_rtt", z="total_avg_tp",
          c="cluster", rotation=[-1.5, -1.5, 1])