<h1> user_experience_analysis</h1>

<h2> Imporing data and packages </h2>

Importing essential libraries

In [1]:
import pandas as pd
import numpy as np
import os
import warnings
from pandasql import sqldf
import matplotlib.pyplot as plt
import seaborn as sns
import sys
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

Suppressing warning messages

In [2]:
warnings.filterwarnings('ignore')

Setting file paths

In [3]:
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir)) # setting path to parent directory
scripts_dir = os.path.join(parent_dir,"scripts") # setting path to scripts directory
data_path = os.path.join(parent_dir,"data","teleco_clean_data.csv") # setting path to data directory

sys.path.insert(1, scripts_dir)

loading cleaning and summarizing scripts

In [4]:
from data_cleaning_functions import DataCleaner as Cleaner 
from data_summarizing_functions import DataSummarizer as Sumar

cleaner = Cleaner()
sumar = Sumar()

loading dataframes

In [5]:
df3= pd.read_csv(data_path)

In [6]:
sumar.summ_columns(df3)

Unnamed: 0,variables,missing_count,missing_percent_(%),data_type,unique_values
0,Bearer Id,991,1.0,object,134709
1,Dur. (ms),0,0.0,float64,89525
2,MSISDN/Number,1066,1.0,object,106857
3,Avg RTT DL (ms),0,0.0,float64,2130
4,Avg RTT UL (ms),0,0.0,float64,722
5,Avg Bearer TP DL (kbps),0,0.0,float64,41753
6,Avg Bearer TP UL (kbps),0,0.0,float64,14528
7,TCP DL Retrans. Vol (MB),0,0.0,float64,54399
8,TCP UL Retrans. Vol (MB),0,0.0,float64,35121
9,Activity Duration DL (ms),0,0.0,float64,102561


In [7]:
cleaner.fill_missing_by_mode(df3)
sumar.summ_columns(df3)

Unnamed: 0,variables,missing_count,missing_percent_(%),data_type,unique_values
0,Bearer Id,0,0.0,object,134708
1,Dur. (ms),0,0.0,float64,89525
2,MSISDN/Number,0,0.0,object,106856
3,Avg RTT DL (ms),0,0.0,float64,2130
4,Avg RTT UL (ms),0,0.0,float64,722
5,Avg Bearer TP DL (kbps),0,0.0,float64,41753
6,Avg Bearer TP UL (kbps),0,0.0,float64,14528
7,TCP DL Retrans. Vol (MB),0,0.0,float64,54399
8,TCP UL Retrans. Vol (MB),0,0.0,float64,35121
9,Activity Duration DL (ms),0,0.0,float64,102561


In [17]:
grouping_lis = ["MSISDN/Number"]
aggr_lis = ["Handset Type", "Avg RTT DL (ms)", "Avg RTT UL (ms)", "Avg Bearer TP DL (kbps)", "Avg Bearer TP UL (kbps)", "TCP DL Retrans. Vol (MB)", "TCP UL Retrans. Vol (MB)" ] 
metric_lis = [pd.Series.mode, "sum","sum", "sum", "sum", "sum", "sum"]
col_names = ["Handset", "RTT DL (ms)", "RTT UL (ms)", "TP DL (kbps)", "TP UL (kbps)", "TCP DL Retrans (MB)", "TCP UL Retrans (MB)"]

aggr_df_2 = sumar.find_agg(df3, grouping_lis, aggr_lis, metric_lis, col_names)
aggr_df_2['Handset'] = aggr_df_2['Handset'].astype(str)
 

In [18]:
sumar.combineColumns(aggr_df_2, "RTT DL (ms)", "RTT UL (ms)", "RTT_total")
sumar.combineColumns(aggr_df_2, "TP DL (kbps)", "TP UL (kbps)", "TP_total")
sumar.combineColumns(aggr_df_2, "TP DL (kbps)", "TP UL (kbps)", "TP_total")
sumar.combineColumns(aggr_df_2, "TCP DL Retrans (MB)", "TCP UL Retrans (MB)", "TCP_total")

sumar.summ_columns(aggr_df_2)


Unnamed: 0,variables,missing_count,missing_percent_(%),data_type,unique_values
0,MSISDN/Number,0,0.0,object,106856
1,Handset,0,0.0,object,1399
2,RTT DL (ms),0,0.0,float64,2169
3,RTT UL (ms),0,0.0,float64,801
4,TP DL (kbps),0,0.0,float64,36610
5,TP UL (kbps),0,0.0,float64,14364
6,TCP DL Retrans (MB),0,0.0,float64,43803
7,TCP UL Retrans (MB),0,0.0,float64,31274
8,RTT_total,0,0.0,float64,2337
9,TP_total,0,0.0,float64,38063


Listing the top TCP, RTT and TP values

In [11]:
sumar.show_N_per_col(aggr_df_2, "MSISDN/Number", ["RTT_total", "TP_total", "TCP_total"], 10)


Top 10 customers based onRTT_total

           MSISDN/Number  RTT_total
13527  33,626,320,676.00   315866.0
47984  33,662,317,023.00    96924.0
41945  33,660,874,265.00    64670.0
80250  33,683,692,867.00    54848.0
85684  33,698,551,167.00    46021.0
96415  33,761,813,523.00    37084.0
70143  33,668,791,629.00    36304.0
93845  33,760,941,100.00    27278.0
1714   33,606,788,933.00    26300.0
74241  33,671,816,754.00    25715.0

Top 10 customers based onTP_total

           MSISDN/Number    TP_total
13527  33,626,320,676.00  18184634.0
97585  33,762,333,464.00    902222.0
69911  33,668,708,263.00    840424.0
36258  33,659,546,392.00    798342.0
73147  33,669,946,573.00    690815.0
69021  33,668,425,947.00    676858.0
84487  33,698,174,760.00    618797.0
13142  33,625,700,673.00    564179.0
29314  33,658,075,438.00    550352.0
98214  33,762,644,658.00    549661.0

Top 10 customers based onTCP_total

           MSISDN/Number     TCP_total
13527  33,626,320,676.00  10689.722649
66861  33

showing the bottom 10 RTT, TP, and TCP

In [12]:
sumar.show_N_per_col(aggr_df_2, "MSISDN/Number", ["RTT_total", "TP_total", "TCP_total"], 10, "bottom")


Top 10 customers based onRTT_total

           MSISDN/Number  RTT_total
28879  33,652,982,188.00        0.0
44749  33,661,575,382.00        0.0
39067  33,660,204,450.00        2.0
83397  33,695,045,499.00        4.0
36040  33,659,493,541.00        5.0
69663  33,668,619,895.00        6.0
21981  33,647,679,481.00        8.0
74435  33,672,196,893.00        9.0
3883   33,610,846,366.00        9.0
17864  33,634,684,770.00       10.0

Top 10 customers based onTP_total

           MSISDN/Number  TP_total
48997  33,662,540,046.00       0.0
66051  33,667,462,490.00       0.0
27425  33,651,161,673.00       0.0
27068  33,650,957,457.00       0.0
24369  33,650,346,934.00       0.0
23474  33,650,128,703.00       0.0
69657  33,668,618,348.00       0.0
23045  33,650,034,044.00       0.0
71457  33,669,225,540.00       0.0
71500  33,669,241,058.00       0.0

Top 10 customers based onTCP_total

            MSISDN/Number  TCP_total
23473   33,650,128,412.00   0.000097
6360    33,614,777,138.00   0.00012

In [19]:
grouping_lis = ["Handset"]
aggr_lis = ["TP_total", "TCP_total"] 
metric_lis = ["mean", "mean"]
col_names = ["mean_TP", "mean_TCP"]

handset_aggr_df = sumar.find_agg(aggr_df_2, grouping_lis, aggr_lis, metric_lis, col_names)


In [21]:
sumar.show_N_per_col(handset_aggr_df, "Handset", ["mean_TP", "mean_TCP"], 10)


Top 10 customers based onmean_TP

                                 Handset        mean_TP
1154         Spa Condor Elect. Allure M2  168623.000000
422                       Huawei Par-Lx9  144115.000000
321                     Huawei B715S-23C  141692.500000
152   Asustek Wireless-Ac1200 Lte Router  125525.000000
705               New-Bund Technol. Thor  113444.500000
704              New-Bund Technol. Mix 2  105211.000000
573                               Lg G6+  100135.000000
1391           Zyxel Communicat. Sbg3600   97351.000000
466                       Huawei Y9 2019   91739.000000
1334          Xiaomi Communica. M1902F1G   85613.666667

Top 10 customers based onmean_TCP

                                        Handset    mean_TCP
584                                  Lg Lg-H635  905.063286
324                             Huawei Bln-Al10  329.174592
142         Asustek Asus Zenfone Selfie Zd551Kl  321.658306
972             Samsung Galaxy Tab S3 (Sm-T825)  253.310671
345          