In [1]:
# Add the directory two levels above the current working directory to
# sys.path. Later cells import from the `scripts` package, which is presumably
# located there -- TODO confirm the notebook's location in the repository.
import sys, os
from pathlib import Path

current_directory = os.getcwd()
parent_directory = Path(current_directory).parent.parent

# Insert only when the entry is missing: re-running this cell would otherwise
# prepend one more duplicate of the same path on every execution.
if str(parent_directory) not in sys.path:
    sys.path.insert(0, str(parent_directory))
sys.path

['c:\\Users\\mekbi\\Desktop\\Kifiya\\week-2\\telecom-data-analysis',
 'C:\\Users\\mekbi\\AppData\\Local\\Programs\\Python\\Python312\\python312.zip',
 'C:\\Users\\mekbi\\AppData\\Local\\Programs\\Python\\Python312\\DLLs',
 'C:\\Users\\mekbi\\AppData\\Local\\Programs\\Python\\Python312\\Lib',
 'C:\\Users\\mekbi\\AppData\\Local\\Programs\\Python\\Python312',
 'c:\\Users\\mekbi\\Desktop\\Kifiya\\week-2\\telecom-data-analysis\\venv',
 '',
 'c:\\Users\\mekbi\\Desktop\\Kifiya\\week-2\\telecom-data-analysis\\venv\\Lib\\site-packages',
 'c:\\Users\\mekbi\\Desktop\\Kifiya\\week-2\\telecom-data-analysis\\venv\\Lib\\site-packages\\win32',
 'c:\\Users\\mekbi\\Desktop\\Kifiya\\week-2\\telecom-data-analysis\\venv\\Lib\\site-packages\\win32\\lib',
 'c:\\Users\\mekbi\\Desktop\\Kifiya\\week-2\\telecom-data-analysis\\venv\\Lib\\site-packages\\Pythonwin']

### Load engagement and experience data

In [2]:
# Read the per-user engagement table from the pickle stored next to this
# notebook and preview its first rows.
# NOTE(review): unpickling executes arbitrary code embedded in the file, so
# this file must come from a trusted source.
import pandas as pd

engagement_pickle_path = './engagement_data.pkl'
engagement_data = pd.read_pickle(engagement_pickle_path)
engagement_data.head()

Unnamed: 0,MSISDN/Number,Session Dur. (ms),Session Frequency,Total Data (Bytes)
0,33601000000.0,116720.0,1,878690600.0
1,33601000000.0,181230.0,1,156859600.0
2,33601000000.0,134969.0,1,595966500.0
3,33601010000.0,49878.0,1,422320700.0
4,33601010000.0,37104.0,2,1457411000.0


In [3]:
# Read the per-user experience table from the pickle stored next to this
# notebook and preview its first rows.
# NOTE(review): unpickling executes arbitrary code embedded in the file, so
# this file must come from a trusted source.
experience_pickle_path = './experience_data.pkl'
experience_data = pd.read_pickle(experience_pickle_path)
experience_data.head()

Unnamed: 0,MSISDN/Number,Avg RTT DL (ms),Avg RTT UL (ms),Avg Bearer TP DL (kbps),Avg Bearer TP UL (kbps),TCP DL Retrans. Vol (Bytes),TCP UL Retrans. Vol (Bytes),Handset Type
0,33601000000.0,46.0,0.0,37.0,39.0,0.0,0.0,Huawei P20 Lite Huawei Nova 3E
1,33601000000.0,30.0,1.0,48.0,51.0,0.0,0.0,Apple iPhone 7 (A1778)
2,33601000000.0,0.0,0.0,48.0,49.0,0.0,0.0,undefined
3,33601010000.0,69.0,15.0,204.0,44.0,1066.0,0.0,Apple iPhone 5S (A1457)
4,33601010000.0,114.0,5.0,40395.0,103.0,9349630.0,21202.0,Apple iPhone Se (A1723)


In [4]:
# Remove the textual 'Handset Type' column so that only the identifier and the
# numeric experience metrics remain.
# errors='ignore' keeps this cell re-runnable: the result is written back to
# the same name, so on a second execution the column is already gone and a
# plain drop would raise KeyError.
experience_data = experience_data.drop(columns='Handset Type', errors='ignore')
experience_data.columns

Index(['MSISDN/Number', 'Avg RTT DL (ms)', 'Avg RTT UL (ms)',
       'Avg Bearer TP DL (kbps)', 'Avg Bearer TP UL (kbps)',
       'TCP DL Retrans. Vol (Bytes)', 'TCP UL Retrans. Vol (Bytes)'],
      dtype='object')

### Assign scores to each dataset

##### Assign scores to engagement data using Euclidean Distance

In [41]:
# Get the centroids
from scripts import satisfaction_utils

# Cluster on the raw engagement metrics only. A later cell writes an
# 'Engagement Score' column back into engagement_data, so when this cell is
# re-run without restarting the kernel that stale score would otherwise be
# clustered as if it were a feature (yielding one extra centroid coordinate).
# errors='ignore' makes the drop a no-op on a fresh kernel, where the score
# column does not exist yet.
engagement_features = engagement_data.drop(
    columns=['MSISDN/Number', 'Engagement Score'], errors='ignore'
)
engagement_centroids = satisfaction_utils.calculate_centroids(df=engagement_features)
engagement_centroids

INFO:scripts.satisfaction_utils:Data normalization successful


INFO:scripts.satisfaction_utils:KMeans clustering successful
INFO:scripts.satisfaction_utils:Centroids calculated for 3 clusters


array([[ 2.6165375 ,  3.67617845,  3.25149318,  3.65952156],
       [-0.22650113, -0.45038988, -0.39933634, -0.44233854],
       [ 0.36485243,  0.97160709,  0.86275711,  0.94632975]])

In [42]:
# Get the least engaged cluster's centroid.
# Pick the row whose coordinates sum to the smallest value instead of a fixed
# row index: the order in which the clustering returns its centroids is not
# guaranteed to stay the same between runs (no fixed seed is visible here), so
# a hard-coded index can silently point at a different cluster.
# Assumes every feature is "higher = more engaged" -- TODO confirm.
least_engaged_centroid = engagement_centroids[engagement_centroids.sum(axis=1).argmin()]
least_engaged_centroid

array([-0.22650113, -0.45038988, -0.39933634, -0.44233854])

In [43]:
# Normalize the engagement data
from scripts import enagagement_utils

# Normalize the raw engagement metrics only. The 'Engagement Score' column is
# added to engagement_data further down, so on a re-run it must be excluded
# here or the previous score becomes an extra feature of the new score.
# errors='ignore' makes the drop a no-op when the column does not exist yet.
# NOTE(review): the centroids computed earlier contain negative values and
# values above 1, while this output lies in [0, 1]; the two helpers appear to
# use different scalers, which would make distances between them hard to
# interpret -- confirm against scripts.satisfaction_utils.
normalized_engagement_data = enagagement_utils.normalize_data(
    df=engagement_data.drop(columns=['MSISDN/Number', 'Engagement Score'], errors='ignore')
)
normalized_engagement_data

array([[0.00590825, 0.05555556, 0.09593143, 0.060468  ],
       [0.00938651, 0.05555556, 0.01402598, 0.03000364],
       [0.0068922 , 0.05555556, 0.06385101, 0.04804889],
       ...,
       [0.04692194, 0.05555556, 0.02256615, 0.04124173],
       [0.01325784, 0.05555556, 0.06388747, 0.0493142 ],
       [0.04651534, 0.05555556, 0.0120169 , 0.03741805]])

In [44]:
# Engagement score per user: the Euclidean distance between the user's
# normalized feature row and the least engaged cluster's centroid.
engagement_scores = [
    satisfaction_utils.euclidean_distance(data=feature_row, centroid=least_engaged_centroid)
    for feature_row in normalized_engagement_data
]

# Preview the first ten scores
engagement_scores[:10]

[np.float64(0.8989435407329588),
 np.float64(0.8399995907005695),
 np.float64(0.8749146147328021),
 np.float64(0.8586798808829498),
 np.float64(0.9929902081460485),
 np.float64(0.9252134780198205),
 np.float64(0.9259643781489159),
 np.float64(0.8520792570128602),
 np.float64(0.9608318364759345),
 np.float64(0.8863692972230837)]

In [45]:
# Attach the per-user scores to the engagement table as 'Engagement Score'
# (one score per row, in row order) and preview the result.
engagement_data = engagement_data.assign(**{'Engagement Score': engagement_scores})
engagement_data.head()

Unnamed: 0,MSISDN/Number,Session Dur. (ms),Session Frequency,Total Data (Bytes),Engagement Score
0,33601000000.0,116720.0,1,878690600.0,0.898944
1,33601000000.0,181230.0,1,156859600.0,0.84
2,33601000000.0,134969.0,1,595966500.0,0.874915
3,33601010000.0,49878.0,1,422320700.0,0.85868
4,33601010000.0,37104.0,2,1457411000.0,0.99299


##### Assign scores to experience data using Euclidean Distance

In [46]:
# Get the centroids
# Cluster on the raw experience metrics only. A later cell writes an
# 'Experience Score' column back into experience_data, so when this cell is
# re-run without restarting the kernel that stale score would otherwise be
# clustered as if it were a feature (yielding one extra centroid coordinate).
# errors='ignore' makes the drop a no-op on a fresh kernel, where the score
# column does not exist yet.
experience_features = experience_data.drop(
    columns=['MSISDN/Number', 'Experience Score'], errors='ignore'
)
experience_centroids = satisfaction_utils.calculate_centroids(df=experience_features)
experience_centroids

INFO:scripts.satisfaction_utils:Data normalization successful
INFO:scripts.satisfaction_utils:KMeans clustering successful
INFO:scripts.satisfaction_utils:Centroids calculated for 3 clusters


array([[ 0.55799056,  0.48969613,  0.73836009,  0.7312117 ,  0.54368531,
         0.59554496,  0.78870649],
       [-0.31995584, -0.30674891, -0.37650335, -0.34700133, -0.30607488,
        -0.31747229, -0.42493861],
       [ 3.79719841,  4.07909109,  3.67603747,  2.89658408,  3.53649543,
         3.36181411,  4.58154301]])

In [47]:
# Select the centroid used as the "worst experience" reference point.
# NOTE(review): the row index is hard-coded. In the output recorded for the
# centroids this row has the lowest value on every coordinate; confirm that
# "lowest on all metrics" is the intended meaning of worst experience (the
# features include RTT and TCP retransmission volume), and that the cluster
# order is stable between runs.
WORST_EXPERIENCE_CLUSTER = 1
worst_experience_centroid = experience_centroids[WORST_EXPERIENCE_CLUSTER]
worst_experience_centroid

array([-0.31995584, -0.30674891, -0.37650335, -0.34700133, -0.30607488,
       -0.31747229, -0.42493861])

In [48]:
# Normalize the experience data
# Normalize the raw experience metrics only. The 'Experience Score' column is
# added to experience_data further down, so on a re-run it must be excluded
# here or the previous score becomes an extra feature of the new score.
# errors='ignore' makes the drop a no-op when the column does not exist yet.
# NOTE(review): the centroids computed earlier contain negative values and
# values above 1, while this output lies in [0, 1]; the two helpers appear to
# use different scalers -- confirm against scripts.satisfaction_utils.
normalized_experience_data = enagagement_utils.normalize_data(
    df=experience_data.drop(columns=['MSISDN/Number', 'Experience Score'], errors='ignore')
)
normalized_experience_data

array([[4.69867211e-02, 0.00000000e+00, 1.41506548e-04, ...,
        0.00000000e+00, 0.00000000e+00, 1.03662037e-02],
       [3.06435138e-02, 4.73933649e-03, 1.83576062e-04, ...,
        0.00000000e+00, 0.00000000e+00, 7.88185527e-03],
       [0.00000000e+00, 0.00000000e+00, 1.83576062e-04, ...,
        0.00000000e+00, 0.00000000e+00, 6.96941666e-04],
       ...,
       [0.00000000e+00, 0.00000000e+00, 3.82450129e-06, ...,
        0.00000000e+00, 0.00000000e+00, 8.86070951e-07],
       [0.00000000e+00, 0.00000000e+00, 4.20695141e-05, ...,
        0.00000000e+00, 0.00000000e+00, 3.03012547e-04],
       [0.00000000e+00, 0.00000000e+00, 7.64900257e-06, ...,
        0.00000000e+00, 0.00000000e+00, 1.77214897e-06]])

In [49]:
# Experience score per user: the Euclidean distance between the user's
# normalized feature row and the worst-experience cluster's centroid.
experience_scores = [
    satisfaction_utils.euclidean_distance(data=feature_row, centroid=worst_experience_centroid)
    for feature_row in normalized_experience_data
]

# Preview the first ten scores
experience_scores[:10]

[np.float64(0.9363001302537535),
 np.float64(0.9307290442619912),
 np.float64(0.9146166322853239),
 np.float64(0.980498977259134),
 np.float64(1.205052122497646),
 np.float64(0.9554830876965453),
 np.float64(1.1023467351806369),
 np.float64(0.9589607504038022),
 np.float64(0.916570694573792),
 np.float64(0.949172177127797)]

In [50]:
# Attach the per-user scores to the experience table as 'Experience Score'
# (one score per row, in row order) and preview the result.
experience_data = experience_data.assign(**{'Experience Score': experience_scores})
experience_data.head()

Unnamed: 0,MSISDN/Number,Avg RTT DL (ms),Avg RTT UL (ms),Avg Bearer TP DL (kbps),Avg Bearer TP UL (kbps),TCP DL Retrans. Vol (Bytes),TCP UL Retrans. Vol (Bytes),Experience Score
0,33601000000.0,46.0,0.0,37.0,39.0,0.0,0.0,0.9363
1,33601000000.0,30.0,1.0,48.0,51.0,0.0,0.0,0.930729
2,33601000000.0,0.0,0.0,48.0,49.0,0.0,0.0,0.914617
3,33601010000.0,69.0,15.0,204.0,44.0,1066.0,0.0,0.980499
4,33601010000.0,114.0,5.0,40395.0,103.0,9349630.0,21202.0,1.205052


### Merge engagement and experience datasets

In [51]:
# Combine both scores per user: keep only the identifier and score column from
# each table, then inner-join them on the user identifier.
engagement_score_view = engagement_data[['MSISDN/Number', 'Engagement Score']]
experience_score_view = experience_data[['MSISDN/Number', 'Experience Score']]
satisfaction_data = engagement_score_view.merge(
    experience_score_view,
    how='inner',
    on='MSISDN/Number',
)
satisfaction_data.head()

Unnamed: 0,MSISDN/Number,Engagement Score,Experience Score
0,33601000000.0,0.898944,0.9363
1,33601000000.0,0.84,0.930729
2,33601000000.0,0.874915,0.914617
3,33601010000.0,0.85868,0.980499
4,33601010000.0,0.99299,1.205052


In [52]:
# Satisfaction score = row-wise mean of the engagement and experience scores.
score_columns = ['Engagement Score', 'Experience Score']
satisfaction_data['Satisfaction Score'] = satisfaction_data.loc[:, score_columns].mean(axis='columns')
satisfaction_data.head()


Unnamed: 0,MSISDN/Number,Engagement Score,Experience Score,Satisfaction Score
0,33601000000.0,0.898944,0.9363,0.917622
1,33601000000.0,0.84,0.930729,0.885364
2,33601000000.0,0.874915,0.914617,0.894766
3,33601010000.0,0.85868,0.980499,0.919589
4,33601010000.0,0.99299,1.205052,1.099021


### Top 10 satisfied customers

In [53]:
# Rank all users from highest to lowest satisfaction score, then keep the
# first ten rows of that ranking.
ranked_by_satisfaction = satisfaction_data.sort_values(by='Satisfaction Score', ascending=False)
top_10_satisfied = ranked_by_satisfaction.iloc[:10]
top_10_satisfied

Unnamed: 0,MSISDN/Number,Engagement Score,Experience Score,Satisfaction Score
76363,33675880000.0,2.229469,2.774865,2.502167
37470,33659820000.0,1.614316,3.155061,2.384688
1279,33604520000.0,1.889976,2.823726,2.356851
37052,33659730000.0,2.244949,2.407587,2.326268
6437,33614890000.0,2.501814,2.105876,2.303845
13180,33625780000.0,2.716584,1.804829,2.260706
39120,33660210000.0,1.655723,2.622603,2.139163
666,33603130000.0,1.870669,2.371909,2.121289
13526,33626320000.0,2.445291,1.780977,2.113134
92577,33760410000.0,2.038283,2.080286,2.059284
