In [1]:
# Add the directory two levels above the current working directory to
# sys.path (presumably the project root, so that the local `scripts` package
# imported further down can be resolved).
import os
import sys
from pathlib import Path

current_directory = os.getcwd()
parent_directory = Path(current_directory).parent.parent

# Re-running this cell must not pile up duplicate entries: remove any existing
# occurrence first, then put the directory at the front so it keeps priority
# over every other import location.
while str(parent_directory) in sys.path:
    sys.path.remove(str(parent_directory))
sys.path.insert(0, str(parent_directory))
sys.path

['c:\\Users\\mekbi\\Desktop\\Kifiya\\week-2\\telecom-data-analysis',
 'C:\\Users\\mekbi\\AppData\\Local\\Programs\\Python\\Python312\\python312.zip',
 'C:\\Users\\mekbi\\AppData\\Local\\Programs\\Python\\Python312\\DLLs',
 'C:\\Users\\mekbi\\AppData\\Local\\Programs\\Python\\Python312\\Lib',
 'C:\\Users\\mekbi\\AppData\\Local\\Programs\\Python\\Python312',
 'c:\\Users\\mekbi\\Desktop\\Kifiya\\week-2\\telecom-data-analysis\\venv',
 '',
 'c:\\Users\\mekbi\\Desktop\\Kifiya\\week-2\\telecom-data-analysis\\venv\\Lib\\site-packages',
 'c:\\Users\\mekbi\\Desktop\\Kifiya\\week-2\\telecom-data-analysis\\venv\\Lib\\site-packages\\win32',
 'c:\\Users\\mekbi\\Desktop\\Kifiya\\week-2\\telecom-data-analysis\\venv\\Lib\\site-packages\\win32\\lib',
 'c:\\Users\\mekbi\\Desktop\\Kifiya\\week-2\\telecom-data-analysis\\venv\\Lib\\site-packages\\Pythonwin']

### Load engagement and experience data

In [2]:
# Read the pickled engagement table from the notebook's directory and
# preview its first five rows.
# NOTE(review): unpickling can execute arbitrary code, so this file must come
# from a trusted source (presumably an earlier step of this project - confirm).
import pandas as pd

engagement_data = pd.read_pickle(filepath_or_buffer='./engagement_data.pkl')
engagement_data.head(n=5)

Unnamed: 0,MSISDN/Number,Session Dur. (ms),Session Frequency,Total Data (Bytes)
0,33601000000.0,116720.0,1,878690600.0
1,33601000000.0,181230.0,1,156859600.0
2,33601000000.0,134969.0,1,595966500.0
3,33601010000.0,49878.0,1,422320700.0
4,33601010000.0,37104.0,2,1457411000.0


In [3]:
# Read the pickled experience table from the notebook's directory and
# preview its first five rows.
# NOTE(review): unpickling can execute arbitrary code, so this file must come
# from a trusted source (presumably an earlier step of this project - confirm).
experience_data = pd.read_pickle(filepath_or_buffer='./experience_data.pkl')
experience_data.head(n=5)

Unnamed: 0,MSISDN/Number,Avg RTT DL (ms),Avg RTT UL (ms),Avg Bearer TP DL (kbps),Avg Bearer TP UL (kbps),TCP DL Retrans. Vol (Bytes),TCP UL Retrans. Vol (Bytes),Handset Type
0,33601000000.0,46.0,0.0,37.0,39.0,0.0,0.0,Huawei P20 Lite Huawei Nova 3E
1,33601000000.0,30.0,1.0,48.0,51.0,0.0,0.0,Apple iPhone 7 (A1778)
2,33601000000.0,0.0,0.0,48.0,49.0,0.0,0.0,undefined
3,33601010000.0,69.0,15.0,204.0,44.0,1066.0,0.0,Apple iPhone 5S (A1457)
4,33601010000.0,114.0,5.0,40395.0,103.0,9349630.0,21202.0,Apple iPhone Se (A1723)


In [4]:
# Remove the text-valued 'Handset Type' column so that only MSISDN/Number and
# the numeric experience metrics remain.
# errors='ignore' keeps this cell idempotent: because the result is assigned
# back to the same name, a second run would otherwise raise KeyError on the
# already-removed column.
experience_data = experience_data.drop(columns='Handset Type', errors='ignore')
experience_data.columns

Index(['MSISDN/Number', 'Avg RTT DL (ms)', 'Avg RTT UL (ms)',
       'Avg Bearer TP DL (kbps)', 'Avg Bearer TP UL (kbps)',
       'TCP DL Retrans. Vol (Bytes)', 'TCP UL Retrans. Vol (Bytes)'],
      dtype='object')

### Assign scores to each dataset

#### Assign scores to engagement data using Euclidean distance

In [5]:
# Compute the cluster centroids of the engagement features.
# The MSISDN/Number column is excluded before the frame is handed to the
# project helper, so only the three engagement metrics are passed in.
from scripts import satisfaction_utils

centroids = satisfaction_utils.calculate_centroids(
    df=engagement_data.drop(columns='MSISDN/Number')
)
centroids

INFO:scripts.satisfaction_utils:Data normalization successful
INFO:scripts.satisfaction_utils:KMeans clustering successful
INFO:scripts.satisfaction_utils:Centroids calculated for 3 clusters


array([[ 2.73387952,  3.71325469,  3.24708014],
       [-0.23130937, -0.45376432, -0.39897854],
       [ 0.36878701,  0.97567411,  0.86060373]])

In [6]:
# Get the least engaged cluster's centroid.
# The position of a cluster in the centroid array is not guaranteed to be
# stable between runs (KMeans numbers its clusters arbitrarily), so instead of
# hard-coding an index, pick the centroid whose coordinates sum to the
# smallest value.
# Assumes larger values of every feature mean more engagement - confirm
# against how calculate_centroids scales the data.
least_engaged_centroid = centroids[centroids.sum(axis=1).argmin()]
least_engaged_centroid

array([-0.23130937, -0.45376432, -0.39897854])

In [7]:
# Scale the engagement metrics with the project's normalization helper.
# The MSISDN/Number column is left out so only the three metrics are scaled.
# (The module really is spelled `enagagement_utils` in the project.)
from scripts import enagagement_utils

normalized_engagement_data = enagagement_utils.normalize_data(
    df=engagement_data.drop(columns='MSISDN/Number')
)
normalized_engagement_data

array([[0.00590825, 0.05555556, 0.09593143],
       [0.00938651, 0.05555556, 0.01402598],
       [0.0068922 , 0.05555556, 0.06385101],
       ...,
       [0.04692194, 0.05555556, 0.02256615],
       [0.01325784, 0.05555556, 0.06388747],
       [0.04651534, 0.05555556, 0.0120169 ]])

In [10]:
# Engagement score per user: the Euclidean distance between the user's
# normalized feature row and the least engaged cluster's centroid.
# NOTE(review): the recorded outputs show centroid coordinates with negative
# values while the normalized rows appear to lie in [0, 1]; this suggests the
# two helpers scale the data differently. Confirm both live in the same
# feature space before trusting these distances.
engagement_scores = [
    satisfaction_utils.euclidean_distance(data=user_data,
                                          centroid=least_engaged_centroid)
    for user_data in normalized_engagement_data
]

engagement_scores[:10]

[np.float64(0.7487421574482148),
 np.float64(0.6985083949476356),
 np.float64(0.7282567643519451),
 np.float64(0.7143828277907733),
 np.float64(0.829207218440659),
 np.float64(0.7714664256227064),
 np.float64(0.7720632502592477),
 np.float64(0.7087692185419597),
 np.float64(0.8019232208420178),
 np.float64(0.7380210571261697)]

In [11]:
# Store the per-user scores in an 'Engagement Score' column of the engagement
# frame (scores are matched to rows by position) and preview the result.
engagement_data.loc[:, 'Engagement Score'] = engagement_scores
engagement_data.head(n=5)

Unnamed: 0,MSISDN/Number,Session Dur. (ms),Session Frequency,Total Data (Bytes),Engagement Score
0,33601000000.0,116720.0,1,878690600.0,0.748742
1,33601000000.0,181230.0,1,156859600.0,0.698508
2,33601000000.0,134969.0,1,595966500.0,0.728257
3,33601010000.0,49878.0,1,422320700.0,0.714383
4,33601010000.0,37104.0,2,1457411000.0,0.829207
