# User Satisfaction Score Prediction

In [5]:
import pandas as pd
import os
import sys
# Add the sibling 'scripts' directory to the Python path for module imports.
# The path is resolved relative to the current working directory, and the
# membership guard keeps the cell idempotent: re-running it does not pile up
# duplicate entries in sys.path.
scripts_dir = os.path.abspath(os.path.join('..', 'scripts'))
if scripts_dir not in sys.path:
    sys.path.append(scripts_dir)

In [6]:
# Read the cleaned xDR records (path is relative to the notebook's working directory)
xdr_cleaned_path = '../data/xdr_cleaned.csv'
data = pd.read_csv(xdr_cleaned_path)

In [7]:
# Import modules
from user_engagement_analysis import UserEngagementAnalysis
from experience_analytics import ExperienceAnalytics
from satisfaction_analytics import UserSatisfactionAnalytics

Connection to database successful!


In [9]:
# Create instances of the engagement, user experience and satisfaction classes.
# The engagement and experience analysers are both built from the same `data`
# frame; the satisfaction helper takes no constructor arguments.
# NOTE(review): the constructors are opaque from here — if either one modifies
# `data`, the order of these lines matters; confirm before reordering.
engagement_analysis = UserEngagementAnalysis(data)
experience_analysis = ExperienceAnalytics(data)
satisfaction_analysis = UserSatisfactionAnalytics()

In [10]:
# Aggregate engagement metrics, then normalize and cluster them into 3 groups
engagement_analysis.aggregate_metrics()
engagement_analysis.normalize_and_cluster(n_clusters=3)

# Preview the metrics as the cell's last expression so the notebook actually
# renders them; a bare `.head()` in the middle of a cell is evaluated and
# silently discarded.
engagement_analysis.metrics.head()

In [13]:
# Compute engagement score
engagement_features = [
    'total_session_duration',
    'total_download_traffic',
    'total_upload_traffic',
    'sessions_frequency',
]

# The least engaged cluster is the one whose centre has the smallest total
# across the engagement features.
engagement_center_totals = engagement_analysis.cluster_centers[engagement_features].sum(axis=1)
least_engaged_cluster = engagement_center_totals.idxmin()

# Score every user against that reference cluster.
# NOTE(review): compute_score is defined in satisfaction_analytics; presumably
# it measures each user's distance from the reference centre — confirm there.
engagement_scores = satisfaction_analysis.compute_score(
    engagement_analysis.metrics,
    engagement_analysis.cluster_centers,
    engagement_features,
    'Engagement_Score',
    least_engaged_cluster,
)

In [14]:
# Count how many rows repeat an earlier 'MSISDN/Number' value in the metrics
msisdn_values = engagement_analysis.metrics['MSISDN/Number']
duplicate_count = msisdn_values.duplicated().sum()
print(f"Number of duplicate entries in 'MSISDN/Number': {duplicate_count}")

Number of duplicate entries in 'MSISDN/Number': 0


In [15]:
# Show the first rows of the engagement scores
preview_rows = 20
engagement_scores.head(preview_rows)

Unnamed: 0,MSISDN/Number,Engagement_Score
0,33613700000.0,3376757000000.0
1,33613700000.0,185107200.0
2,33613700000.0,941611600.0
3,33613700000.0,473926600.0
4,33613700000.0,61413630.0
5,33613700000.0,597157200.0
6,33613710000.0,154660700.0
7,33613710000.0,21795640.0
8,33613710000.0,344211300.0
9,33613710000.0,135566200.0


In [16]:
# User experience clusters
# Define features
experience_features = ['TCP Retransmission', 'RTT', 'Throughput']

# Aggregate and cluster
experience_df, cluster_centers = experience_analysis.k_means_clustering(experience_features)

# Rename the cluster column to lowercase. Rebind instead of renaming in place:
# the returned frames may be objects the analyser still holds (not visible
# from here), and an in-place rename would silently change them too.
experience_df = experience_df.rename(columns={'Cluster': 'cluster'})
cluster_centers = cluster_centers.rename(columns={'Cluster': 'cluster'})

# Identify worst experience cluster.
# Higher TCP retransmission and higher RTT mean a worse experience, while
# higher throughput means a better one, so the KPIs cannot simply be summed
# and minimised (that would point towards the *best* cluster, and the result
# would be dominated by whichever KPI has the largest numeric scale).
# Instead: min-max scale each KPI across the cluster centres, sign it by its
# direction, and take the cluster with the highest combined "badness".
kpi_badness_direction = pd.Series({'TCP Retransmission': 1, 'RTT': 1, 'Throughput': -1})
experience_centers = cluster_centers[experience_features]
# A KPI that is identical across all centres has zero span; use 1 to avoid 0/0.
center_span = (experience_centers.max() - experience_centers.min()).replace(0, 1)
experience_centers_scaled = (experience_centers - experience_centers.min()) / center_span
cluster_badness = experience_centers_scaled.mul(kpi_badness_direction, axis='columns').sum(axis=1)
worst_cluster = cluster_badness.idxmax()

In [None]:
# Compute user experience scores, using the worst-experience cluster as the
# reference.
# NOTE(review): compute_score is defined in satisfaction_analytics; presumably
# it measures each user's distance from the reference centre — confirm there.
experience_scores = satisfaction_analysis.compute_score(
    experience_df,
    cluster_centers,
    experience_features,
    'Experience_Score',
    worst_cluster,
)