In [1]:
import os
import sys
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import pairwise_distances
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sqlalchemy import create_engine

# Resolve the notebook's working directory and its parent. The parent goes on
# sys.path, presumably so the project packages imported later
# (DB_connection, scripts) can be resolved.
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)

# Append only when missing: re-running this cell must not stack duplicate
# entries on sys.path.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)


# Ignore warnings
# NOTE(review): this silences every warning category for the whole kernel,
# deprecation notices from pandas/scikit-learn included.
import warnings
warnings.filterwarnings("ignore")

: 

In [None]:
# import Postgres connection from DB_connection folder
from DB_connection.connection import PostgresConnection
from scripts.satisfaction_analyzer import SatisfactionAnalyer
from scripts.engagement_analyser import EngagementAnalyzer
from scripts.experience_analyzer import ExperienceAnalyzer

: 

In [None]:
# Build the PostgresConnection wrapper and open the connection that the
# query cells use
db = PostgresConnection()
db.connect()

: 

In [None]:
# SQL that retrieves every row and column of the xdr_data table
query = 'select * from xdr_data'
# NOTE(review): the same query is also passed to db.fetch_data() in the
# following cell - confirm this call is still needed and the table is not
# read twice.
db.execute_query(query)

: 

In [None]:
# Keep the fetched xdr_data rows in `df` and preview the first few
df = db.fetch_data(query)
df.head()

: 

### User Engagement 

In [None]:
# One analyzer object per task, each constructed from the same `df`
engagement_analyser = EngagementAnalyzer(df)
satisfaction_analyser = SatisfactionAnalyer(df)
experience_analyser = ExperienceAnalyzer(df)

: 

In [None]:
# user_engagement_df=engagement_analyser.prepare_user_engagement_data(df)

: 

In [None]:
# Engagement data produced by the satisfaction analyzer from `df`; it is the
# input to the clustering, centroid and scoring cells that follow.
user_engagement_df = satisfaction_analyser.user_engagement(df)

: 

In [None]:
# Inspect the engagement data returned by user_engagement()
user_engagement_df

: 

In [None]:
# Engagement features handed to the analyzer when it identifies the least
# engaged cluster
metrics = [
    'Session Frequency',
    'Total Duration',
    'Total Traffic (Bytes)',
]

least_engaged_cluster = satisfaction_analyser.get_least_engaged_cluster(
    user_engagement_df,
    'Engagement Cluster',
    metrics,
)


: 

#### User Experiences

In [None]:
# Experience data is read from a CSV in the working directory
# (presumably exported by the Task-3 notebook - TODO confirm).
user_experience_df = pd.read_csv("user_experience_data.csv")
# Preview only: the drop is not assigned back, so the frame keeps 'Handset Type'
user_experience_df.drop(columns=['Handset Type']).head()

: 

In [None]:
# Experience features handed to the analyzer when it identifies the worst
# experience cluster. This rebinds `metrics` (previously the engagement
# feature list); the centroid cell further down reads this experience list.
metrics = [
    'Avg TCP DL Retransmission',
    'Avg RTT DL',
    'Avg Throughput DL',
]
worst_experience_cluster = satisfaction_analyser.get_worst_experience_cluster(
    user_experience_df,
    'Experience Cluster',
    metrics,
)

: 

In [None]:
# The user engagement data (user_engagement_df) and the experience data
# (user_experience_df) come from Task-2 and Task-3.

# Step 1: Calculate the centroids of the least engaged cluster and worst experience cluster
# The engagement centroid is restricted to the engagement features. Averaging
# the whole frame would fold the 'Engagement Cluster' label (and any identifier
# column) into the centroid, and DataFrame.mean() raises on non-numeric
# columns in pandas >= 2.0.
engagement_metrics = ['Session Frequency', 'Total Duration', 'Total Traffic (Bytes)']
least_engaged_mask = user_engagement_df['Engagement Cluster'] == least_engaged_cluster
least_engaged_centroid = user_engagement_df.loc[least_engaged_mask, engagement_metrics].mean().values

# Get the centroid for the worst experience cluster
# `metrics` holds the experience feature list at this point in the notebook.
worst_experience_mask = user_experience_df['Experience Cluster'] == worst_experience_cluster
worst_experience_centroid = user_experience_df.loc[worst_experience_mask, metrics].mean().values

: 

In [None]:
# Inspect the worst-experience centroid: the per-metric mean over the rows of
# the worst experience cluster
worst_experience_centroid

: 

In [None]:
# Score both frames through the analyzer and rebind them to the results.
# NOTE(review): because the inputs are overwritten, re-running this cell feeds
# the already-scored frames back in - confirm calculate_scores tolerates that.
user_engagement_df, user_experience_df = satisfaction_analyser.calculate_scores(
    user_engagement_df,
    user_experience_df,
)

: 

In [None]:
# Inspect the engagement frame as returned by calculate_scores()
user_engagement_df

: 

In [None]:
# Inspect the experience frame as returned by calculate_scores()
user_experience_df

: 

In [None]:
# Compute the satisfaction score from the two scored frames; the analyzer
# returns the merged frame together with the top 10 satisfied customers
merged_df, top_10_satisfied = satisfaction_analyser.calculate_satisfaction(
    user_engagement_df,
    user_experience_df,
)

: 

In [None]:
# Merged dataframe with the same index
# (returned by calculate_satisfaction from the engagement and experience frames)
merged_df

: 

In [None]:
# Display the top 10 satisfied customers returned by calculate_satisfaction()
top_10_satisfied
  

: 

In [None]:
# Plot the top 10 satisfied customers; the chart itself is produced inside
# the analyzer's plot_top_10_satisfied()
satisfaction_analyser.plot_top_10_satisfied(top_10_satisfied)

: 

In [None]:
# Build the regression model on the merged frame; the analyzer hands back the
# model together with its coefficients and intercept.
# NOTE(review): the features and target are chosen inside
# build_regression_model - confirm there before interpreting the coefficients.
model, coefficients, intercept = satisfaction_analyser.build_regression_model(merged_df)

: 

In [None]:
# K-means clustering on the engagement & experience scores
# merged_df is rebound to the frame the analyzer returns, alongside the model.
# NOTE(review): the input is overwritten, so a re-run clusters the
# already-clustered frame - confirm perform_kmeans_clustering tolerates that.
merged_df, kmeans_model = satisfaction_analyser.perform_kmeans_clustering(merged_df)

: 

In [None]:
# Display merged data after clustering
merged_df

: 

In [None]:
# Aggregate the average satisfaction & experience score per cluster
# (computed inside the analyzer) and print the result as plain text
cluster_aggregation = satisfaction_analyser.aggregate_scores_per_cluster(merged_df)
print(cluster_aggregation)

: 

In [None]:
# Task 4.4 - K-means clustering on the engagement & experience scores
# NOTE(review): this cell repeats, by hand, the clustering and aggregation
# already done through the analyzer above, and overwrites merged_df['Cluster']
# and cluster_aggregation - confirm both versions are meant to stay.
score_columns = ['Engagement Score', 'Experience Score']
X_cluster = merged_df[score_columns]

# Standardise both scores so they enter the clustering on the same scale
scaler = StandardScaler()
X_cluster_scaled = scaler.fit_transform(X_cluster)

# n_init is pinned explicitly: its default changed from 10 to 'auto' in
# scikit-learn 1.4, so leaving it implicit makes the cluster assignment depend
# on the installed library version.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)
merged_df['Cluster'] = kmeans.fit_predict(X_cluster_scaled)

# Task 4.5 - Aggregate the average satisfaction & experience score per cluster
cluster_aggregation = merged_df.groupby('Cluster').agg({
    'Satisfaction Score': 'mean',
    'Experience Score': 'mean'
})
cluster_aggregation

: 

#### Export user satisfaction scores into the PostgreSQL database

In [None]:
# Connection settings for the export target. Each value is read from the
# environment when set and falls back to the previous local defaults, so real
# credentials can be supplied without writing them into the notebook.
db_config = {
    'user': os.getenv('DB_USER', 'postgres'),
    'password': os.getenv('DB_PASSWORD', 'postgres'),
    'host': os.getenv('DB_HOST', 'localhost'),  # or your database server address
    'database': os.getenv('DB_NAME', 'teleco'),
}

# Export only the user identifier and the three scores
user_score_data = merged_df[['MSISDN/Number', 'Engagement Score', 'Experience Score', 'Satisfaction Score']]

# NOTE(review): the method is named export_to_mysql, while the section heading
# and the 'postgres' user point at PostgreSQL - confirm which backend it targets.
satisfaction_analyser.export_to_mysql(user_score_data, table_name='user_score', db_config=db_config)

: 

In [None]:
# Inspect the exported subset: the user identifier plus the three score columns
user_score_data

: 

: 