In [None]:
import pandas as pd
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '../src')))
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '../utils')))
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '../scripts')))
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '../databases')))
from connections.database_connector import DatabaseConnection
from data_loader.teleco_data_loader import TelecoDataLoader
from cleaning.data_cleaning import DataCleaner
from engagement_analysis.telecom_engagement_analysis import TelecomEngagementAnalysis
from satisfaction_analysis.engagement_experience_scores import EngagementExperienceScores
from satisfaction_analysis.top_satifactions_analysis import TopSatisfactionAnalysis

from satisfaction_analysis.satisfaction_kmeans import SatisfactionKMeans
from experience_analytics.experience_clustering import ExperienceClustering


from export_to_db import DbExporter


In [None]:
# Open the database connection. Every setting is read from the process
# environment; a variable that is not set is passed through as None.
_db_env_vars = {
    'db_name': 'DB_NAME',
    'user': 'DB_USER',
    'password': 'DB_PASSWORD',
    'host': 'DB_HOST',
    'port': 'DB_PORT',
}
db_connection = DatabaseConnection(
    **{keyword: os.environ.get(env_var) for keyword, env_var in _db_env_vars.items()}
)
db_connection.connect()

In [None]:
# Initialize the TelecoDataLoader object with the database connection
data_loader = TelecoDataLoader(db_connection=db_connection)

# Load the "xdr_data" table. The connection is closed in a `finally` block so
# that a failing load does not leave it open.
try:
    df = data_loader.load_data("xdr_data")
finally:
    db_connection.close()

print(type(df))

# Kept as the last expression of the cell: Jupyter only renders the value of
# a cell's final expression, so a preview placed earlier would be discarded.
df.head()

In [None]:
# Initialize the DataCleaner on a copy of the loaded frame.
# The cleaner modifies the frame it is given in place, so handing it `df`
# directly would make this cell non-idempotent: re-running it would clean and
# unit-convert data that had already been cleaned and converted. Working on a
# copy keeps `df` as the raw load and lets the cell be re-run safely.
data_cleaner = DataCleaner(df.copy())

# Clean the data (no assignment needed; the cleaner updates its own frame)
data_cleaner.clean_data()

# Change KB and bytes to MB
data_cleaner.convert_units_to_mb()

# Handle missing values and outliers
data_cleaner.handle_missing_and_outliers()

# Access the cleaned DataFrame
cleaned_df = data_cleaner.df


In [None]:
# Engagement analysis runs on the cleaned frame.
engagement_analysis = TelecomEngagementAnalysis(data=cleaned_df)

# Step 1: per-customer aggregation of the engagement metrics.
engagement_data = engagement_analysis.aggregate_metrics_by_customer()

# Step 2: normalization of those metrics.
normalized_data = engagement_analysis.normalize_metrics()

# Step 3: K-Means with three clusters.
_engagement_cluster_count = 3
engagement_data_with_clusters = engagement_analysis.k_means_clustering(
    n_clusters=_engagement_cluster_count
)

In [None]:
# Experience clustering also starts from the cleaned frame.
experience_clustering = ExperienceClustering(df=cleaned_df)

# `run()` drives the whole clustering process in one call.
experience_clustering.run()

# Keep only the customer identifier and the assigned cluster label.
_experience_columns = ['MSISDN/Number', 'Cluster']
experience_data = experience_clustering.df[_experience_columns]

In [None]:
# Rename the experience cluster label BEFORE merging.
# If the engagement frame also had a 'Cluster' column, pandas would suffix the
# two as 'Cluster_x' / 'Cluster_y' during the merge, and a rename of 'Cluster'
# applied afterwards would silently match nothing. Renaming first guarantees
# the merged frame has an 'experience_cluster' column. `rename` returns a new
# frame, so `experience_data` itself is left untouched.
experience_labels = experience_data.rename(columns={'Cluster': 'experience_cluster'})

# Merge engagement and experience data, keeping customers present in both
user_df = pd.merge(
    engagement_data_with_clusters,
    experience_labels,
    on='MSISDN/Number',
    how='inner',
)

# Clustering models fitted by the two analyses
engagement_clusters = engagement_analysis.kmeans  # Assuming `kmeans` is the clustering model
experience_clusters = experience_clustering.kmeans  # Assuming `kmeans` is the clustering model

# Instantiate the EngagementExperienceScores class
satisfaction_analysis = EngagementExperienceScores(
    user_data=user_df,
    engagement_clusters=engagement_clusters,
    experience_clusters=experience_clusters
)

# Assign scores to users
user_scores_df = satisfaction_analysis.assign_scores_to_users()

# Display results (last expression of the cell, so Jupyter renders it)
user_scores_df.head()

In [None]:
# Build the top-satisfaction analysis from the merged user frame and the two
# clustering models, then preview the customers it reports.
_top_satisfaction_inputs = {
    'user_data': user_df,
    'engagement_clusters': engagement_clusters,
    'experience_clusters': experience_clusters,
}
satisfaction_analysis = TopSatisfactionAnalysis(**_top_satisfaction_inputs)

satisfied_customers = satisfaction_analysis.satisfied_customers()
satisfied_customers.head()

In [None]:
# K-Means over the per-user scores.
kmeans_analysis = SatisfactionKMeans(data=user_scores_df)

# Preprocessing has to run before the clustering call.
kmeans_analysis.preprocess_data()

# Cluster with k=2.
_satisfaction_cluster_count = 2
clustered_data = kmeans_analysis.run_kmeans(k=_satisfaction_cluster_count)

# Plot the clusters.
kmeans_analysis.visualize_clusters()

# Print the first rows of the frame returned by the clustering call.
print(clustered_data.head())

In [None]:
# Keep the customer identifier and the two score columns for the export.
_score_columns = ['MSISDN/Number', 'engagement_score', 'experience_score']
selected_columns = clustered_data[_score_columns]
print(selected_columns.head())


In [None]:
from data_export.data_to_export import DataToExport

In [None]:
# Combine the satisfied-customer frame with the selected score columns
# through DataToExport, then print the merged result.
data_to_export = DataToExport(satisfied_customers, selected_columns)
final_df = data_to_export.merge_data()
print(final_df)

In [None]:

# Initialize the DbExporter and hand it the merged frame via
# export_to_postgres. Connection settings are not passed here.
# NOTE(review): presumably DbExporter resolves them itself - confirm.
db_exporter = DbExporter()
db_exporter.export_to_postgres(final_df)
