In [None]:
import os
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sqlalchemy import create_engine



In [None]:
# Source database and table holding the records analysed in this notebook.
database_name = 'processed_telecom'
table_name = 'xdr_data'

# Connection settings. Each value can be overridden through an environment
# variable so that real credentials do not have to be stored in the notebook;
# the fallbacks are the values this notebook used before and are only
# appropriate for a local development database.
connection_params = {
    "host": os.environ.get("TELECOM_DB_HOST", "localhost"),
    "user": os.environ.get("TELECOM_DB_USER", "postgres"),
    "password": os.environ.get("TELECOM_DB_PASSWORD", "00000000"),
    "port": os.environ.get("TELECOM_DB_PORT", "5432"),
    "database": database_name,
}

# quote_plus escapes characters such as '@', ':' or '/' which would otherwise
# be parsed as URL delimiters when they occur in the user name or password.
engine = create_engine(
    "postgresql+psycopg2://"
    f"{quote_plus(connection_params['user'])}:{quote_plus(connection_params['password'])}"
    f"@{connection_params['host']}:{connection_params['port']}/{connection_params['database']}"
)

# pd.read_sql accepts a str or a SQLAlchemy Selectable (select or text object).
# The table name comes from `table_name` so it is defined in exactly one place.
sql_query = f'SELECT * FROM {table_name} '

df = pd.read_sql(sql_query, con=engine)

In [None]:
 #Perform Clustering
# Cluster the records on the three network metrics.
# random_state pins the centroid initialisation so the cluster labels, the
# centroids and every score derived from them are reproducible across kernel
# restarts. n_init is set explicitly because its default value differs between
# scikit-learn releases.
# NOTE(review): the features are clustered on their raw scales, so the column
# with the largest magnitude dominates the Euclidean distance. StandardScaler
# is imported by this notebook but never applied; scaling here would also
# require the later centroid-distance cells to work in the scaled space, so
# confirm the intent before changing it.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)  # Choose an appropriate value of k
df['cluster'] = kmeans.fit_predict(
    df[['Avg RTT DL (ms)', 'Avg Bearer TP DL (kbps)', 'TCP DL Retrans. Vol (Bytes)']]
)  # Adjust features accordingly

In [None]:


# Calculate Centroids
# Read the centroids from whichever KMeans model is currently bound to
# `kmeans`; the distance calculations below compare records against them.
cluster_centers = kmeans.cluster_centers_

# Step 3: Calculate Distance to Centroids for Each User
def calculate_distance(row, centroids=None,
                       features=('Avg RTT DL (ms)', 'Avg Bearer TP DL (kbps)', 'TCP DL Retrans. Vol (Bytes)')):
    """Return the Euclidean distance from one record to its nearest centroid.

    Parameters
    ----------
    row : pandas.Series
        One record; it must contain every label listed in ``features``.
        Any additional entries are ignored.
    centroids : array-like, optional
        Either a single centroid (1-D) or one centroid per row (2-D), with
        coordinates in the same order as ``features``. When omitted, the
        notebook-level ``cluster_centers`` is used, which is the behaviour
        callers such as ``df.apply(calculate_distance, axis=1)`` rely on.
    features : sequence of str, optional
        Labels of the entries in ``row`` that form the point. Defaults to the
        three metrics the experience KMeans model is fitted on.

    Returns
    -------
    numpy.float64
        The smallest of the distances between the point and each centroid.

    Raises
    ------
    KeyError
        If ``row`` lacks one of the ``features``.
    ValueError
        If ``centroids`` is empty or a value cannot be converted to float.
    """
    if centroids is None:
        centroids = cluster_centers
    # Rows taken from a mixed-dtype DataFrame arrive as object Series; coerce
    # to float once so the arithmetic below runs on a numeric array.
    user_point = np.asarray(row[list(features)], dtype=float)
    centroid_matrix = np.atleast_2d(np.asarray(centroids, dtype=float))
    # One distance per centroid, computed in a single vectorised call.
    distances = np.linalg.norm(centroid_matrix - user_point, axis=1)
    return distances.min()  # Choose the least distance

# Score every record with its distance to the nearest centroid.
# Only the three metric columns are handed to apply(): calculate_distance reads
# nothing else, and this avoids building a Series of every column for each row.
# NOTE(review): the column is named 'engagement_score' but is derived from the
# same network metrics as the experience clustering - confirm this is intended.
df['engagement_score'] = (
    df[['Avg RTT DL (ms)', 'Avg Bearer TP DL (kbps)', 'TCP DL Retrans. Vol (Bytes)']]
    .apply(calculate_distance, axis=1)
)

# Step 4: Identify the worst-experience cluster and take its centroid.
# Selecting the centroid nearest to the mean of all centroids returns the most
# "average" cluster, which says nothing about experience quality. The centroids
# are instead ranked on the metrics themselves.
# Assumption (feature order matches the KMeans fit: RTT, throughput, TCP
# retransmission): a higher RTT and a higher retransmission volume degrade the
# experience, a higher throughput improves it - adjust the signs if not.
_worse_when_higher = np.array([1.0, -1.0, 1.0])
_centroid_spread = cluster_centers.std(axis=0)
# A feature that is identical across all centroids cannot rank them; using 1
# as its divisor keeps its contribution at zero and avoids a 0/0 division.
_centroid_spread[_centroid_spread == 0] = 1.0
# Standardise each feature across the centroids so that no single unit
# dominates, then add up the signed deviations into one badness value.
_centroid_badness = (
    (cluster_centers - cluster_centers.mean(axis=0)) / _centroid_spread
) @ _worse_when_higher
worst_experience_cluster = np.argmax(_centroid_badness)
cluster_centers_worst_experience = cluster_centers[worst_experience_cluster]

# Step 5: Calculate Experience Score
# Euclidean distance from every record to the worst-experience centroid,
# computed with one vectorised NumPy call instead of a Python-level lambda
# evaluated once per row.
_experience_points = df[
    ['Avg RTT DL (ms)', 'Avg Bearer TP DL (kbps)', 'TCP DL Retrans. Vol (Bytes)']
].to_numpy(dtype=float)
df['experience_score'] = np.linalg.norm(
    _experience_points - cluster_centers_worst_experience, axis=1
)



# Print or use df['engagement_score'] for further analysis
#print(df[['MSISDN/Number', 'engagement_score', 'experience_score']])


In [None]:
# Columns shown in each of the ranking tables printed below.
score_columns = ['MSISDN/Number', 'engagement_score', 'experience_score']

# Order the rows by each score, smallest value first.
df_sorted_engagement = df.sort_values(by='engagement_score', ascending=True)
df_sorted_experience = df.sort_values(by='experience_score', ascending=True)

# "Top" is the first five rows of the ascending order (the lowest scores);
# "bottom" is the last five rows (the highest scores).
top_5_engagement = df_sorted_engagement.head(5)
bottom_5_engagement = df_sorted_engagement.tail(5)
top_5_experience = df_sorted_experience.head(5)
bottom_5_experience = df_sorted_experience.tail(5)

# Print each selection under its heading.
for heading, ranked_rows in (
    ("Top 5 by engagement score:", top_5_engagement),
    ("\nBottom 5 by engagement score:", bottom_5_engagement),
    ("\nTop 5 by experience score:", top_5_experience),
    ("\nBottom 5 by experience score:", bottom_5_experience),
):
    print(heading)
    print(ranked_rows[score_columns])


In [None]:
# Satisfaction score: arithmetic mean of the engagement and experience scores.
df['satisfaction_score'] = (df['engagement_score'] + df['experience_score']) / 2

# Report top 10 satisfied customers.
# experience_score is a distance from the worst-experience centroid, so a
# larger value means the record lies further from the worst case. The most
# satisfied rows are therefore the ones with the LARGEST satisfaction_score;
# nsmallest would list the least satisfied rows instead.
top_10_satisfied = df.nlargest(10, 'satisfaction_score')
print(top_10_satisfied[['MSISDN/Number', 'satisfaction_score']])


In [None]:
# Predictors are the three network metrics; the target is the satisfaction score.
regression_features = ['Avg RTT DL (ms)', 'Avg Bearer TP DL (kbps)', 'TCP DL Retrans. Vol (Bytes)']
X_train, y_train = df[regression_features], df['satisfaction_score']

# Fit the linear regression (fit() returns the fitted estimator itself).
model = LinearRegression().fit(X_train, y_train)

# Predict the satisfaction score for the same rows the model was fitted on.
df['predicted_satisfaction_score'] = model.predict(X_train)

In [None]:
# Split the rows into two groups using the engagement and experience scores.
# KMeans is already imported in the notebook's first cell, so it is not
# imported again here.
X_engagement_experience = df[['engagement_score', 'experience_score']]
# random_state and n_init are fixed so that the cluster labels are reproducible
# across runs and do not depend on the installed scikit-learn version's defaults.
# NOTE: this rebinds `kmeans`, replacing the 3-cluster model fitted earlier;
# re-running the centroid cell after this one would read these 2-feature centroids.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)
df['satisfaction_cluster'] = kmeans.fit_predict(X_engagement_experience)


In [None]:
# Mean satisfaction and experience score within each satisfaction cluster.
cluster_scores = (
    df.groupby('satisfaction_cluster')[['satisfaction_score', 'experience_score']].mean()
)
cluster_scores.head(5)

In [None]:
# pymysql is not referenced directly below; SQLAlchemy loads it through the
# 'mysql+pymysql' URL scheme. Keeping the import makes a missing driver fail
# here rather than during the connection attempt.
import pymysql
from sqlalchemy import create_engine

# Connect to MySQL database.
# The literal URL only carries placeholder credentials, so the real connection
# string is read from the SATISFACTION_DB_URL environment variable when it is
# set; the original literal remains the fallback.
# NOTE: this rebinds `engine`, which previously pointed at the PostgreSQL source.
engine = create_engine(
    os.environ.get(
        'SATISFACTION_DB_URL',
        'mysql+pymysql://username:password@localhost/database_name',
    )
)

# Export DataFrame to MySQL database. if_exists='replace' drops an existing
# 'satisfaction_scores' table before writing; the DataFrame index is not stored.
df.to_sql('satisfaction_scores', con=engine, if_exists='replace', index=False)
