In [None]:
# Importing necessary libraries
import os

from scripts.data_processing import load_data, clean_data, segment_users, get_top_handsets, get_top_manufacturers, get_top_handsets_per_manufacturer
from scripts.eda_functions import basic_metrics, compute_dispersion, bivariate_analysis, compute_correlation_matrix
from scripts.visualizations import plot_univariate, plot_bivariate, plot_correlation_matrix
from scripts.pca_functions import apply_pca, interpret_pca
from scripts.db_connection import get_data_from_postgres


In [None]:
# Define database configuration.
# Connection settings come from the standard PostgreSQL environment variables
# so that credentials are never stored in the notebook or its history.
# Non-secret settings fall back to the local-development values used before.
# NOTE(review): a password was previously committed in this cell; it should be
# rotated, since it remains visible in version history.
db_password = os.environ.get('PGPASSWORD')
if not db_password:
    # Fail early with an actionable message instead of a driver-level
    # authentication error further down.
    raise RuntimeError(
        "Database password not set: export PGPASSWORD before running this notebook."
    )

db_config = {
    'host': os.environ.get('PGHOST', 'localhost'),
    'database': os.environ.get('PGDATABASE', 'telecom_data'),
    'user': os.environ.get('PGUSER', 'postgres'),
    'password': db_password,
}

# Define SQL query
query = "SELECT * FROM xdr_data"

# Get data from PostgreSQL
df = get_data_from_postgres(query, db_config)

In [None]:
# Clean the dataset.
# NOTE(review): `df` is rebound to the result, so the raw query output is no
# longer reachable under this name, and re-running this cell passes the
# already-processed frame back into clean_data — confirm that is safe.
df = clean_data(df)

In [None]:
# Identify the top handsets.
# NOTE(review): no count is passed here; presumably the "top 10" cutoff is the
# default inside get_top_handsets — confirm.
top_handsets = get_top_handsets(df)
print(top_handsets)


In [None]:
# Identify the top manufacturers.
# NOTE(review): no count is passed here; presumably the "top 3" cutoff is the
# default inside get_top_manufacturers — confirm.
top_manufacturers = get_top_manufacturers(df)
print(top_manufacturers)

In [None]:
# Rank handsets within each of the top manufacturers. The manufacturers are
# taken from the index of the `top_manufacturers` result.
# NOTE(review): the "top 5" cutoff is not passed here; presumably it is a
# default inside get_top_handsets_per_manufacturer — confirm.
manufacturer_labels = top_manufacturers.index
top_handsets_manufacturers = get_top_handsets_per_manufacturer(df, manufacturer_labels)
print(top_handsets_manufacturers)

In [None]:
# Segment users on the download activity duration column and preview the
# first rows of the result.
# NOTE(review): `df` is rebound to whatever segment_users returns; re-running
# this cell segments the already-segmented frame — confirm that is intended.
segmentation_column = 'Activity Duration DL (ms)'
df = segment_users(df, segmentation_column)
print(df.head())

In [None]:
# Perform basic EDA: compute summary metrics for the current frame and print
# them.
# NOTE(review): which statistics basic_metrics reports is defined in
# scripts.eda_functions and is not visible here.
metrics = basic_metrics(df)
print(metrics)


In [None]:
# Exploratory plots: a univariate plot of 'Activity Duration DL (ms)', a
# bivariate plot of average downlink vs. uplink bearer throughput, and a
# correlation-matrix plot.
plot_univariate(df, 'Activity Duration DL (ms)')
plot_bivariate(df, 'Avg Bearer TP DL (kbps)', 'Avg Bearer TP UL (kbps)')
# NOTE(review): this call passes the full DataFrame, while the later app-data
# cell passes a precomputed correlation matrix to the same helper — confirm
# which input plot_correlation_matrix expects.
plot_correlation_matrix(df)

In [None]:
# Total data per row is taken to be download bytes plus upload bytes.
download_bytes = df['Total DL (Bytes)']
upload_bytes = df['Total UL (Bytes)']
df['total_data'] = download_bytes + upload_bytes

# Per-application traffic columns: every download column first, then every
# upload column, each group in the same application order.
app_names = ['Social Media', 'Google', 'Email', 'Youtube', 'Netflix', 'Gaming', 'Other']
directions = ['DL', 'UL']
features = [
    f'{app} {direction} (Bytes)'
    for direction in directions
    for app in app_names
]

# Perform bivariate analysis of each application column against total data
bivariate_results = bivariate_analysis(df, 'total_data', features)
print(bivariate_results)

In [None]:
# Correlation matrix for app data: compute it over the per-application
# columns listed in `features`, then plot the result.
# NOTE(review): the helper receives a precomputed matrix here but the full
# DataFrame in the earlier plotting cell — confirm which input
# plot_correlation_matrix expects.
corr_matrix = compute_correlation_matrix(df, features)
plot_correlation_matrix(corr_matrix)

In [None]:
# Reduce the per-application columns to a small number of principal
# components, then hand the projection and its explained variance to the
# interpretation helper.
N_PCA_COMPONENTS = 2
pca_results, explained_variance = apply_pca(df, features, n_components=N_PCA_COMPONENTS)
interpret_pca(pca_results, explained_variance)