# User Overview Analysis

### Import Libraries and Modules

In [None]:
import os
import sys
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Resolve the sibling ``src`` directory (relative to the current working
# directory) so the project modules imported below can be found.
src_dir = os.path.abspath(os.path.join(os.getcwd(), '..', 'src'))

# Keep exactly one copy of src_dir, at the front of sys.path. Re-running this
# cell would otherwise prepend another duplicate entry on every execution.
sys.path[:] = [entry for entry in sys.path if entry != src_dir]
sys.path.insert(0, src_dir)

# Drop any cached copies of the project modules so the imports that follow
# re-execute the source files instead of reusing the already-loaded versions.
sys.modules.pop('data_loader', None)
sys.modules.pop('user_overview', None)

from data_loader import DataLoader
from user_overview import UserOverview



### Load Data from PostgreSQL and create a UserOverview Object

In [None]:
# Run the SQL query through the project's DataLoader and pass the result
# straight into its cleaning step.
loader = DataLoader()
cleaned_df = loader.clean_data(
    loader.load_data("SELECT * FROM public.xdr_data")
)

# Later cells refer to the cleaned frame as ``df``; the analysis helper is
# built on that same frame.
df = cleaned_df
user_overview = UserOverview(df)

### Summary Statistics

In [None]:
# Ask the UserOverview helper for its dataset description and keep the result.
# NOTE(review): the return type is not visible here — presumably a table of
# summary statistics, per the section heading.
statistics = user_overview.describe_dataset()
# Bare trailing expression: rendered as the notebook cell's output.
statistics

### Top Handsets and Manufacturers


In [None]:
# Delegate to UserOverview.plot_top_handset_types(); the return value is not
# assigned. NOTE(review): presumably draws the chart inline — the method body
# is not visible here.
user_overview.plot_top_handset_types()

In [None]:
# Delegate to UserOverview.plot_top_handset_manufacturers(); the return value
# is not assigned. NOTE(review): presumably draws the chart inline — the
# method body is not visible here.
user_overview.plot_top_handset_manufacturers()

In [None]:
# Delegate to UserOverview.plot_top_handsets_per_manufacturer(); the return
# value is not assigned. NOTE(review): presumably draws the chart inline —
# the method body is not visible here.
user_overview.plot_top_handsets_per_manufacturer()

### xDR Aggregates per User

In [None]:
# Count how many rows of df each 'MSISDN/Number' value has; the result is a
# two-column frame: 'MSISDN/Number' and 'Count'.
sessions = (
    df.groupby('MSISDN/Number')
    .size()
    .reset_index(name='Count')
)

# Show the two most common per-user row counts and how many users have each.
sessions['Count'].value_counts().head(2)

In [None]:
# Columns that are summed per user alongside the session duration.
# No visible cell defines ``columns_media``, so running the notebook top to
# bottom would fail with a NameError; define it here unless an earlier cell
# has already provided it.
# NOTE(review): the names below are assumed from the usual xDR schema
# (per-application DL/UL byte counters) — confirm against df.columns.
if 'columns_media' not in globals():
    columns_media = [
        'Social Media DL (Bytes)', 'Social Media UL (Bytes)',
        'Google DL (Bytes)', 'Google UL (Bytes)',
        'Email DL (Bytes)', 'Email UL (Bytes)',
        'Youtube DL (Bytes)', 'Youtube UL (Bytes)',
        'Netflix DL (Bytes)', 'Netflix UL (Bytes)',
        'Gaming DL (Bytes)', 'Gaming UL (Bytes)',
        'Other DL (Bytes)', 'Other UL (Bytes)',
    ]

# Aggregates per user: one output row per distinct 'MSISDN/Number'.
aggregates_per_users = df.groupby('MSISDN/Number').agg({
    # Total session duration
    'Dur. (ms)': 'sum',

    # Sum of every column listed in columns_media
    **{col: 'sum' for col in columns_media}
}).reset_index()
                 

In [None]:
# Per-user total data volume: for each row of the aggregated frame, add up
# every column from columns_media whose name contains 'DL' or 'UL'.
aggregates_per_users['Total Data Volume (Bytes)'] = aggregates_per_users.loc[
    :,
    [col for col in columns_media if any(tag in col for tag in ('DL', 'UL'))],
].sum(axis='columns')

# Preview the first rows of the aggregated frame as the cell output.
aggregates_per_users.head()

### Top 5 Deciles

In [None]:
# Delegate the decile segmentation to UserOverview and keep the result.
# NOTE(review): which column is used for the deciles and what is aggregated
# per decile is decided inside segment_and_compute_decile() — not visible here.
decile_aggregates = user_overview.segment_and_compute_decile()
# Printed explicitly, so the output is the plain-text representation.
print(decile_aggregates)

### Univariate Analysis (Non-Graphical)

In [None]:
# Run the helper's univariate analysis and keep the result.
# NOTE(review): the returned object's shape is not visible here — presumably
# a table of per-column statistics.
uvt_analysis = user_overview.univariate_analysis()
# Bare trailing expression: rendered as the notebook cell's output.
uvt_analysis

### Univariate Analysis (Graphical)

In [None]:
# Delegate to UserOverview.graphical_univariate_analysis(); the return value
# is not assigned. NOTE(review): presumably draws the plots inline — the
# method body is not visible here.
user_overview.graphical_univariate_analysis()

### Bivariate Analysis

In [None]:
# Run the helper's bivariate analysis and keep the result.
# NOTE(review): which variable pairs are compared is decided inside
# bivariate_analysis() — not visible here.
bvt_analysis = user_overview.bivariate_analysis()
# Bare trailing expression: rendered as the notebook cell's output.
bvt_analysis

### Correlation Matrix

In [None]:
# Keep whatever plot_correlation_matrix() returns; the value is stored but
# not displayed by this cell. NOTE(review): presumably the method also draws
# the heatmap itself and returns the correlation matrix — confirm in
# UserOverview.
corr_matrix = user_overview.plot_correlation_matrix()

### PCA Analysis

In [None]:
# Run the helper's PCA step and keep the result.
# NOTE(review): the input columns, number of components and return type are
# decided inside UserOverview.pca_analysis() — not visible here.
pca_analysis = user_overview.pca_analysis()
# Bare trailing expression: rendered as the notebook cell's output.
pca_analysis