In [2]:
# Import necessary libraries and functions
import sys
import os
import pandas as pd

# Add the 'src' and 'scripts' folders to the Python path.
# Both folders are resolved relative to the current working directory, and each
# is appended only when missing so that re-running this cell does not keep
# growing sys.path with duplicate entries.
for _relative_dir in ('../src', '../scripts'):
    _module_dir = os.path.abspath(os.path.join(os.getcwd(), _relative_dir))
    if _module_dir not in sys.path:
        sys.path.append(_module_dir)

# Now import your functions
from user_analysis import (
    get_top_10_handsets, 
    get_top_3_manufacturers, 
    get_top_5_handsets_per_manufacturer, 
    aggregate_user_data
)

def _fetch_with_banner(banner, loader):
    """Print ``banner`` first, then call ``loader`` and return its result."""
    print(banner)
    return loader()


# Step 1: top 10 handsets
df_handsets = _fetch_with_banner(
    "Fetching the top 10 handsets used by customers...\n",
    get_top_10_handsets,
)
print(df_handsets)

# Step 2: top 3 handset manufacturers
df_manufacturers = _fetch_with_banner(
    "\nFetching the top 3 handset manufacturers...\n",
    get_top_3_manufacturers,
)
print(df_manufacturers)

# Step 3: top 5 handsets for each of the top 3 manufacturers
df_top_handsets_per_manufacturer = _fetch_with_banner(
    "\nFetching the top 5 handsets per top 3 manufacturers...\n",
    get_top_5_handsets_per_manufacturer,
)
print(df_top_handsets_per_manufacturer)

# Step 4: per-user aggregates, shown as summary statistics
df_user_aggregates = _fetch_with_banner(
    "\nAggregating user data (sessions, duration, download/upload data)...\n",
    aggregate_user_data,
)
aggregate_summary = df_user_aggregates.describe()
print(aggregate_summary)

# Optional: show the first rows of the aggregated frame as a preview
print("\nPreview of aggregated user data:\n")
aggregate_preview = df_user_aggregates.head()
print(aggregate_preview)

# Optional: Save the results to CSV files for further analysis.
# The output folder is resolved relative to the current working directory.
output_path = os.path.abspath(os.path.join(os.getcwd(), '../output'))
# exist_ok=True creates the folder when it is missing and does nothing when it
# already exists, which avoids the check-then-create race of testing
# os.path.exists() before calling os.makedirs().
os.makedirs(output_path, exist_ok=True)

# Each result frame is written without its index column.
df_handsets.to_csv(os.path.join(output_path, 'top_10_handsets.csv'), index=False)
df_manufacturers.to_csv(os.path.join(output_path, 'top_3_manufacturers.csv'), index=False)
df_top_handsets_per_manufacturer.to_csv(os.path.join(output_path, 'top_5_handsets_per_manufacturer.csv'), index=False)
df_user_aggregates.to_csv(os.path.join(output_path, 'user_aggregates.csv'), index=False)

print("\nData saved successfully to the output folder.")


Fetching the top 10 handsets used by customers...

                   Handset Type  usage_count
0              Huawei B528S-23A        19752
1       Apple iPhone 6S (A1688)         9419
2        Apple iPhone 6 (A1586)         9023
3                     undefined         8987
4        Apple iPhone 7 (A1778)         6326
5       Apple iPhone Se (A1723)         5187
6        Apple iPhone 8 (A1905)         4993
7       Apple iPhone Xr (A2105)         4568
8  Samsung Galaxy S8 (Sm-G950F)         4520
9        Apple iPhone X (A1901)         3813

Fetching the top 3 handset manufacturers...



  df = pd.read_sql_query(query, connection)
  df = pd.read_sql_query(query, connection)


  Handset Manufacturer  usage_count
0                Apple        59565
1              Samsung        40839
2               Huawei        34423

Fetching the top 5 handsets per top 3 manufacturers...



  df = pd.read_sql_query(query, connection)


  Handset Manufacturer             Handset Type  usage_count
0                Apple  Apple iPhone 6S (A1688)         9419
1                Apple   Apple iPhone 6 (A1586)         9023
2                Apple   Apple iPhone 7 (A1778)         6326
3                Apple  Apple iPhone Se (A1723)         5187
4                Apple   Apple iPhone 8 (A1905)         4993

Aggregating user data (sessions, duration, download/upload data)...



  df = pd.read_sql_query(query, connection)


            user_id   num_sessions  total_duration  total_download  \
count  1.068560e+05  106857.000000    1.068570e+05    1.068570e+05   
mean   4.511474e+10       1.394480    1.468438e+05    6.382035e+08   
std    2.889423e+12       1.737797    2.892198e+05    1.558474e+09   
min    3.360100e+10       0.000000    7.142000e+03    8.827082e+06   
25%    3.365088e+10       1.000000    7.130900e+04    3.148302e+08   
50%    3.366365e+10       1.000000    1.027410e+05    5.703713e+08   
75%    3.368344e+10       2.000000    1.727990e+05    8.073701e+08   
max    8.823971e+14     504.000000    7.244640e+07    4.869236e+11   

       total_upload  social_media_data   google_data    email_data  \
count  1.068570e+05       1.068570e+05  1.068570e+05  1.068570e+05   
mean   5.772370e+07       2.520191e+06  8.072645e+06  2.515148e+06   
std    1.400375e+08       6.213715e+06  1.933071e+07  6.118615e+06   
min    2.866892e+06       1.200000e+01  2.070000e+02  9.700000e+01   
25%    3.639554e+07