In [None]:
import os
import pandas as pd
from dotenv import load_dotenv # Used to securely load environment variables from a .env file.
from sqlalchemy import create_engine # Provides tools for connecting to and interacting with SQL databases.
from urllib.parse import quote_plus # Ensures that special characters in the database password are safely encoded for use in the connection string.

In [None]:
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Fail fast with an actionable message when the secret is missing: os.getenv would
# return None, and quote_plus(None) raises an opaque
# "TypeError: quote_from_bytes() expected bytes" that hides the real cause.
if os.getenv("DB_PASSWORD") is None:
    raise RuntimeError(
        "Environment variable DB_PASSWORD is not set. "
        "Define it in the .env file (or export it) before running this notebook."
    )

# URL-encode the password so special characters are safe inside a connection URL.
password = quote_plus(os.getenv("DB_PASSWORD")) # The password should be stored in the .env file

In [None]:
# Build a SQLAlchemy engine for the MySQL database `case_clara` on localhost, using the
# PyMySQL driver and authenticating as `root` with the URL-encoded password from above.
# NOTE(review): the engine is not used by any cell visible in this section — confirm it
# is needed further down, and consider moving user/host/database into the .env file too.
engine = create_engine(f"mysql+pymysql://root:{password}@localhost/case_clara")

In [None]:
# Load the funnel metrics from CSV. The path is relative to the notebook's working
# directory, so the notebook must be run from a folder that is a sibling of `data/`.
df_metrics = pd.read_csv('../data/funnel_metrics.csv')

In [None]:
# Preview the first rows to sanity-check the columns that were loaded.
df_metrics.head()

In [None]:
# Inspect column dtypes and non-null counts.
df_metrics.info() # `date` is loaded as object dtype rather than datetime, hence the conversion in the next cell

In [None]:
# Convert `date` to a datetime dtype so it can be used with time-based grouping
# (the weekly/monthly trends below group on this column with pd.Grouper).
# NOTE(review): no explicit `format=` is passed, so pandas infers the layout —
# confirm the CSV dates are unambiguous (e.g. ISO 8601).
df_metrics['date'] = pd.to_datetime(df_metrics['date'])

In [None]:
# Stage-to-stage drop-off: the value recorded at one funnel stage minus the value
# recorded at the following stage (home -> search -> payment -> confirmation).
# Columns are added to df_metrics in place; re-running the cell recomputes the same values.
df_metrics["dropoff_home_to_search"] = df_metrics["home_visits"].sub(
    df_metrics["searches"]
)
df_metrics["dropoff_search_to_payment"] = df_metrics["searches"].sub(
    df_metrics["payments"]
)
df_metrics["dropoff_payment_to_confirmation"] = df_metrics["payments"].sub(
    df_metrics["confirmations"]
)

In [None]:
# Mean of each drop-off column for every (device, sex) combination.
# as_index=False keeps the grouping keys as ordinary columns, giving a flat table.
dropoff_avg = (
    df_metrics
    .groupby(['device', 'sex'], as_index=False)[
        [
            'dropoff_home_to_search',
            'dropoff_search_to_payment',
            'dropoff_payment_to_confirmation',
        ]
    ]
    .mean()
)

In [None]:
# Display the average drop-off per (device, sex) computed in the previous cell.
dropoff_avg

In [None]:
# Mean of every conversion-rate column per device.
# as_index=False keeps `device` as an ordinary column instead of the index.
device_conversion = (
    df_metrics
    .groupby('device', as_index=False)[
        [
            'search_conversion_rate',
            'payment_conversion_rate',
            'confirmation_conversion_rate',
            'overall_conversion_rate',
        ]
    ]
    .mean()
)

In [None]:
# Display the per-device average conversion rates computed in the previous cell.
device_conversion

In [None]:
# Mean of every conversion-rate column per sex.
# as_index=False keeps `sex` as an ordinary column instead of the index.
sex_conversion = (
    df_metrics
    .groupby('sex', as_index=False)[
        [
            'search_conversion_rate',
            'payment_conversion_rate',
            'confirmation_conversion_rate',
            'overall_conversion_rate',
        ]
    ]
    .mean()
)

In [None]:
# Display the per-sex average conversion rates computed in the previous cell.
sex_conversion

In [None]:
# Keep only the rows whose device is exactly 'Mobile' (case-sensitive match).
mobile = df_metrics.loc[df_metrics['device'].eq('Mobile')]

# Column-wise mean of the conversion rates over those rows; the result is a Series
# indexed by rate name.
mobile_conversion = mobile.loc[
    :,
    [
        'search_conversion_rate',
        'payment_conversion_rate',
        'confirmation_conversion_rate',
        'overall_conversion_rate',
    ],
].mean()

In [None]:
# Display the average conversion rates for the Mobile subset computed in the previous cell.
mobile_conversion

In [None]:
# Unweighted mean of the per-row overall conversion rate across the whole dataset.
# NOTE(review): every row counts equally regardless of its traffic volume; if rows
# differ a lot in `home_visits`, a volume-weighted rate may be more representative — confirm intent.
overall_avg_conversion = df_metrics["overall_conversion_rate"].mean()

In [None]:
# Display the dataset-wide average overall conversion rate computed in the previous cell.
overall_avg_conversion

In [None]:
# Order the rows from highest to lowest overall conversion rate and keep the first ten.
# Each entry is a single row of df_metrics, not an aggregate over a segment.
best_segment = (
    df_metrics
    .sort_values(by='overall_conversion_rate', ascending=False)
    .iloc[:10]
)

In [None]:
# Display the ten rows with the highest overall conversion rate selected in the previous cell.
best_segment

In [None]:
# Average overall conversion rate per calendar week.
# 'W' is pandas' weekly frequency alias; grouping on `date` requires it to be a
# datetime column (converted earlier in the notebook).
weekly_trend = (
    df_metrics[['date', 'overall_conversion_rate']]
    .groupby(pd.Grouper(key='date', freq='W'))
    .mean()
    .reset_index()
)

In [None]:
# Display the weekly average overall conversion rate computed in the previous cell.
weekly_trend

In [None]:
# Average overall conversion rate per calendar month.
# 'ME' is the month-end frequency alias (available from pandas 2.2, where it replaces 'M');
# grouping on `date` requires it to be a datetime column (converted earlier in the notebook).
monthly_trend = (
    df_metrics[['date', 'overall_conversion_rate']]
    .groupby(pd.Grouper(key='date', freq='ME'))
    .mean()
    .reset_index()
)

In [None]:
# Display the monthly average overall conversion rate computed in the previous cell.
monthly_trend

In [None]:
# Plain-text report gathering every summary computed in the cells above.
# Each heading and the object beneath it go through one print call with sep="\n",
# which writes the same text as printing the heading and the object separately.
print(">>> AVERAGE DROPOFF BY STAGE:", dropoff_avg, sep="\n")

print("\n>>> AVERAGE CONVERSION BY DEVICE:", device_conversion, sep="\n")

print("\n>>> AVERAGE CONVERSION BY SEX:", sex_conversion, sep="\n")

# NOTE(review): the '%' suffix assumes overall_conversion_rate is stored in percent
# units (not as a 0-1 fraction) — confirm against the CSV.
print(f"\n>>> OVERALL AVERAGE CONVERSION: {overall_avg_conversion:.2f}%")

print("\n>>> CONVERSIONS FOR MOBILE USERS:", mobile_conversion, sep="\n")

# Only the identifying columns and the ranking metric are shown for the top rows.
print(
    "\n>>> BEST SEGMENTS (HIGHEST OVERALL CONVERSION):",
    best_segment.loc[:, ['date', 'device', 'sex', 'overall_conversion_rate']],
    sep="\n",
)

print("\n>>> WEEKLY CONVERSION TREND:", weekly_trend, sep="\n")

print("\n>>> MONTHLY CONVERSION TREND:", monthly_trend, sep="\n")