In [None]:
# Step 1: Import libraries
import os
import requests
import pandas as pd
from bs4 import BeautifulSoup
from sqlalchemy import create_engine
from dotenv import load_dotenv

# %%
# Step 2: Load environment variables
# Read the .env file (if any) into the process environment, then pull out the
# five PostgreSQL connection settings. Indexing os.environ raises KeyError on
# the first missing variable, so a misconfigured environment fails here.
load_dotenv()

pg_user, pg_password, pg_host, pg_port, pg_db = (
    os.environ[name]
    for name in ('PG_USER', 'PG_PASSWORD', 'PG_HOST', 'PG_PORT', 'PG_DB')
)

In [None]:
# Step 3: Create engine
# Build the SQLAlchemy engine for the PostgreSQL database (psycopg2 driver)
# from the connection settings loaded in Step 2. `create_engine` comes from
# the Step 1 imports cell, so it is not imported again here.
# NOTE(review): the credentials are interpolated into the URL verbatim; a user
# name or password containing URL-reserved characters (e.g. '@', ':', '/')
# presumably has to be percent-encoded in the environment already — confirm
# against the .env file.
engine = create_engine(
    f'postgresql+psycopg2://{pg_user}:{pg_password}@{pg_host}:{pg_port}/{pg_db}'
)

## Diagnostic

### Business Question
Which industries demonstrate the greatest improvement in phishing training success from month 0 to month 12, and how do they rank?


In [None]:
# Rank industries by their change in training success between month 0 and
# month 12 (largest improvement first).
#   improvement_cte     - one row per industry with month_0, month_12 and the
#                         difference (month_12 - month_0) as `improvement`.
#   ranked_improvements - adds RANK() over improvement DESC; tied industries
#                         share a rank.
# The statement ends with a single final SELECT; a stray ')' followed by a
# second copy of that SELECT previously made the SQL invalid.
query1 = '''
WITH improvement_cte AS (
    SELECT
        i.industry_name,
        its.month_0,
        its.month_12,
        (its.month_12 - its.month_0) AS improvement
    FROM raw.industry_training_success its
    JOIN raw.dim_industry i ON its.industry_id = i.industry_id
),
ranked_improvements AS (
    SELECT *,
        RANK() OVER (ORDER BY improvement DESC) AS rank_by_improvement
    FROM improvement_cte
)
SELECT *
FROM ranked_improvements
ORDER BY rank_by_improvement
'''

# Load the ranked result into a DataFrame with a clean 0..n-1 index and
# display it as the cell output.
df_q1 = pd.read_sql(query1, con=engine).reset_index(drop=True)
df_q1


### Insight
The *Financial Services* industry experienced the greatest improvement in phishing training success over the 12-month period, increasing from 48% to 74% — a gain of 26 percentage points. Meanwhile, *Pharma & healthcare* showed the least progress, improving by only 10 percentage points. This highlights a significant disparity in training effectiveness across industries.

### Recommendation
Prioritize additional support and tailored training for industries with lower improvement rates such as *Pharma & healthcare* and *IT, software, internet*. Investigate possible causes like engagement, delivery methods, or content relevance to enhance outcomes.

### Prediction
If current patterns hold, top-performing industries may stabilize, while lower-performing sectors risk stagnation or decline unless proactive measures are taken. Focused interventions could help bridge the performance gap in the coming year.