<a href="https://colab.research.google.com/github/hussainm123/hussainm.github.io/blob/main/Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from IPython.display import HTML, Markdown, display

# ----------------------------------------------------
# 1. Data Translation and Preparation
# ----------------------------------------------------
# Every figure in this section is a hard-coded literal; nothing is loaded
# from a file or derived at runtime.

# Portal-level CTR summary: one row per metric, one column per portal.
overview_data = {
    'Metric': ['Total CTR', 'Median CTR', 'Mean CTR'],
    'StudyCheck': [10.37, 9.83, 11.41],
    'studieren.de': [5.42, 10.50, 27.26],
}
# Index by metric name so the styled table and the later melt() both key on it.
overview_df = pd.DataFrame(overview_data).set_index('Metric')

# Field order shared by both per-program tables below.
_PROGRAM_FIELDS = ('name', 'clicks', 'views', 'ctr')

# StudyCheck: ten programs with the most clicks, listed in descending click order.
study_check_top10 = [
    dict(zip(_PROGRAM_FIELDS, row))
    for row in (
        ('Creative Comm. & Brand Mgmt', 252, 2368, 10.64),
        ('Marketing Intelligence', 224, 1407, 15.92),
        ('Corporate Comm. Mgmt', 161, 1462, 11.01),
        ('Life Cycle & Sustainability', 160, 1049, 15.25),
        ('BWL/Marketing Digital', 159, 1798, 8.84),
        ('BWL/Media Mgmt Digital', 148, 1221, 12.12),
        ('BBA/Int. Marketing', 141, 1409, 10.01),
        ('BWL/Personalmanagement', 137, 1274, 10.75),
        ('Taxation, Auditing & Law', 135, 430, 31.40),
        ('Human Resources Mgmt', 131, 902, 14.52),
    )
]
sc_top10_df = pd.DataFrame(study_check_top10)

# studieren.de: ten programs with the most clicks, listed in descending click order.
# NOTE(review): unlike the StudyCheck rows, every CTR here is a whole number
# (e.g. 364 clicks / 2,204 views is about 16.5%, listed as 17.00). The values
# look rounded before entry -- confirm against the source data.
studieren_top10 = [
    dict(zip(_PROGRAM_FIELDS, row))
    for row in (
        ('BWL/Nachhaltigkeit', 364, 2204, 17.00),
        ('BBA/Int. Marketing', 320, 7397, 4.00),
        ('BBA/Int. Business', 316, 6401, 5.00),
        ('BWL/Marketing', 307, 4957, 6.00),
        ('Wirtschaftsrecht', 304, 16381, 2.00),
        ('Information Systems', 303, 1814, 17.00),
        ('BWL/Marketing Digital', 287, 2709, 11.00),
        ('BWL/Media Mgmt', 287, 9047, 3.00),
        ('Creative Comm. & Brand', 285, 1810, 16.00),
        ('Life Cycle & Sustainability', 277, 2398, 12.00),
    )
]
st_top10_df = pd.DataFrame(studieren_top10)

# Number of programs per CTR bucket for each portal (input for the
# distribution bar chart in section 5).
_CTR_BIN_FIELDS = ('range', 'StudyCheck', 'studieren.de')

ctr_distribution = [
    dict(zip(_CTR_BIN_FIELDS, row))
    for row in (
        ('0-5%', 5, 8),
        ('5-10%', 8, 9),
        ('10-15%', 7, 6),
        ('15-20%', 4, 3),
        ('20%+', 2, 11),
    )
]
ctr_dist_df = pd.DataFrame(ctr_distribution)

# ----------------------------------------------------
# 2. Key Performance Metrics (Overview)
# ----------------------------------------------------

# Headings and commentary are rendered through Markdown: print() and HTML()
# would show the leading "##"/"###" as literal text instead of a heading.
display(Markdown("## 📊 Portal Performance Analysis: Key Overview Metrics"))

# Overview table, every cell formatted as a two-decimal percentage.
display(Markdown("### Overview Metrics Table"))
display(overview_df.style.format("{:.2f}%").set_caption("Comparative CTR Metrics (%)"))

# Key insight accompanying the overview table.
display(Markdown(
    "### 💡 Key Insight\n\n"
    "StudyCheck shows stable, uniform performance (Mean ≈ Median), while "
    "studieren.de exhibits volatile performance driven by many low-volume, "
    "high-CTR profiles with statistical anomalies (e.g., 150% CTR on 4 views)."
))

# Grouped bar chart of the same metrics. px.bar wants long-form data, so the
# portal columns are melted into (Metric, Portal, CTR (%)) rows first.
overview_long_df = overview_df.reset_index().melt(
    id_vars='Metric', var_name='Portal', value_name='CTR (%)'
)

fig_overview = px.bar(
    overview_long_df,
    x='Metric',
    y='CTR (%)',
    color='Portal',
    barmode='group',
    title='Comparative Overview of CTR Metrics',
    color_discrete_map={'StudyCheck': '#3b82f6', 'studieren.de': '#10b981'},
    text_auto='.2f',  # print each bar's value on the bar with two decimals
)
fig_overview.update_layout(yaxis_title="CTR (%)", hovermode="x unified")
fig_overview.show()

# ----------------------------------------------------
# 3. Top 10 Programs by Clicks
# ----------------------------------------------------


def _show_top10(top10_df, portal, highlight_color, color_scale):
    """Render one portal's top-10 table and horizontal bar chart.

    Args:
        top10_df: DataFrame with 'name', 'clicks', 'views' and 'ctr' columns.
        portal: Portal name used in the table heading and the chart title.
        highlight_color: CSS color applied as background to the 'clicks' column.
        color_scale: Plotly continuous color scale used to shade bars by clicks.

    Returns:
        The Plotly figure that was shown.
    """
    # Markdown (not HTML) so the leading "###" renders as a heading.
    display(Markdown(f"### {portal} Top 10 Programs"))

    table = (
        top10_df[['name', 'clicks', 'views', 'ctr']]
        .rename(columns={'name': 'Program Name', 'ctr': 'CTR (%)'})
        .style
        .format({'clicks': '{:,}', 'views': '{:,}', 'CTR (%)': '{:.2f}%'})
        .set_properties(**{'background-color': highlight_color}, subset=['clicks'])
    )
    display(table)

    # Rows are sorted by ascending clicks before plotting the horizontal bars.
    fig = px.bar(
        top10_df.sort_values(by='clicks', ascending=True),
        x='clicks',
        y='name',
        orientation='h',
        title=f'{portal}: Top 10 Programs by Clicks',
        color='clicks',
        color_continuous_scale=color_scale,
        hover_data={'clicks': True, 'views': True, 'ctr': ':.2f'},
    )
    # Hide the color bar: bar shade is driven by the same 'clicks' column
    # that is already plotted on the x axis.
    fig.update_layout(yaxis_title="", xaxis_title="Clicks", coloraxis_showscale=False)
    fig.show()
    return fig


display(Markdown("---\n\n## 🥇 Top 10 Programs by Clicks"))

# StudyCheck: blue highlight and blue bar scale.
fig_sc_top10 = _show_top10(
    sc_top10_df, 'StudyCheck', '#e0f2fe', px.colors.sequential.Blues
)

# studieren.de: green highlight and green bar scale.
fig_st_top10 = _show_top10(
    st_top10_df, 'studieren.de', '#d1fae5', px.colors.sequential.Greens
)

# ----------------------------------------------------
# 4. CTR vs Views Relationship (Scatter Plot)
# ----------------------------------------------------

# Rendered through Markdown so "---" becomes a rule and "##" a heading.
display(Markdown("---\n\n## 📈 CTR vs Views Relationship"))

# One figure with one marker trace per portal (both portals share the axes).
fig_scatter = go.Figure()

for _portal, _top10_df, _marker_color in (
    ('StudyCheck', sc_top10_df, '#3b82f6'),
    ('studieren.de', st_top10_df, '#10b981'),
):
    fig_scatter.add_trace(go.Scatter(
        x=_top10_df['views'],
        y=_top10_df['ctr'],
        mode='markers',
        name=_portal,
        marker=dict(size=12, color=_marker_color),
        text=_top10_df['name'],
        # Custom hover text: program name, views, CTR. The empty <extra> tag
        # suppresses the secondary box that would repeat the trace name.
        hovertemplate='<b>%{text}</b><br>Views: %{x:,.0f}<br>CTR: %{y:.2f}%<extra></extra>',
    ))

fig_scatter.update_layout(
    title='CTR (%) vs Profile Views (Top 10 Programs)',
    xaxis_title='Profile Views (Log Scale for Visibility)',
    yaxis_title='CTR (%)',
    xaxis_type='log',  # Use log scale for views to handle the wide range (430 to 16K)
    hovermode='closest',
)
fig_scatter.show()

display(Markdown(
    "### 🔎 Scatter Plot Analysis\n\n"
    "StudyCheck demonstrates more consistent CTR performance regardless of "
    "view volume. studieren.de shows a significant inverse relationship where "
    "high-visibility programs (e.g., Wirtschaftsrecht at 16,381 views with "
    "2.00% CTR) have significantly lower CTRs, suggesting potential "
    "optimization opportunities for high-traffic profiles."
))

# ----------------------------------------------------
# 5. CTR Distribution
# ----------------------------------------------------

# Rendered through Markdown so "---" becomes a rule and "##" a heading.
display(Markdown("---\n\n## CTR Distribution Across Programs"))

# px.bar wants long-form data: one (range, Portal, Number of Programs) row
# per bucket and portal.
ctr_dist_long_df = ctr_dist_df.melt(
    id_vars='range', var_name='Portal', value_name='Number of Programs'
)
# Pin the x-axis category order to the order in which the buckets are listed
# in the data frame (lowest to highest CTR), instead of repeating the labels.
range_order = ctr_dist_df['range'].tolist()

fig_dist = px.bar(
    ctr_dist_long_df,
    x='range',
    y='Number of Programs',
    color='Portal',
    barmode='group',
    category_orders={'range': range_order},
    title='Program Count by CTR Range',
    color_discrete_map={'StudyCheck': '#3b82f6', 'studieren.de': '#10b981'},
)
fig_dist.update_layout(xaxis_title="CTR Range", hovermode="x unified")
fig_dist.show()

display(Markdown(
    "### ⚠️ Data Quality Warning and Conclusion\n\n"
    "studieren.de contains several programs with CTRs exceeding 100% (e.g., "
    "150%, 125%, 82%) on very low view counts. These are likely tracking "
    "errors or data quality issues that significantly skew the mean CTR "
    "upward to 27.26%, while the median remains at a more realistic 10.50%. "
    "This high variance in the '20%+' range for studieren.de requires careful "
    "filtering for reliable analysis."
))

## 📊 Portal Performance Analysis: Key Overview Metrics



Unnamed: 0_level_0,StudyCheck,studieren.de
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Total CTR,10.37%,5.42%
Median CTR,9.83%,10.50%
Mean CTR,11.41%,27.26%



### 💡 Key Insight
StudyCheck shows stable, uniform performance (Mean ≈ Median), while studieren.de exhibits volatile performance driven by many low-volume, high-CTR profiles with statistical anomalies (e.g., 150% CTR on 4 views).



---
## 🥇 Top 10 Programs by Clicks



Unnamed: 0,Program Name,clicks,views,CTR (%)
0,Creative Comm. & Brand Mgmt,252,2368,10.64%
1,Marketing Intelligence,224,1407,15.92%
2,Corporate Comm. Mgmt,161,1462,11.01%
3,Life Cycle & Sustainability,160,1049,15.25%
4,BWL/Marketing Digital,159,1798,8.84%
5,BWL/Media Mgmt Digital,148,1221,12.12%
6,BBA/Int. Marketing,141,1409,10.01%
7,BWL/Personalmanagement,137,1274,10.75%
8,"Taxation, Auditing & Law",135,430,31.40%
9,Human Resources Mgmt,131,902,14.52%


Unnamed: 0,Program Name,clicks,views,CTR (%)
0,BWL/Nachhaltigkeit,364,2204,17.00%
1,BBA/Int. Marketing,320,7397,4.00%
2,BBA/Int. Business,316,6401,5.00%
3,BWL/Marketing,307,4957,6.00%
4,Wirtschaftsrecht,304,16381,2.00%
5,Information Systems,303,1814,17.00%
6,BWL/Marketing Digital,287,2709,11.00%
7,BWL/Media Mgmt,287,9047,3.00%
8,Creative Comm. & Brand,285,1810,16.00%
9,Life Cycle & Sustainability,277,2398,12.00%



---
## 📈 CTR vs Views Relationship




### 🔎 Scatter Plot Analysis
StudyCheck demonstrates more consistent CTR performance regardless of view volume. studieren.de shows a significant inverse relationship where high-visibility programs (e.g., Wirtschaftsrecht at 16,381 views with 2.00% CTR) have significantly lower CTRs, suggesting potential optimization opportunities for high-traffic profiles.

---
## CTR Distribution Across Programs




studieren.de contains several programs with CTRs exceeding 100% (e.g., 150%, 125%, 82%) on very low view counts. These are likely tracking errors or data quality issues that significantly skew the mean CTR upward to 27.26%, while the median remains at a more realistic 10.50%. This high variance in the '20%+' range for studieren.de requires careful filtering for reliable analysis.
