## Top Funnel Analytics
#### Focused on views, likes, comments
#### Need to get video category (topic) AND Guest(s) extracted first

In [172]:
import pandas as pd
from tqdm.notebook import tqdm


df = pd.read_csv('/Users/riadanas/Desktop/MLE Diary of a CEO/data/raw/2025_all_pod.csv')

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [173]:
#### CREATE "date_since_published" COLUMN ####
from datetime import datetime, timezone

# Ensure the column is parsed as datetime
df["video_published_at"] = pd.to_datetime(df["video_published_at"], utc=True, errors="coerce")

# Get today's date in UTC
today = datetime.now(timezone.utc)

# Compute the number of days since publication
df["days_since_published"] = (today - df["video_published_at"]).dt.days

# Optional sanity check
print(df[["video_id", "video_published_at", "days_since_published"]].head())


      video_id        video_published_at  days_since_published
0  eOnIWDMNyfE 2025-01-27 08:00:37+00:00                   256
1  eOnIWDMNyfE 2025-01-27 08:00:37+00:00                   256
2  eOnIWDMNyfE 2025-01-27 08:00:37+00:00                   256
3  eOnIWDMNyfE 2025-01-27 08:00:37+00:00                   256
4  eOnIWDMNyfE 2025-01-27 08:00:37+00:00                   256


In [174]:
print(df.shape)
df.head(2)

(75574, 19)


Unnamed: 0,channel_name,channel_id,video_id,video_title,video_description,video_published_at,view_count,video_like_count,comment_count,comment_id,comment_text,author,author_id,comment_like_count,comment_published_at,is_pinned,is_reply,parent_comment_id,days_since_published
0,The Diary Of A CEO,UCGq-a57w-aPwyi3pW7XLiHw,eOnIWDMNyfE,The Nutritional Scientist: Do Not Eat After 9pm! Link Between Chewing & Belly Fat!,"Dr Sarah Berry is a Professor in the Department of Nutritional Sciences at King's College London and Chief Scientist at ZOE. She is also the lead nutritional scientist on the PREDICT program, the world’s largest in-depth nutritional research programme. 00:00 Intro 02:04 Two Decades Studying the Impact of Food on Our Health 04:52 What Is the Food Matrix and How Does It Affect Our Health? 07:03 Why Do We Need Processed Food? 08:15 When Is Processed Food Bad? 11:09 How Long Does It Take for My Br",2025-01-27 08:00:37+00:00,749464,15169,3700,UgzXObkDgCKvQyFcSYB4AaABAg,"Seed oil is not bad for you?? At this point, we need a panel discussion with all of these scientists, biochemist, and nutritionists. Also, we need to know who all these people are sponsored by or paid by because that makes the difference in the message or their so-called narrative!",@NaeNae23,UCg5Kj8O8iYjICUwUkPYwacQ,3576,2025-01-27T08:34:47Z,False,False,,256
1,The Diary Of A CEO,UCGq-a57w-aPwyi3pW7XLiHw,eOnIWDMNyfE,The Nutritional Scientist: Do Not Eat After 9pm! Link Between Chewing & Belly Fat!,"Dr Sarah Berry is a Professor in the Department of Nutritional Sciences at King's College London and Chief Scientist at ZOE. She is also the lead nutritional scientist on the PREDICT program, the world’s largest in-depth nutritional research programme. 00:00 Intro 02:04 Two Decades Studying the Impact of Food on Our Health 04:52 What Is the Food Matrix and How Does It Affect Our Health? 07:03 Why Do We Need Processed Food? 08:15 When Is Processed Food Bad? 11:09 How Long Does It Take for My Br",2025-01-27 08:00:37+00:00,749464,15169,3700,UgwPRXvRdUbJsOc6_4F4AaABAg,Now Dr. Eric Berg must be the next guest to review this interview. Period!,@saiqasajjad4849,UCDOjl0Zpd7SR78eHNHfIu6A,777,2025-01-27T16:37:31Z,False,False,,256


### Guest Name Processing - GPT-4

In [175]:
import os
import json
import re
import pandas as pd
from openai import OpenAI
from dotenv import load_dotenv

load_dotenv()

client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.getenv("OPENROUTER_API_KEY"),
)

def get_guest_names_openrouter(description: str) -> list:
    """
    Extract true podcast guest names from a YouTube description using OpenRouter (Claude 3.5 / GPT-4-mini).
    Ignores names used as references or examples.
    """
    if not isinstance(description, str) or not description.strip():
        return []

    prompt = f"""
    You are a podcast metadata assistant.

    Task:
    - Read the YouTube video description carefully.
    - Identify ONLY the actual guest(s) who appear in the episode or are directly interviewed.
    - Ignore people mentioned just as examples, comparisons, or references (e.g., Warren Buffett, Elon Musk).
    - If multiple guests appear, include all of them.
    - Preserve professional titles (e.g., "Dr", "Prof", "Sir") if present.
    - Return a clean JSON list of guest names, for example:
      ["Morgan Housel"]
      or ["Dr Andrew Huberman", "Lex Fridman"]
    - If no guest is clearly identified, return an empty list [].

    Description:
    \"\"\"{description}\"\"\"
    """

    try:
        completion = client.chat.completions.create(
            model="anthropic/claude-3.5-sonnet",  # you can change to "openai/gpt-4o-mini"
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1,
            max_tokens=200,
        )

        content = completion.choices[0].message.content.strip()

        # Try parsing JSON
        try:
            result = json.loads(content)
        except json.JSONDecodeError:
            match = re.search(r'\[(.*?)\]', content)
            if match:
                inner = match.group(1)
                result = [n.strip().strip('"').strip() for n in inner.split(",") if n.strip()]
            else:
                result = re.findall(r"(?:Dr\.?|Prof\.?|Mr\.?|Ms\.?)?\s?[A-Z][a-z]+(?:\s[A-Z][a-z]+)+", content)

        if isinstance(result, str):
            result = [result]
        result = [r.strip() for r in result if r.strip()]
        result = list(set(result))

    except Exception as e:
        print(f"Error processing description: {e}")
        result = []

    return result


# ------------------------------------------------------
# 🔁 Apply once per unique video_id
# ------------------------------------------------------

def assign_guest_names(df: pd.DataFrame) -> pd.DataFrame:
    """
    Apply guest extraction once per unique video_id.
    Adds a 'guest_list' column to the DataFrame.
    """
    # Create mapping: video_id → guest list
    mapping = {}
    unique_videos = df.drop_duplicates(subset="video_id")[["video_id", "video_description"]]

    for _, row in unique_videos.iterrows():
        vid = row["video_id"]
        desc = row["video_description"]
        guests = get_guest_names_openrouter(desc)
        mapping[vid] = guests

    # Map results back to main DataFrame
    df["guest_list"] = df["video_id"].map(mapping)
    return df

In [176]:
df['video_description'].values[0]

"Dr Sarah Berry is a Professor in the Department of Nutritional Sciences at King's College London and Chief Scientist at ZOE. She is also the lead nutritional scientist on the PREDICT program, the world’s largest in-depth nutritional research programme.   00:00 Intro 02:04 Two Decades Studying the Impact of Food on Our Health 04:52 What Is the Food Matrix and How Does It Affect Our Health? 07:03 Why Do We Need Processed Food? 08:15 When Is Processed Food Bad? 11:09 How Long Does It Take for My Br"

In [177]:
df = assign_guest_names(df)

In [178]:
df.head(1)

Unnamed: 0,channel_name,channel_id,video_id,video_title,video_description,video_published_at,view_count,video_like_count,comment_count,comment_id,comment_text,author,author_id,comment_like_count,comment_published_at,is_pinned,is_reply,parent_comment_id,days_since_published,guest_list
0,The Diary Of A CEO,UCGq-a57w-aPwyi3pW7XLiHw,eOnIWDMNyfE,The Nutritional Scientist: Do Not Eat After 9pm! Link Between Chewing & Belly Fat!,"Dr Sarah Berry is a Professor in the Department of Nutritional Sciences at King's College London and Chief Scientist at ZOE. She is also the lead nutritional scientist on the PREDICT program, the world’s largest in-depth nutritional research programme. 00:00 Intro 02:04 Two Decades Studying the Impact of Food on Our Health 04:52 What Is the Food Matrix and How Does It Affect Our Health? 07:03 Why Do We Need Processed Food? 08:15 When Is Processed Food Bad? 11:09 How Long Does It Take for My Br",2025-01-27 08:00:37+00:00,749464,15169,3700,UgzXObkDgCKvQyFcSYB4AaABAg,"Seed oil is not bad for you?? At this point, we need a panel discussion with all of these scientists, biochemist, and nutritionists. Also, we need to know who all these people are sponsored by or paid by because that makes the difference in the message or their so-called narrative!",@NaeNae23,UCg5Kj8O8iYjICUwUkPYwacQ,3576,2025-01-27T08:34:47Z,False,False,,256,[Dr Sarah Berry]


In [179]:
df['guest_list'].value_counts()

guest_list
[Scott Galloway, Daniel Priestly, Konstantin Kisin]    22318
[Dr Anna Lembke]                                       13362
[Daniel Priestley]                                      9263
[Dr Jordan Peterson]                                    6513
[Paul Brunson]                                          6413
[Dr Georgia Ede]                                        6409
[Dr Sarah Berry]                                        3812
[Charlie Houpert]                                       3492
[Dr Stacy Sims]                                         3148
[Dr Mohit Khera]                                         844
Name: count, dtype: int64

### Topic Category

In [180]:
import ollama
import json

def get_topic_category(title: str) -> str:
    """
    Use Ollama to classify the video title into a topic category.
    Example categories: health, mental health, productivity, finance, relationships, entrepreneurship, other.
    """
    prompt = f"""
    You are a helpful assistant. Categorize the following YouTube video title into ONE broad category:
    - health
    - mental health / psychology
    - productivity / personal development
    - finance
    - relationships
    - entrepreneurship / business
    - Religion / Spirituality
    - Technology
    - Education
    - Lifestyle
    - Entertainment
    - other

    Title: "{title}"

    Return only the category name, nothing else.
    """

    response = ollama.chat(
        model="llama3.2:3b",  # you can swap to another local model
        messages=[{"role": "user", "content": prompt}]
    )
    return response["message"]["content"].strip().lower()


In [181]:
# Enable tqdm for pandas
tqdm.pandas()

# Extract unique video_id/title pairs
video_meta = df[["video_id", "video_title"]].drop_duplicates()

# Apply Ollama category classification
video_meta["Topic_Category"] = video_meta["video_title"].apply(get_topic_category)

# Merge back into main dataframe
df = df.merge(video_meta[["video_id", "Topic_Category"]], on="video_id", how="left")

In [182]:
df['Topic_Category'].value_counts()

Topic_Category
mental health / psychology             23263
politics/news                          22318
entrepreneurship / business             9263
health                                  7804
relationships                           6513
productivity / personal development     6413
Name: count, dtype: int64

## Top Funnel KPIs

In [183]:
print(df.shape)
df.head(2)

(75574, 21)


Unnamed: 0,channel_name,channel_id,video_id,video_title,video_description,video_published_at,view_count,video_like_count,comment_count,comment_id,comment_text,author,author_id,comment_like_count,comment_published_at,is_pinned,is_reply,parent_comment_id,days_since_published,guest_list,Topic_Category
0,The Diary Of A CEO,UCGq-a57w-aPwyi3pW7XLiHw,eOnIWDMNyfE,The Nutritional Scientist: Do Not Eat After 9pm! Link Between Chewing & Belly Fat!,"Dr Sarah Berry is a Professor in the Department of Nutritional Sciences at King's College London and Chief Scientist at ZOE. She is also the lead nutritional scientist on the PREDICT program, the world’s largest in-depth nutritional research programme. 00:00 Intro 02:04 Two Decades Studying the Impact of Food on Our Health 04:52 What Is the Food Matrix and How Does It Affect Our Health? 07:03 Why Do We Need Processed Food? 08:15 When Is Processed Food Bad? 11:09 How Long Does It Take for My Br",2025-01-27 08:00:37+00:00,749464,15169,3700,UgzXObkDgCKvQyFcSYB4AaABAg,"Seed oil is not bad for you?? At this point, we need a panel discussion with all of these scientists, biochemist, and nutritionists. Also, we need to know who all these people are sponsored by or paid by because that makes the difference in the message or their so-called narrative!",@NaeNae23,UCg5Kj8O8iYjICUwUkPYwacQ,3576,2025-01-27T08:34:47Z,False,False,,256,[Dr Sarah Berry],health
1,The Diary Of A CEO,UCGq-a57w-aPwyi3pW7XLiHw,eOnIWDMNyfE,The Nutritional Scientist: Do Not Eat After 9pm! Link Between Chewing & Belly Fat!,"Dr Sarah Berry is a Professor in the Department of Nutritional Sciences at King's College London and Chief Scientist at ZOE. She is also the lead nutritional scientist on the PREDICT program, the world’s largest in-depth nutritional research programme. 00:00 Intro 02:04 Two Decades Studying the Impact of Food on Our Health 04:52 What Is the Food Matrix and How Does It Affect Our Health? 07:03 Why Do We Need Processed Food? 08:15 When Is Processed Food Bad? 11:09 How Long Does It Take for My Br",2025-01-27 08:00:37+00:00,749464,15169,3700,UgwPRXvRdUbJsOc6_4F4AaABAg,Now Dr. Eric Berg must be the next guest to review this interview. Period!,@saiqasajjad4849,UCDOjl0Zpd7SR78eHNHfIu6A,777,2025-01-27T16:37:31Z,False,False,,256,[Dr Sarah Berry],health


In [184]:
df.columns

Index(['channel_name', 'channel_id', 'video_id', 'video_title',
       'video_description', 'video_published_at', 'view_count',
       'video_like_count', 'comment_count', 'comment_id', 'comment_text',
       'author', 'author_id', 'comment_like_count', 'comment_published_at',
       'is_pinned', 'is_reply', 'parent_comment_id', 'days_since_published',
       'guest_list', 'Topic_Category'],
      dtype='object')

In [185]:
print(df['guest_list'].value_counts())
print("\n")
print(df['Topic_Category'].value_counts())

guest_list
[Scott Galloway, Daniel Priestly, Konstantin Kisin]    22318
[Dr Anna Lembke]                                       13362
[Daniel Priestley]                                      9263
[Dr Jordan Peterson]                                    6513
[Paul Brunson]                                          6413
[Dr Georgia Ede]                                        6409
[Dr Sarah Berry]                                        3812
[Charlie Houpert]                                       3492
[Dr Stacy Sims]                                         3148
[Dr Mohit Khera]                                         844
Name: count, dtype: int64


Topic_Category
mental health / psychology             23263
politics/news                          22318
entrepreneurship / business             9263
health                                  7804
relationships                           6513
productivity / personal development     6413
Name: count, dtype: int64


In [186]:
video_df = (
    df.drop_duplicates(subset="video_id")
      .loc[:, ["days_since_published","video_id", "video_title", "guest_list", "Topic_Category",
               "view_count", "video_like_count", "comment_count"]]
)

In [187]:
video_df["n_guests"] = video_df["guest_list"].apply(lambda x: len(x) if isinstance(x, list) and len(x) > 0 else 1)
video_df["views_per_guest"] = (video_df["view_count"] / video_df["n_guests"]).round(2)
video_df["likes_per_guest"] = (video_df["video_like_count"] / video_df["n_guests"]).round(2)
video_df["comments_per_guest"] = (video_df["comment_count"] / video_df["n_guests"]).round(2)

# Now safely explode
guest_df = video_df.explode("guest_list")

# Each guest gets fair share
guest_df = guest_df.rename(columns={"guest_list": "guest"})

In [188]:
pd.set_option("display.max_colwidth", None)

print(guest_df.shape)
guest_df

(12, 12)


Unnamed: 0,days_since_published,video_id,video_title,guest,Topic_Category,view_count,video_like_count,comment_count,n_guests,views_per_guest,likes_per_guest,comments_per_guest
0,256,eOnIWDMNyfE,The Nutritional Scientist: Do Not Eat After 9pm! Link Between Chewing & Belly Fat!,Dr Sarah Berry,health,749464,15169,3700,1,749464.0,15169.0,3700.0
3812,249,zg4Ta7vXiIM,The Charisma Teacher: Psychology Of Why People Don't Like You! People Are Attracted To These Traits!,Charlie Houpert,mental health / psychology,2589022,56655,3410,1,2589022.0,56655.0,3410.0
7304,267,B7tnfSPySb0,The Keto Psychiatrist: What Keto Is Really Doing To Your Body! Can It Cure 43% Of Mental Illness?,Dr Georgia Ede,mental health / psychology,2038491,61491,6274,1,2038491.0,61491.0,6274.0
13713,270,Hik6OY-nk4c,Jordan B Peterson: You Need To Listen To Your Wife! We've Built A Lonely & Sexless Society!,Dr Jordan Peterson,relationships,2046763,54184,6392,1,2046763.0,54184.0,6392.0
20226,274,IgW1jepnJp4,"Get Your Sex Life Back! What Everyone Gets Wrong About Sex, Libido & Erectile Dysfunction - Dr Khera",Dr Mohit Khera,health,729839,12731,826,1,729839.0,12731.0,826.0
21070,253,6ikIGGi859w,"Paul Brunson: Women Need To Lower Their Standards! If They Have These 3 Traits, Never Let Them Go!",Paul Brunson,productivity / personal development,1200300,24059,6334,1,1200300.0,24059.0,6334.0
27483,260,qpBnYB0I6_0,US ELECTION DEBATE: What Trump’s Return REALLY Means For The World! Is The UK About To Collapse?,Scott Galloway,politics/news,3527595,101258,21973,3,1175865.0,33752.67,7324.33
27483,260,qpBnYB0I6_0,US ELECTION DEBATE: What Trump’s Return REALLY Means For The World! Is The UK About To Collapse?,Daniel Priestly,politics/news,3527595,101258,21973,3,1175865.0,33752.67,7324.33
27483,260,qpBnYB0I6_0,US ELECTION DEBATE: What Trump’s Return REALLY Means For The World! Is The UK About To Collapse?,Konstantin Kisin,politics/news,3527595,101258,21973,3,1175865.0,33752.67,7324.33
49801,281,R6xbXOp7wDA,Dopamine Expert: Doing This Once A Day Fixes Your Dopamine! What Alcohol Is Doing To Your Brain!,Dr Anna Lembke,mental health / psychology,5043118,163175,13061,1,5043118.0,163175.0,13061.0


#### Topic Score

In [189]:
#### TOPIC SCORE
## Drop duplicate video ids to not get the same podcast counted multiple times

topic_df = (
    df.drop_duplicates(subset="video_id")
      .loc[:, ["video_id", "Topic_Category", "view_count", "video_like_count", "comment_count"]]
)


In [190]:
print(topic_df.shape)
topic_df

(10, 5)


Unnamed: 0,video_id,Topic_Category,view_count,video_like_count,comment_count
0,eOnIWDMNyfE,health,749464,15169,3700
3812,zg4Ta7vXiIM,mental health / psychology,2589022,56655,3410
7304,B7tnfSPySb0,mental health / psychology,2038491,61491,6274
13713,Hik6OY-nk4c,relationships,2046763,54184,6392
20226,IgW1jepnJp4,health,729839,12731,826
21070,6ikIGGi859w,productivity / personal development,1200300,24059,6334
27483,qpBnYB0I6_0,politics/news,3527595,101258,21973
49801,R6xbXOp7wDA,mental health / psychology,5043118,163175,13061
63163,sFkR34AMPw8,entrepreneurship / business,3551557,86817,9132
72426,It5_C6AF1pk,health,1624462,48760,3137


In [191]:
#### AVERAGE by Topic
## Average here works better than sum so topic categories that appear more don't bias the results!

topic_stats = (
    topic_df.groupby("Topic_Category", as_index=False)
             .agg({
                 "view_count": "mean",
                 "video_like_count": "mean",
                 "comment_count": "mean",
                 "video_id": "count"
             })
             .rename(columns={"video_id": "n_videos"})
)


In [192]:
#### Normalize Metrics
## Views, likes, and comments are in different scales — we want to make them comparable
## 👉 This rescales all values of that column between 0 and 1, using:

from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
for col in ["view_count", "video_like_count", "comment_count"]:
    topic_stats[f"{col}_norm"] = scaler.fit_transform(topic_stats[[col]])

In [193]:
#### apply weights

topic_stats["weighted_score"] = (
    0.5 * topic_stats["comment_count_norm"] +
    0.3 * topic_stats["video_like_count_norm"] +
    0.2 * topic_stats["view_count_norm"]
).round(2)

In [194]:
topic_stats["rank"] = topic_stats["weighted_score"].rank(ascending=False)
topic_stats['rank'] = topic_stats['rank'].astype(int)

topic_stats.sort_values("weighted_score", ascending=False)

Unnamed: 0,Topic_Category,view_count,video_like_count,comment_count,n_videos,view_count_norm,video_like_count_norm,comment_count_norm,weighted_score,rank
3,politics/news,3527595.0,101258.0,21973.0,1,0.99048,1.0,1.0,1.0,1
0,entrepreneurship / business,3551557.0,86817.0,9132.0,1,1.0,0.812938,0.338729,0.61,2
2,mental health / psychology,3223544.0,93773.666667,7581.666667,3,0.869679,0.903051,0.258892,0.57,3
5,relationships,2046763.0,54184.0,6392.0,1,0.40214,0.390225,0.197628,0.3,4
4,productivity / personal development,1200300.0,24059.0,6334.0,1,0.065838,0.0,0.194641,0.11,5
1,health,1034588.0,25553.333333,2554.333333,3,0.0,0.019357,0.0,0.01,6


- 1.0 = best performing topic in that metric
- 0.0 = weakest performing topic
- Values between 0–1 = proportionally scaled in between.

#### Guest Score

##### KPI values are divided equally among guests to avoid inflating multi-guest episodes.
##### This assumes each guest contributes roughly equally to the episode’s performance.

In [195]:
pd.set_option("display.max_colwidth", None)

print(guest_df.shape)
guest_df

(12, 12)


Unnamed: 0,days_since_published,video_id,video_title,guest,Topic_Category,view_count,video_like_count,comment_count,n_guests,views_per_guest,likes_per_guest,comments_per_guest
0,256,eOnIWDMNyfE,The Nutritional Scientist: Do Not Eat After 9pm! Link Between Chewing & Belly Fat!,Dr Sarah Berry,health,749464,15169,3700,1,749464.0,15169.0,3700.0
3812,249,zg4Ta7vXiIM,The Charisma Teacher: Psychology Of Why People Don't Like You! People Are Attracted To These Traits!,Charlie Houpert,mental health / psychology,2589022,56655,3410,1,2589022.0,56655.0,3410.0
7304,267,B7tnfSPySb0,The Keto Psychiatrist: What Keto Is Really Doing To Your Body! Can It Cure 43% Of Mental Illness?,Dr Georgia Ede,mental health / psychology,2038491,61491,6274,1,2038491.0,61491.0,6274.0
13713,270,Hik6OY-nk4c,Jordan B Peterson: You Need To Listen To Your Wife! We've Built A Lonely & Sexless Society!,Dr Jordan Peterson,relationships,2046763,54184,6392,1,2046763.0,54184.0,6392.0
20226,274,IgW1jepnJp4,"Get Your Sex Life Back! What Everyone Gets Wrong About Sex, Libido & Erectile Dysfunction - Dr Khera",Dr Mohit Khera,health,729839,12731,826,1,729839.0,12731.0,826.0
21070,253,6ikIGGi859w,"Paul Brunson: Women Need To Lower Their Standards! If They Have These 3 Traits, Never Let Them Go!",Paul Brunson,productivity / personal development,1200300,24059,6334,1,1200300.0,24059.0,6334.0
27483,260,qpBnYB0I6_0,US ELECTION DEBATE: What Trump’s Return REALLY Means For The World! Is The UK About To Collapse?,Scott Galloway,politics/news,3527595,101258,21973,3,1175865.0,33752.67,7324.33
27483,260,qpBnYB0I6_0,US ELECTION DEBATE: What Trump’s Return REALLY Means For The World! Is The UK About To Collapse?,Daniel Priestly,politics/news,3527595,101258,21973,3,1175865.0,33752.67,7324.33
27483,260,qpBnYB0I6_0,US ELECTION DEBATE: What Trump’s Return REALLY Means For The World! Is The UK About To Collapse?,Konstantin Kisin,politics/news,3527595,101258,21973,3,1175865.0,33752.67,7324.33
49801,281,R6xbXOp7wDA,Dopamine Expert: Doing This Once A Day Fixes Your Dopamine! What Alcohol Is Doing To Your Brain!,Dr Anna Lembke,mental health / psychology,5043118,163175,13061,1,5043118.0,163175.0,13061.0


In [196]:
#### Aggregate metrics by guest

guest_stats = (
    guest_df.groupby("guest", as_index=False)
             .agg({
                 "views_per_guest": "mean",
                 "likes_per_guest": "mean",
                 "comments_per_guest": "mean",
                 "video_id": "count"
             })
             .rename(columns={"video_id": "appearances"})
)


In [197]:
guest_stats

Unnamed: 0,guest,views_per_guest,likes_per_guest,comments_per_guest,appearances
0,Charlie Houpert,2589022.0,56655.0,3410.0,1
1,Daniel Priestley,3551557.0,86817.0,9132.0,1
2,Daniel Priestly,1175865.0,33752.67,7324.33,1
3,Dr Anna Lembke,5043118.0,163175.0,13061.0,1
4,Dr Georgia Ede,2038491.0,61491.0,6274.0,1
5,Dr Jordan Peterson,2046763.0,54184.0,6392.0,1
6,Dr Mohit Khera,729839.0,12731.0,826.0,1
7,Dr Sarah Berry,749464.0,15169.0,3700.0,1
8,Dr Stacy Sims,1624462.0,48760.0,3137.0,1
9,Konstantin Kisin,1175865.0,33752.67,7324.33,1


In [198]:
#### Normalize each metric

from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
for col in ["views_per_guest", "likes_per_guest", "comments_per_guest"]:
    guest_stats[f"{col}_norm"] = scaler.fit_transform(guest_stats[[col]])

In [199]:
#### Apply weights

guest_stats["weighted_score"] = (
    0.5 * guest_stats["comments_per_guest_norm"] +
    0.3 * guest_stats["likes_per_guest_norm"] +
    0.2 * guest_stats["views_per_guest_norm"]
).round(2)


In [200]:
guest_stats["rank"] = guest_stats["weighted_score"].rank(ascending=False)
guest_stats['rank'] = guest_stats['rank'].astype(int)

guest_stats.sort_values("weighted_score", ascending=False).head(10)

Unnamed: 0,guest,views_per_guest,likes_per_guest,comments_per_guest,appearances,views_per_guest_norm,likes_per_guest_norm,comments_per_guest_norm,weighted_score,rank
3,Dr Anna Lembke,5043118.0,163175.0,13061.0,1,1.0,1.0,1.0,1.0,1
1,Daniel Priestley,3551557.0,86817.0,9132.0,1,0.654193,0.492449,0.678872,0.62,2
4,Dr Georgia Ede,2038491.0,61491.0,6274.0,1,0.303401,0.324107,0.44528,0.38,3
5,Dr Jordan Peterson,2046763.0,54184.0,6392.0,1,0.305319,0.275538,0.454924,0.37,4
2,Daniel Priestly,1175865.0,33752.67,7324.33,1,0.103408,0.139731,0.531126,0.33,6
9,Konstantin Kisin,1175865.0,33752.67,7324.33,1,0.103408,0.139731,0.531126,0.33,6
11,Scott Galloway,1175865.0,33752.67,7324.33,1,0.103408,0.139731,0.531126,0.33,6
0,Charlie Houpert,2589022.0,56655.0,3410.0,1,0.431037,0.291962,0.211197,0.28,8
10,Paul Brunson,1200300.0,24059.0,6334.0,1,0.109073,0.075297,0.450184,0.27,9
8,Dr Stacy Sims,1624462.0,48760.0,3137.0,1,0.207411,0.239484,0.188884,0.21,10
