# YouTube Watch History Analysis

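This notebook loads a YouTube watch history export (`watch-history.json`) and keeps the videos watched during the last 365 days whose titles pass a few filters: titles must be ASCII-only, must not contain `short`, `#`, or `https`, and entries logged less than a minute apart are collapsed. It then lists the most re-watched titles and asks an OpenAI model to describe what the remaining titles say about the viewer.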

In [27]:
import json
from datetime import datetime, timedelta, timezone
from dateutil.parser import isoparse

In [28]:
# Read the exported watch history. The encoding is given explicitly because
# JSON exports are UTF-8, while open() would otherwise fall back to the
# platform's locale encoding and could mis-decode or reject non-ASCII titles.
with open("watch-history.json", "r", encoding="utf-8") as file:
    file_content = file.read()

# Parsed history; the filtering loop iterates over this and reads the
# "title" and "time" keys of each entry.
content = json.loads(file_content)

In [36]:
# Accumulators filled by the filtering loop: every kept title (repeats
# included) and the de-duplicated set of kept titles.
titles = list()
unique_titles = set()

# Timestamp used by the loop to detect entries logged in quick succession;
# None until the first entry has been read.
last_date = None

# Substrings that cause a title to be dropped (compared against the
# lower-cased title).
filter_words = ["short", "#", "https"]

# Timezone-aware (UTC) cutoff: entries older than this are ignored.
one_year_ago = datetime.now(tz=timezone.utc) - timedelta(days=365)

In [37]:
# Start from a clean slate so that re-running this cell does not append the
# same titles a second time or compare the first entry against the
# `last_date` left over from a previous run.
titles = []
unique_titles = set()
last_date = None

for watched in content:
    title = watched["title"].removeprefix("Watched ")
    date_string = watched["time"]
    parsed_date = isoparse(date_string)

    # Skip entries logged less than a minute before the last accepted
    # timestamp. NOTE(review): the subtraction `last_date - parsed_date`
    # presumes the export is ordered newest-first -- confirm.
    if last_date is not None and last_date - parsed_date < timedelta(minutes=1):
        continue

    last_date = parsed_date

    # Skip titles containing any non-ASCII character. This removes emoji, but
    # also every title with accented or non-Latin characters.
    if any(ord(char) > 127 for char in title):
        continue

    # Skip titles containing one of the filter substrings (case-insensitive).
    lowered_title = title.lower()
    if any(word in lowered_title for word in filter_words):
        continue

    # Skip anything watched before the one-year cutoff.
    if parsed_date < one_year_ago:
        continue

    # Keep only the first line of multi-line titles.
    if "\n" in title:
        title = title.split("\n")[0]

    titles.append(title)
    unique_titles.add(title)

In [None]:
print(f"Total videos: {len(titles)}")
print(f"Unique videos: {len(unique_titles)}")

# Write one title per line. The set is sorted first so the file has the same
# line order on every run (set iteration order is arbitrary), and the
# encoding is pinned instead of depending on the platform default.
with open("watched.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(sorted(unique_titles)))

In [None]:
from IPython.display import Markdown as md

from collections import Counter

# Count every kept title in a single pass instead of calling
# `titles.count(x)` once per element, which is quadratic in the list length.
watch_counts = Counter(titles)

# Every occurrence of a title that appears more than once, in history order.
# (The variable name is kept as-is in case other cells refer to it.)
dublicates = [x for x in titles if watch_counts[x] > 1]

# Mapping of repeated title -> number of times it appears in `titles`.
# Counter keeps first-seen order, so ties below sort the same way as before.
title_count = {
    repeated: seen for repeated, seen in watch_counts.items() if seen > 1
}

# Five most re-watched titles, highest count first.
sorted_titles = sorted(title_count.items(), key=lambda x: x[1], reverse=True)[:5]

# Render the ranking as a Markdown bullet list under a heading.
raw = "# Watched multiple times"
for title, count in sorted_titles:
    raw += f"\n- {title} - {count} times"
md(raw)

In [40]:
import os
from dotenv import load_dotenv
from openai import OpenAI

# Let python-dotenv populate the process environment, then look up the API
# key there; the lookup yields None when the variable is not set.
load_dotenv()
OPENAPI_KEY = os.environ.get("OPENAPI_KEY")

# Client object used by the request cells below.
client = OpenAI(api_key=OPENAPI_KEY)

In [41]:
# Prompt template for the first model request. The {titles} placeholder is
# filled in with str.format() by the cell that sends the request; the text
# contains no other braces, so it needs no escaping.
insights_prompt = """
I have a list of video titles that represent the videos I’ve watched. Analyze this list and help me understand myself better by answering the following questions:
	1.	Who am I?
	•	Based on the video titles, what can you infer about my personality, habits, or lifestyle?
	•	How would you describe me to someone else based on this history?
	2.	What is my passion?
	•	What recurring themes or topics in the video titles suggest things I’m deeply passionate about?
	•	Are there any patterns that point to hobbies, skills, or activities I’m likely to enjoy or invest my time in?
	3.	What am I interested in?
	•	What kinds of topics, genres, or creators am I most drawn to?
	•	Are there any niche areas or unique interests that stand out?
	4.	What is something I should be known for?
	•	Based on this history, what might people associate me with or recognize me for?
	•	Do my video choices reveal a talent, knowledge, or perspective I might excel in?
	5.	What are things I care about?
	•	Are there values, causes, or ideas that seem important to me based on the videos I watch?
	•	Do my choices reflect a focus on self-improvement, learning, entertainment, or something else?

Here’s the list of video titles:

{titles}

Analyze the titles and answer these questions to help me discover and define more about myself.
"""

In [None]:
# Ask the model for a profile based on the filtered titles and render the
# answer (or the error message) as Markdown.
response_output = None
try:
    # Send the titles as a sorted, one-per-line list. Formatting the set
    # directly would embed its Python repr (braces, quotes, arbitrary order)
    # in the prompt.
    titles_block = "\n".join(sorted(unique_titles))

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "user",
                "content": insights_prompt.format(titles=titles_block)
            }
        ],
        max_tokens=1000,
        temperature=0.4
    )

    # The message content may be None; fall back to an empty string so the
    # concatenation below cannot fail with a TypeError.
    analysis_text = response.choices[0].message.content or ""

    response_output = "# AI Analysis of Your YouTube Watch History"
    response_output += "\n\n"
    response_output += analysis_text
except Exception as e:
    # Best effort: show the failure in the notebook instead of a traceback.
    response_output = f"Error making OpenAI API call: {e}"

md(response_output)

In [None]:
# Prompt template for the follow-up request; {analysis} is replaced with the
# Markdown text produced by the previous request cell.
new_years_prompt = """
Bases on what you learned about me, what should I focus on next year? 
What are topics to concentrate on? Where should I spend more time?
What are topics to deep dive on?

Here is the analysis of my what I watched this year:
{analysis}
"""

new_years_response = None

# The previous cell stores an "Error making OpenAI API call: ..." message in
# `response_output` when its request fails. Sending that text to the model as
# the "analysis" would waste a request and produce a meaningless answer, so
# the follow-up is skipped in that case (and when there is no output at all).
analysis_unavailable = (
    not response_output
    or response_output.startswith("Error making OpenAI API call:")
)

if analysis_unavailable:
    new_years_response = (
        "Skipped: no analysis is available from the previous step, "
        "so there is nothing to base recommendations on."
    )
else:
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "user",
                    "content": new_years_prompt.format(analysis=response_output)
                }
            ],
            max_tokens=1000,
            temperature=0.4
        )

        # The message content may be None; render an empty string instead.
        new_years_response = response.choices[0].message.content or ""
    except Exception as e:
        # Best effort: show the failure in the notebook instead of a traceback.
        new_years_response = f"Error making OpenAI API call: {e}"

md(new_years_response)