# Your Top YouTube Channels

In [None]:
# Import packages
from bs4 import BeautifulSoup
import pandas as pd
import requests
import re
from PIL import Image, ImageFont, ImageDraw
import os

## Parse your data

In [None]:
# Open HTML watch-history file from Google Takeout
# The path is a raw string so that backslashes in a Windows-style path are kept
# literally (otherwise '\t' and '\f' would be read as tab and form-feed escapes).
# The `with` block closes the file as soon as BeautifulSoup has parsed it.
with open(r'path\to\your\file.html', encoding='utf8') as history_file:
    soup = BeautifulSoup(history_file, 'html.parser')

# This can take some time to load. Trim your file first if you wish.

In [None]:
# Earliest year to include in the analysis (2023 only, by default).
# To cover more history, set this to the earliest year you want to keep.
year = 2023

# Marker searched for in each entry's date text: the previous year followed by ', '.
# NOTE(review): presumably dates look like 'Jan 1, 2022, 1:00:00 PM EST' — confirm against your export.
stop_at = f'{year - 1}, '

In [None]:
# Rows collected by the parsing loop; each row becomes one DataFrame record
data = []

# Every <div class="outer-cell"> in the parsed document is one history entry
outer_cells = soup.find_all(name='div', class_='outer-cell')

# Initial functions for following loop
def get_link(links, index):
    """Return the 'href' of links[index], or None when the list is too short.

    `links` is a sequence of objects exposing `.get('href')` (anchor tags in
    this notebook); `index` is the position to read.
    """
    if index < len(links):
        return links[index].get('href')
    return None

def get_date(vid_info):
    """Return the stripped text that follows the last <br> inside `vid_info`.

    NOTE(review): presumably this trailing text is the entry's timestamp —
    confirm against your export. Raises IndexError when `vid_info` contains
    no <br> at all.
    """
    last_break = vid_info.find_all('br')[-1]
    trailing_text = last_break.next_sibling
    return trailing_text.strip()

# Retrieve data from HTML
# Entry types that produce a row; any other entry type is skipped.
handled_types = ('Watched', 'Answered survey question', 'Watched a video that has been removed')

for div_tag in outer_cells:
    vid_info = div_tag.find(class_="content-cell mdl-cell mdl-cell--6-col mdl-typography--body-1")
    caption = div_tag.find(class_="content-cell mdl-cell mdl-cell--12-col mdl-typography--caption")
    vid_type = vid_info.contents[0].get_text().strip()

    if vid_type not in handled_types:
        continue

    date = get_date(vid_info)
    # Stop parsing at the first entry dated in the year before `year`.
    # NOTE(review): this assumes the export lists entries newest-first — confirm.
    if stop_at in date:
        break

    # Reset the link fields for every entry. Survey and removed-video entries
    # set no links of their own, so without this they would silently reuse the
    # previous entry's vid_link (or raise NameError if they came first).
    vid_link = None
    channel_link = None

    if vid_type == 'Watched':
        title = vid_info.contents[1].get_text().strip()
        links = vid_info.find_all('a', href=True)
        vid_link = get_link(links, 0)
        channel_link = get_link(links, 1)
        channel_text = vid_info.contents[3].get_text().strip()
        # When the fourth node is just the date, no channel name was present
        channel = 'Video has been private' if channel_text == date else channel_text
        ad = caption.contents[7].get_text().strip()

    elif vid_type == 'Answered survey question':
        title = vid_type
        channel = 'None'
        ad = 'Survey'

    else:  # 'Watched a video that has been removed'
        title = vid_type
        channel = 'Unknown'
        ad = 'No'

    row_data = [title, vid_link, channel, channel_link, date, ad]
    data.append(row_data)

In [None]:
# Build the history table: one record per parsed entry, columns in the same
# order the parsing loop used when it assembled each row
column_names = ['title', 'vid_link', 'channel', 'channel_link', 'date', 'ad']
history_df = pd.DataFrame(data, columns=column_names)

In [None]:
# Preview the first rows to sanity-check the parsed columns
history_df.head()

In [None]:
# De-duplicate on the video link rather than the title: some channels reuse one
# title for many live streams, and re-watching a video (which I often did for
# reference) should only count once. The first occurrence of each link is kept.
no_dups = history_df.drop_duplicates(
    subset=['vid_link'],
    keep='first',
)

In [None]:
# let's finally get those top channels
# value_counts() already returns counts in descending order, so no extra sort
# is needed. The index and the count column are named explicitly instead of
# being renamed afterwards: the default names produced by
# value_counts().reset_index() differ between pandas 1.x and 2.x, and the old
# rename map turned the 'channel' column into 'counts' on pandas 2.x.
top_5_df = (
    no_dups['channel']
    .value_counts()
    .head(5)
    .rename_axis('channel')
    .reset_index(name='counts')
)

In [None]:
# Confirm your top channels don't include ads, surveys, or placeholder names
# such as 'None', 'Unknown' or 'Video has been private'
top_5_df.head()

## Now let's make it pretty

In [None]:
# Get channel links so we can get profile pictures
# Reduce the history to one known link per channel *before* merging. This keeps
# the merge small and guarantees exactly one row per top channel, even if a
# channel name appears with several links or with missing ones.
channel_links = (
    history_df[['channel', 'channel_link']]
    .dropna(subset=['channel_link'])
    .drop_duplicates(subset='channel')
)

# Selecting only 'channel' and 'counts' makes this cell safe to re-run: a
# previously merged 'channel_link' column is discarded instead of being suffixed.
top_5_df = pd.merge(
    top_5_df[['channel', 'counts']],
    channel_links,
    on='channel',
    how='left',
).reset_index(drop=True)

In [None]:
# Display the top channels together with their channel links
top_5_df

In [None]:
# File paths
# One output image is rendered per background, in this order
background_image_paths = [f'background_images/{number}.jpg' for number in (1, 2)]

# Typefaces: light for body text, medium for the title, bold for the rank numbers
custom_font_path = 'fonts/youtube-sans-light.ttf'
title_font_path = 'fonts/youtube-sans-medium.ttf'
number_font_path = 'fonts/youtube-sans-bold.ttf'

# Image dimensions (pixels) that each profile picture is resized to
image_width = image_height = 240

# Font sizes
title_font_size = 90
number_font_size = 100
font_size_large = 50
font_size_small = 40

# Load fonts
# The channel name and the "videos watched" line share one typeface at two sizes;
# the title and the rank numbers each use their own typeface.
channel_font = ImageFont.truetype(custom_font_path, font_size_large)
watched_font = ImageFont.truetype(custom_font_path, font_size_small)
title_font = ImageFont.truetype(title_font_path, title_font_size)
number_font = ImageFont.truetype(number_font_path, number_font_size)

from io import BytesIO  # stdlib; imported here so this cell runs without editing the import cell

# Seconds to wait on each HTTP request before giving up (avoids hanging forever)
request_timeout = 10

# Compiled once: captures the href of the image_src link that directly follows
# the og:title meta tag in the channel page's HTML
image_link_pattern = re.compile(
    r'<meta property="og:title" content=".*?"><link rel="image_src" href="(.*?)">'
)


def _load_channel_avatar(url):
    """Download a channel's profile picture and return it resized, or None.

    `url` is the channel page link. The page is fetched, the image link is
    extracted with `image_link_pattern`, and the image is decoded in memory
    and resized to (image_width, image_height). A message is printed and None
    is returned when the link is missing, the pattern does not match, or the
    download/decoding fails.
    """
    # A channel without a stored link shows up as None/NaN after the merge
    if not isinstance(url, str) or not url:
        print(f'No channel link available, skipping: {url}')
        return None

    try:
        response = requests.get(url, timeout=request_timeout)
        html_content = response.content.decode('utf-8')

        match = image_link_pattern.search(html_content)
        if match is None:
            print(f'Image link not found in the HTML content of {url}')
            return None

        image_response = requests.get(match.group(1), timeout=request_timeout)
        # Decode from memory: no temporary file to create or clean up
        return Image.open(BytesIO(image_response.content)).resize((image_width, image_height))
    except (requests.RequestException, OSError, ValueError) as e:
        # OSError covers Pillow's decoding errors; ValueError covers a failed UTF-8 decode
        print(f'Error processing image from {url}: {e}')
        return None


# Fetch every avatar once so both background passes reuse the same images.
# Each entry is (rank text, channel name, watch count, avatar image).
channel_entries = []
for index, row in top_5_df.iterrows():
    avatar = _load_channel_avatar(row['channel_link'])
    if avatar is not None:
        channel_entries.append((str(index + 1), row['channel'], row['counts'], avatar))

# Iterate over background image paths
for pass_number, background_image_path in enumerate(background_image_paths, start=1):
    # Load background image
    background_image = Image.open(background_image_path)
    draw = ImageDraw.Draw(background_image)

    # The second background gets white text, every other one black
    fill_color = (255, 255, 255) if pass_number == 2 else (0, 0, 0)

    # Starting coordinates
    x_offset = 125
    y_offset = 300

    for number_text, channel, count, avatar in channel_entries:
        # Rank number, then the avatar to its right
        draw.text((x_offset, y_offset), number_text, font=number_font, fill=fill_color)
        background_image.paste(avatar, (x_offset + number_font_size, y_offset))

        # Channel name with the watch count directly beneath it
        text_x = x_offset + image_width + 135
        draw.text((text_x, y_offset), channel, font=channel_font, fill=fill_color)
        draw.text(
            (text_x, y_offset + font_size_large),
            f'{count} videos watched',
            font=watched_font,
            fill=fill_color,
        )

        # Move down to the next row
        y_offset += 280

    # Title and credit line are drawn once per image, after the rows, so they stay on top
    draw.text((55, 150), "My Top YouTube Channels", font=title_font, fill=fill_color)
    draw.text(
        (450, background_image.height - 150),
        "coded by github.com/apancoast".upper(),
        font=watched_font,
        fill=fill_color,
    )

    # Save the final image
    output_path = f'result_image_{pass_number}.jpg'
    background_image.save(output_path)