# In [1]:
import html
import re

import pandas as pd
from IPython.display import display, HTML
from pytz import UTC

# Step 1: Read the 'casts.csv' file into a DataFrame
df = pd.read_csv('casts.csv')

# Step 2: Convert 'published_at' column to timezone-aware (UTC) datetimes so it
# can be compared against the UTC bounds below
df['published_at'] = pd.to_datetime(df['published_at'], utc=True)

# Step 3: Keep casts published from September 10, 2022 through the END of
# September 16, 2022 (UTC).
# NOTE(review): the previous comment called September 16, 2022 a Saturday; in
# 2022 that date is a Friday (September 10 is the Saturday). Confirm the
# intended week/year.
start_date = pd.Timestamp('2022-09-10', tz=UTC)
end_date = pd.Timestamp('2022-09-16', tz=UTC)
# `end_date` is midnight at the *start* of the last day, so comparing with
# `<= end_date` would drop every cast published later on September 16. Use a
# half-open interval that ends at the following midnight instead.
end_exclusive = end_date + pd.Timedelta(days=1)
in_date_range = (df['published_at'] >= start_date) & (df['published_at'] < end_exclusive)
filtered_df = df[in_date_range].copy()

# Step 4: Filter out replies (a reply is a cast with a non-null parent_hash)
original_posts_df = filtered_df.loc[filtered_df['parent_hash'].isnull()]

# Step 5: Sort the original posts by 'reactions_count', most reactions first
sorted_df = original_posts_df.sort_values(by='reactions_count', ascending=False)

# Step 10: Count of total casts made in the week including replies
total_casts_including_replies = filtered_df.shape[0]

# Step 11: Count of total casts made in the week excluding replies
total_casts_excluding_replies = original_posts_df.shape[0]

# Step 12: Display both totals in a small summary block.
# The font size uses the `px` unit; the previous value `18x` is not a valid CSS
# length, so the declaration had no effect.
total_casts_html = f"""
    <div style="max-width: 600px; margin-bottom: 25px;">
        <h2 style="margin-bottom: 15px;">Total Casts</h2>
        <p style="font-size: 18px; margin: 5px 0 5px;">Including Replies: {total_casts_including_replies}</p>
        <p style="font-size: 18px; margin: 5px 0;">Excluding Replies: {total_casts_excluding_replies}</p>
    </div>
"""
display(HTML(total_casts_html))

# Step 8: Get the top 30 most active users based on original casts only
# (sorted_df contains no replies), counted per author_username
top_30_active_users = sorted_df['author_username'].value_counts().nlargest(30)

# Step 9: Display the top 30 most active users excluding replies
active_users_heading_html = "<h2>Top 30 Most Active Users (Excluding Replies)</h2>"
active_users_description_html = "<p>Only original casts are considered for this calculation. Replies are excluded.</p>"
# Usernames come straight from the CSV, so escape them before embedding them in
# HTML. The list items are joined in one pass instead of repeated `+=`.
active_users_html = "<ol>" + "".join(
    f"<li>{html.escape(str(user))} - {count} casts</li>"
    for user, count in top_30_active_users.items()
) + "</ol>"

display(HTML(active_users_heading_html))
display(HTML(active_users_description_html))
display(HTML(active_users_html))

# Step 8 (part 2): Get the top 30 most active users based on all casts in the
# date range, including replies, counted per author_username
top_30_active_users_including_replies = filtered_df['author_username'].value_counts().nlargest(30)

# Step 9 (part 2): Display the top 30 most active users including replies
active_users_including_replies_heading_html = "<h2>Top 30 Most Active Users (Including Replies)</h2>"
active_users_including_replies_description_html = "<p>All casts, including replies, are considered for this calculation.</p>"
# Usernames come straight from the CSV, so escape them before embedding them in
# HTML. The list items are joined in one pass instead of repeated `+=`.
active_users_including_replies_html = "<ol>" + "".join(
    f"<li>{html.escape(str(user))} - {count} casts</li>"
    for user, count in top_30_active_users_including_replies.items()
) + "</ol>"

display(HTML(active_users_including_replies_heading_html))
display(HTML(active_users_including_replies_description_html))
display(HTML(active_users_including_replies_html))
# Step 6: Display the section heading that precedes the top-100 cast cards
heading_html = "<h2>Top 100 casts ranked by total likes</h2>"
display(HTML(heading_html))

# Step 7: Display top 100 original posts in individual cards

# A whitespace-delimited http(s) URL ending in an image extension. The same
# pattern decides both which URLs are removed from the text and which are
# embedded as <img>, so the two steps cannot disagree (previously `.jpeg` URLs
# were removed from the text but never shown, and URLs such as
# `https://a.png.com/page` were cut in half).
_IMAGE_URL_RE = re.compile(r'(?<!\S)https?://\S+\.(?:png|jpe?g|gif)(?!\S)')
# Any remaining http(s) URL; rendered as a clickable link.
_LINK_URL_RE = re.compile(r'(https?://\S+)')


def create_cast_card(row):
    """Build the HTML card for a single cast.

    Args:
        row: Mapping-like object (for example a DataFrame row) providing the
            keys 'hash', 'text', 'author_username', 'author_fid',
            'reactions_count' and 'replies_count'.

    Returns:
        str: An HTML fragment containing a link to the cast on searchcaster,
        the author line, the cast text (image URLs removed, other URLs turned
        into links, newlines turned into ``<br>``), one ``<img>`` per image
        URL, and the reaction/reply counts.

    All values taken from ``row`` are HTML-escaped before being embedded,
    because cast content is user-generated and must not be interpreted as
    markup.
    """
    def esc(value):
        # str() first so numeric values (fid, counts) can be escaped as well.
        return html.escape(str(value))

    raw_text = row['text']
    if not isinstance(raw_text, str):
        # A missing value (e.g. NaN) is not a string and would make the regex
        # calls raise; treat it as an empty cast body instead.
        raw_text = ''

    image_urls = _IMAGE_URL_RE.findall(raw_text)
    text_without_image_urls = _IMAGE_URL_RE.sub('', raw_text)
    # Escape BEFORE linkifying so the only markup in the body is what this
    # function adds. Escaped URLs (e.g. `&` -> `&amp;`) stay valid inside href.
    text_with_hyperlinks = _LINK_URL_RE.sub(
        r'<a href="\1" target="_blank">\1</a>',
        html.escape(text_without_image_urls),
    )
    text_with_line_breaks = text_with_hyperlinks.replace('\n', '<br>')

    merkle_root_url = f"https://searchcaster.xyz/search?merkleRoot={esc(row['hash'])}"
    author_username = esc(row['author_username'])
    author_fid = esc(row['author_fid'])
    reactions_count = esc(row['reactions_count'])
    replies_count = esc(row['replies_count'])

    # The two <div> elements opened here are closed by the final part below.
    parts = [f"""
    <div style="border: 1px solid #ccc; max-width: 600px; padding: 25px; margin-bottom: 25px;">
        <div style="max-width: 550px; margin: 0 auto;">
            <p><strong><a href="{merkle_root_url}" target="_blank">merkleRoot</a></strong></p>
            <p><strong>@{author_username} · fid {author_fid}</strong></p>
            <div style="word-wrap: break-word; margin: 10px 0;">{text_with_line_breaks}</div>
    """]

    parts.extend(
        f'<div style="text-align: center;"><img src="{esc(url)}" style="max-width: 550px; margin-top: 10px;"></div>'
        for url in image_urls
    )

    # Heart icon with the reactions count, then chat-bubble icon with the
    # replies count, both inside one paragraph.
    purple_heart_icon = "&#x1F49C;"  # HTML entity for the purple heart emoji
    chat_bubble_icon = "&#x1F4AC;"  # HTML entity for the speech balloon emoji
    parts.append(f'<p style="color: purple; margin: 10px 0 0;">{purple_heart_icon} {reactions_count} ')
    parts.append(f'<span style="margin-left: 5px; margin-top: 10px;">{chat_bubble_icon} {replies_count}</span></p>')

    parts.append("</div></div>")
    return "".join(parts)

# Render one card per cast for the 100 most-reacted original posts, in ranking
# order (sorted_df is already sorted by reactions_count, descending).
top_casts = sorted_df.head(100)
for index, row in top_casts.iterrows():
    card = create_cast_card(row)
    display(HTML(card))