In [2]:
import requests
from bs4 import BeautifulSoup, Comment

# Player ID for DeAndre Hopkins
player_id = "HopkDe00"

# The player page lives under a directory named after the first character of the ID.
url = f"https://www.pro-football-reference.com/players/{player_id[0]}/{player_id}.htm"
headers = {"User-Agent": "Mozilla/5.0"}

# Define the result up front so later cells always find `comments_list`,
# even when the request is rate limited or fails (it is then simply empty
# instead of undefined or stale from a previous run).
comments_list = []

try:
    response = requests.get(url, headers=headers, timeout=10)
    if response.status_code == 429:
        # Rate limiting is reported separately and is not retried here.
        print(f"🛑 Rate limited at {player_id}.")
    else:
        # Any other 4xx/5xx response is an error page, not the player page:
        # raise instead of parsing it as if the fetch had succeeded.
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        # Select only the text nodes that are HTML comments (<!-- ... -->).
        comments = soup.find_all(string=lambda text: isinstance(text, Comment))
        print(f"📝 Found {len(comments)} HTML comments.")

        # Collect (1-based position, stripped comment text) pairs for the
        # inspection cells that follow, rather than printing them here.
        comments_list = [
            (idx, comment.strip())
            for idx, comment in enumerate(comments, start=1)
        ]

except requests.RequestException as e:
    # Covers connection errors, timeouts and the HTTPError raised above.
    # Non-network errors (e.g. bugs in the parsing code) are deliberately
    # not swallowed so they surface with a full traceback.
    print(f"⚠️ Error fetching {player_id}: {e}")


📝 Found 84 HTML comments.


In [3]:
# Manual inspection: preview the first 5 collected comments.
# Each preview is a header, at most 1500 characters of the comment text
# (kept short for readability), and a footer, each on its own output line.
for idx, comment in comments_list[:5]:
    print(
        f"\n\n--- Comment {idx} ---\n",
        comment[:1500],
        "\n--- End Comment ---\n",
        sep="\n",
    )




--- Comment 1 ---

include:start ="/inc/klecko_header_pfr.html_f"

--- End Comment ---



--- Comment 2 ---

no:cookie fast load the css.

--- End Comment ---



--- Comment 3 ---

CSS start

--- End Comment ---



--- Comment 4 ---

CSS END

--- End Comment ---



--- Comment 5 ---

JS START

--- End Comment ---



In [4]:
# Keep only the (index, text) pairs whose text contains the substring
# 'receiving_and_rushing'.
receiving_comments = [
    (idx, comment)
    for idx, comment in comments_list
    if 'receiving_and_rushing' in comment
]

print(f"Found {len(receiving_comments)} comments containing 'receiving_and_rushing'.")

# Show a truncated preview (first 1500 characters) of every match.
for idx, comment in receiving_comments:
    print(
        f"\n\n--- Relevant Comment {idx} ---\n",
        comment[:1500],
        "\n--- End Comment ---\n",
        sep="\n",
    )


Found 0 comments containing 'receiving_and_rushing'.


In [5]:
# Write every collected comment, untruncated, to a UTF-8 text file so the
# full content can be inspected outside the notebook.

with open("hopkins_html_comments_dump.txt", "w", encoding="utf-8") as dump_file:
    for idx, comment in comments_list:
        # One record per comment: header, full comment text, footer.
        dump_file.writelines((
            f"\n\n--- Comment {idx} ---\n\n",
            comment,
            "\n\n--- End Comment ---\n\n",
        ))

print("✅ All comments dumped to 'hopkins_html_comments_dump.txt'")


✅ All comments dumped to 'hopkins_html_comments_dump.txt'
