In [None]:
pip install requests beautifulsoup4




In [None]:
import requests
from bs4 import BeautifulSoup
import csv
from datetime import datetime, timedelta
import re

# --- Scrape configuration ---------------------------------------------------
start_date = datetime(2024, 6, 26)
num_days = 390
starting_puzzle_number = 115
base_url = "https://www.cnet.com/tech/gaming/todays-nyt-strands-hints-answers-and-help-for-{month}-{day}-{puzzle_number}/"
output_file = "nyt_strands_dataset.csv"
# Seconds to wait for the server before giving up on one page. Without a
# timeout a single stalled connection would block the whole run indefinitely.
request_timeout = 30

# Months for which the URL is built with an abbreviated month name.
_ABBREVIATED_MONTHS = (9, 10, 11, 12, 1, 2)
# A word-list item is upper-case words separated by commas, optionally ending in ".".
_WORD_LIST_RE = re.compile(r"^[A-Z\s,]+\.?$")


def _month_slug(date):
    """Return the month part of the article URL for ``date``."""
    if date.month in _ABBREVIATED_MONTHS:
        m = date.strftime("%b").lower()
        # September is spelled "sept" rather than the three-letter "sep".
        return "sept" if m == "sep" else m
    return date.strftime("%B").lower()


def _extract_theme(soup):
    """Return the text following "Today's Strands theme is", or "N/A" if absent."""
    for tag in soup.find_all(["strong", "b", "p", "h2", "h3"]):
        if "Today's Strands theme is" in tag.text:
            match = re.search(r"Today's Strands theme is[:\s]*([^\n.]+)", tag.text)
            if match:
                return match.group(1).strip()
    return "N/A"


def _extract_word_list(soup, heading_text):
    """Return the first comma-separated upper-case list item after a heading.

    Headings (h2/h3) and list items are walked in document order. Once a
    heading containing ``heading_text`` has been seen, the first ``li`` that
    looks like a word list is split on commas and returned. An empty list is
    returned when no such item exists.
    """
    in_section = False
    for tag in soup.find_all(["h2", "h3", "li"]):
        if tag.name in ["h2", "h3"] and heading_text in tag.text:
            in_section = True
        elif in_section and tag.name == "li":
            text = tag.get_text(strip=True)
            if _WORD_LIST_RE.match(text) and "," in text:
                return [w.strip() for w in text.rstrip(".").split(",")]
    return []


# One HTTP session (connection reuse) and one open CSV handle for the whole run.
with requests.Session() as session, \
        open(output_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["Date", "PuzzleNumber", "Theme", "Spangram", "ClueWords", "Answers"])

    for i in range(num_days):
        date = start_date + timedelta(days=i)
        # Puzzle numbers are taken to advance by one per calendar day.
        puzzle_number = starting_puzzle_number + i

        url = base_url.format(
            month=_month_slug(date),
            day=date.day,
            puzzle_number=puzzle_number
        )

        try:
            response = session.get(url, timeout=request_timeout)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            page_text = soup.get_text(separator="\n")

            theme = _extract_theme(soup)

            spangram_match = re.search(r"Today's Strands spangram is\s+([A-Z]+)", page_text)
            spangram = spangram_match.group(1).strip() if spangram_match else "N/A"

            clue_words = _extract_word_list(soup, "Clue words")
            answers = _extract_word_list(soup, "Answers for today's Strands puzzle")

            writer.writerow([
                date.strftime("%Y-%m-%d"),
                puzzle_number,
                theme,
                spangram,
                ", ".join(clue_words),
                ", ".join(answers)
            ])
            # Flush per row so an interrupted run keeps everything scraped so far.
            file.flush()

            print(f"Scraped puzzle {puzzle_number} ({date.strftime('%Y-%m-%d')})")

        except Exception as e:
            # Best effort: a missing or unparsable page is reported and skipped
            # (no row is written for that day); the run continues.
            print(f"Failed on {date.strftime('%Y-%m-%d')} (Puzzle {puzzle_number}): {e}")


✅ Scraped puzzle 115 (2024-06-26)
✅ Scraped puzzle 116 (2024-06-27)
✅ Scraped puzzle 117 (2024-06-28)
✅ Scraped puzzle 118 (2024-06-29)
✅ Scraped puzzle 119 (2024-06-30)
✅ Scraped puzzle 120 (2024-07-01)
✅ Scraped puzzle 121 (2024-07-02)
✅ Scraped puzzle 122 (2024-07-03)
✅ Scraped puzzle 123 (2024-07-04)
✅ Scraped puzzle 124 (2024-07-05)
✅ Scraped puzzle 125 (2024-07-06)
✅ Scraped puzzle 126 (2024-07-07)
✅ Scraped puzzle 127 (2024-07-08)
✅ Scraped puzzle 128 (2024-07-09)
✅ Scraped puzzle 129 (2024-07-10)
✅ Scraped puzzle 130 (2024-07-11)
✅ Scraped puzzle 131 (2024-07-12)
✅ Scraped puzzle 132 (2024-07-13)
✅ Scraped puzzle 133 (2024-07-14)
✅ Scraped puzzle 134 (2024-07-15)
✅ Scraped puzzle 135 (2024-07-16)
✅ Scraped puzzle 136 (2024-07-17)
✅ Scraped puzzle 137 (2024-07-18)
✅ Scraped puzzle 138 (2024-07-19)
✅ Scraped puzzle 139 (2024-07-20)
✅ Scraped puzzle 140 (2024-07-21)
✅ Scraped puzzle 141 (2024-07-22)
✅ Scraped puzzle 142 (2024-07-23)
✅ Scraped puzzle 143 (2024-07-24)
✅ Scraped puzz

In [None]:
def generate_snaking_grid_with_snaking_spangram(words, spangram, rows=8, cols=6):
    """Lay out a spangram and a list of words as snaking paths that fill a grid.

    The spangram is placed first and its path must touch two opposite edges
    (top and bottom rows, or left and right columns). The remaining words are
    then placed one by one, in the given order. Each word occupies a chain of
    cells in which consecutive letters are horizontal, vertical or diagonal
    neighbours, and no cell is used twice.

    Placement is greedy: start cells are scanned row by row, only the first
    path the depth-first search finds from a start cell is considered, and
    words that were already placed are never moved.

    Args:
        words: Words to place after the spangram.
        spangram: The word that must span opposite edges.
        rows: Number of grid rows.
        cols: Number of grid columns.

    Returns:
        A ``(grid, word_paths)`` tuple. ``grid`` is a ``rows`` x ``cols`` list
        of single-character strings. ``word_paths`` maps each placed word to
        the list of ``(row, col)`` cells holding its letters, in order. A word
        that occurs more than once keeps only its last path.

    Raises:
        AssertionError: If the letters do not exactly fill the grid.
        RuntimeError: If the spangram or a word cannot be placed.
    """
    assert sum(len(w) for w in words) + len(spangram) == rows * cols, \
        f"Total letters must fill the grid ({rows * cols})."

    # The eight neighbour offsets, in the order the search tries them.
    directions = [(-1, -1), (-1, 0), (-1, 1),
                  (0, -1),          (0, 1),
                  (1, -1),  (1, 0), (1, 1)]

    grid = [['' for _ in range(cols)] for _ in range(rows)]
    used = set()
    word_paths = {}

    def is_valid(x, y):
        return 0 <= x < rows and 0 <= y < cols and (x, y) not in used

    def dfs(x, y, word, path):
        # On success every cell of the returned path is left marked in `used`.
        # On failure the cells this call added are released again.
        if len(path) == len(word):
            return path
        for dx, dy in directions:
            nx, ny = x + dx, y + dy
            if is_valid(nx, ny):
                used.add((nx, ny))
                result = dfs(nx, ny, word, path + [(nx, ny)])
                if result:
                    return result
                used.remove((nx, ny))
        return None

    def touches_opposite_edges(path):
        rows_touched = {x for x, _ in path}
        cols_touched = {y for _, y in path}
        return (0 in rows_touched and rows - 1 in rows_touched) or \
               (0 in cols_touched and cols - 1 in cols_touched)

    def place_word(word, is_spangram=False):
        if not word:
            # An empty word occupies no cells. Searching for it would walk
            # every possible path without ever reaching length zero.
            if is_spangram:
                return False
            word_paths[word] = []
            return True
        for i in range(rows):
            for j in range(cols):
                if (i, j) in used:
                    continue
                used.add((i, j))
                path = dfs(i, j, word, [(i, j)])
                if path and (not is_spangram or touches_opposite_edges(path)):
                    for (x, y), ch in zip(path, word):
                        grid[x][y] = ch
                    word_paths[word] = path
                    return True
                if path:
                    # A path was found but rejected by the edge rule. Release
                    # all of its cells, not just the start cell, otherwise they
                    # stay blocked for every later placement.
                    used.difference_update(path)
                else:
                    used.remove((i, j))
        return False

    if not place_word(spangram, is_spangram=True):
        raise RuntimeError("Could not place spangram touching opposite edges.")

    for word in words:
        if not place_word(word):
            raise RuntimeError(f"Could not place word: {word}")

    return grid, word_paths


In [None]:
# Demo puzzle: 7 spangram letters + 41 answer letters fill the default 8x6 grid.
spangram = "STRANDS"
answers = ['CURL', 'STRING', 'RIBBON', 'THREAD', 'TENDRIL', 'FILAMENT', 'WISP']
cluewords = ['SLIT', 'WILE', 'MALE', 'MEAL', 'SCAR', 'TRUE', 'TWINE', 'RING', 'RINGS', 'TEND', 'LEAN', 'DRAM', 'DRAT', 'TWIN', 'SWIM', 'LAME', 'READ', 'HEAD', 'BITE']
# The generator's third positional parameter is `rows`, so the clue words must
# not be passed there: a list in that slot makes the letter-count assertion fail.
# Only the answers and the spangram are placed in the grid.
grid, word_paths = generate_snaking_grid_with_snaking_spangram(answers, spangram)

def print_grid(grid):
    """Print a heading followed by the grid, one space-separated row per line.

    Cells holding an empty string are shown as ".".
    """
    print("\nFinal Grid:")
    for cells in grid:
        rendered = [cell or "." for cell in cells]
        print(" ".join(rendered))

# Show the generated grid; cells that hold an empty string are printed as ".".
print_grid(grid)




🔤 Final Grid:
S T R A N D
C U R L S S
G N I R T R
N O B B I T
D A E R H T
I R D N E F
L M A L I W
E N T P S I


In [None]:
# List each placed word, left-aligned in a 10-character column, next to the
# (row, col) cells that hold its letters.
print("\n Word Paths:")
for placed_word, cells in word_paths.items():
    print(f"{placed_word:<10} → {cells}")



📍 Word Paths:
STRANDS    → [(0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (1, 4)]
CURL       → [(1, 0), (1, 1), (1, 2), (1, 3)]
STRING     → [(1, 5), (2, 4), (2, 3), (2, 2), (2, 1), (2, 0)]
RIBBON     → [(2, 5), (3, 4), (3, 3), (3, 2), (3, 1), (3, 0)]
THREAD     → [(3, 5), (4, 4), (4, 3), (4, 2), (4, 1), (4, 0)]
TENDRIL    → [(4, 5), (5, 4), (5, 3), (5, 2), (5, 1), (5, 0), (6, 0)]
FILAMENT   → [(5, 5), (6, 4), (6, 3), (6, 2), (6, 1), (7, 0), (7, 1), (7, 2)]
WISP       → [(6, 5), (7, 5), (7, 4), (7, 3)]
WILE       → [(6, 5), (6, 4), (6, 3), (5, 4)]
MALE       → [(6, 1), (6, 2), (6, 3), (5, 4)]
RING       → [(1, 2), (2, 2), (2, 1), (2, 0)]
TEND       → [(4, 5), (5, 4), (5, 3), (5, 2)]
DRAM       → [(4, 0), (5, 1), (6, 2), (6, 1)]
DRAT       → [(4, 0), (5, 1), (6, 2), (7, 2)]
LAME       → [(6, 3), (6, 2), (6, 1), (7, 0)]
READ       → [(4, 3), (4, 2), (4, 1), (4, 0)]
BITE       → [(3, 3), (3, 4), (4, 5), (5, 4)]


In [None]:
import csv
import ast
import json


def update_dataset_with_grids(input_csv, output_csv):
    """Copy a puzzle CSV to ``output_csv`` with a "Grid" column added.

    For every input row the "Spangram" and comma-separated "Answers" columns
    are read and a grid is generated from them. The "Grid" value is:

    * the grid serialized as JSON (a list of rows of single letters),
    * "N/A" when the spangram or answers are missing, or when their letters
      do not add up to the 48 cells of the default 8x6 grid,
    * "ERROR: <message>" when grid generation raises.

    Args:
        input_csv (str): Path of the CSV to read; must have "Spangram" and
            "Answers" columns.
        output_csv (str): Path of the CSV to write.
    """
    rows_with_grid = []

    with open(input_csv, mode='r', encoding='utf-8', newline='') as file:
        reader = csv.DictReader(file)
        # `fieldnames` is None for an empty file; do not add "Grid" twice when
        # the input already has that column.
        fieldnames = list(reader.fieldnames or [])
        if "Grid" not in fieldnames:
            fieldnames.append("Grid")

        for row in reader:
            # DictReader fills the missing cells of a short row with None.
            spangram = (row["Spangram"] or "").strip()
            answers_raw = (row["Answers"] or "").strip()
            # "N/A" is a placeholder for a missing spangram, not a word to place.
            if not spangram or spangram == "N/A" or not answers_raw:
                row["Grid"] = "N/A"
                rows_with_grid.append(row)
                continue

            answers = [word.strip() for word in answers_raw.split(",") if word.strip()]
            total_letters = len(spangram) + sum(len(w) for w in answers)
            # 48 = 8 rows x 6 columns, the generator's default grid size.
            if total_letters != 48:
                row["Grid"] = "N/A"
                rows_with_grid.append(row)
                continue

            try:
                # The generator returns (grid, word_paths); only the grid
                # belongs in the "Grid" column.
                grid, _word_paths = generate_snaking_grid_with_snaking_spangram(answers, spangram)

                row["Grid"] = json.dumps(grid)
            except Exception as e:
                # Best effort: record the failure in the row and keep going.
                row["Grid"] = f"ERROR: {e}"
            rows_with_grid.append(row)


    with open(output_csv, mode='w', encoding='utf-8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows_with_grid)

# Read the scraped dataset and write a copy that has a "Grid" column added.
update_dataset_with_grids("nyt_strands_dataset.csv", "nyt_strands_dataset_with_grids.csv")


In [None]:


import pandas as pd

def filter_puzzles_by_grid_status(input_csv, output_csv):
    """
    Reads a CSV, filters out rows where the 'Grid' column is 'N/A' or starts with 'ERROR:',
    and writes the remaining rows to a new CSV.

    If the 'Grid' column is missing, a warning is printed and every row is
    written unchanged. Errors are reported with a printed message rather than
    raised.

    Args:
        input_csv (str): Path to the input CSV file.
        output_csv (str): Path where the filtered data will be saved.
    """
    try:
        # By default pandas parses the literal text "N/A" as NaN, which would
        # make the 'N/A' comparison below never match. Keep cells as written.
        df = pd.read_csv(input_csv, keep_default_na=False)


        if 'Grid' in df.columns:
            grid_text = df['Grid'].astype(str)
            has_no_grid = (grid_text == 'N/A') | grid_text.str.startswith('ERROR:')
            filtered_df = df[~has_no_grid]
        else:
            print(f"Warning: 'Grid' column not found in {input_csv}. No filtering applied.")
            filtered_df = df


        filtered_df.to_csv(output_csv, index=False)
        print(f"Filtered data saved to {output_csv}. Removed {len(df) - len(filtered_df)} rows.")

    except FileNotFoundError:
        print(f"Error: Input file not found at {input_csv}")
    except Exception as e:
        print(f"An error occurred during filtering: {e}")


# Write the rows that have a usable grid to a separate CSV.
filter_puzzles_by_grid_status("nyt_strands_dataset_with_grids.csv", "nyt_strands_dataset_filtered.csv")
# NOTE: this line is printed unconditionally, including when the call above
# only reported an error.
print("Filtering process completed.")

Filtered data saved to nyt_strands_dataset_filtered.csv. Removed 20 rows.
Filtering process completed.


In [None]:

# Load the filtered dataset and report how many puzzles it contains.
try:
    df_filtered = pd.read_csv("nyt_strands_dataset_filtered.csv")
    num_puzzles_left = df_filtered.shape[0]
    print(f"Number of puzzles with valid grids left: {num_puzzles_left}")
except FileNotFoundError:
    print("Filtered dataset file not found.")
except Exception as exc:
    print(f"An error occurred while counting puzzles: {exc}")


Number of puzzles with valid grids left: 342


In [None]:
# A bare last-line expression uses the notebook's rich table display, which
# pandas truncates to the first and last rows for long frames.
df_filtered

           Date  PuzzleNumber                    Theme       Spangram  \
0    2024-06-27           116          Better together     COMBOMEALS   
1    2024-06-29           118  It's way over your head     OUTERSPACE   
2    2024-06-30           119            Cut and color     JEWELTONES   
3    2024-07-01           120       We've got the beat     PERCUSSION   
4    2024-07-02           121             Heat and eat        COOKING   
..          ...           ...                      ...            ...   
337  2025-07-16           500         Tech accessories         TABLET   
338  2025-07-17           501           Find your seat       THEATERS   
339  2025-07-18           502             Abracadabra!       ITSMAGIC   
340  2025-07-19           503       Hot enough for ya?  SUMMERWEATHER   
341  2025-07-20           504             Shore thing!    BEACHATTIRE   

                                             ClueWords  \
0    AROMA, CAMO, SANE, COME, COMES, GREASE, CUTS, ...   
1    TI