In [38]:
# Mount Google Drive into the Colab filesystem so files under
# /content/drive are reachable from this notebook.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime
from itertools import groupby
import json
import os


In [3]:
# GitHub repository page that is fetched and parsed below to discover the
# tournament data files.
gh_link = 'https://github.com/cmgchess/Titled-Tuesday-Data'

In [4]:
# Download the repository landing page and collect the rows of its file listing.
# NOTE(review): the CSS class names below depend on GitHub's page markup —
# confirm they still match if this cell starts failing.
r = requests.get(gh_link, timeout=30)  # never hang forever on a stalled connection
r.raise_for_status()  # fail loudly on 4xx/5xx instead of parsing an error page
soup = BeautifulSoup(r.content, 'html.parser')
content = soup.find('div', class_='js-details-container Details')
if content is None:
  # Without this guard the next line fails with an opaque AttributeError on None.
  raise RuntimeError("file listing container not found on " + gh_link)
rows = content.find_all('div', {'class': 'Box-row Box-row--focus-gray py-2 d-flex position-relative js-navigation-item'})

In [5]:
# Turn the file-listing rows into a list of names: the ".json" extension is
# dropped and the non-data entries ("scrapers", "README.md") are excluded.
JSON_SUFFIX = '.json'
NON_DATA_ENTRIES = ("scrapers", "README.md")

file_names = []
for x in rows:
  a_tag = x.find('a', class_='js-navigation-open')
  if a_tag is None:
    # Row without the expected link: nothing to extract, so skip it
    # rather than crash on None.get_text().
    continue
  text = a_tag.get_text().strip()
  # Remove the extension only when it is a real suffix; str.replace would
  # also delete ".json" occurring in the middle of a name.
  if text.endswith(JSON_SUFFIX):
    text = text[:-len(JSON_SUFFIX)]
  if text not in NON_DATA_ENTRIES:
    file_names.append(text)

len(file_names)

362

In [6]:
# URL prefix of a chess.com live tournament page; each entry of file_names is
# appended to it to build the page URL.
base_url = 'https://www.chess.com/tournament/live/'

In [None]:
# Scrape the chess.com page of every tournament and collect one metadata
# record per tournament: title, player count, date, time and links.
events = []
# A single Session lets requests reuse connections across the many page fetches.
with requests.Session() as session:
  for i in file_names:
    tourney_link = base_url + i
    r = session.get(tourney_link, timeout=30)  # never hang forever on one page
    r.raise_for_status()  # do not try to parse an HTTP error page
    soup = BeautifulSoup(r.content, 'html.parser')

    name_el = soup.find('h1', class_='v5-title-label')
    if name_el is None:
      raise RuntimeError("tournament title not found on " + tourney_link)
    name = name_el.get_text().strip()
    print("processing: " + name)

    stats_el = soup.find('div', class_='tournaments-live-view-content-stats')
    if stats_el is None:
      raise RuntimeError("tournament stats not found on " + tourney_link)
    span_elements = stats_el.find_all('span')
    # The second span starts with the player count; keep only the leading number.
    number_of_players = int(span_elements[1].get_text(strip=True).split()[0])

    # Normalise a narrow no-break space (U+202F) in the scraped text *before*
    # parsing. Doing it on the strftime result, as before, could never match
    # because that string is produced from a format with a plain space.
    date_and_time = span_elements[2].get_text(strip=True).replace('\u202f', ' ')
    parsed_date = datetime.strptime(date_and_time, "%b %d, %Y, %I:%M %p")
    formatted_date = parsed_date.strftime("%d.%m.%Y")
    formatted_time = parsed_date.strftime("%I:%M %p")

    event = {
      "title": name,
      "num_players": number_of_players,
      "date": formatted_date,
      "time": formatted_time,
      "tourney_link": tourney_link,
      "data": f"https://github.com/cmgchess/Titled-Tuesday-Data/blob/main/{i}.json"
    }
    events.append(event)

processing: *** Biggest EVER Titled Tuesday
processing: *** MasterClass Titled Tuesday Blitz
processing: *** MasterClass Titled Tuesday Blitz
processing: *** MasterClass Titled Tuesday Blitz
processing: *** MasterClass Titled Tuesday Blitz
processing: *** MasterClass Titled Tuesday Blitz
processing: *** MasterClass Titled Tuesday Blitz
processing: *** MasterClass Titled Tuesday Blitz
processing: *** Monthly 3|2 Blitz Masters
processing: *** Monthly 3|2 Blitz Masters
processing: *** Qualifier 1: Titled Tuesday 3|2 Blitz
processing: *** Qualifier 2: Titled Tuesday 3|2 Blitz
processing: *** Qualifier 3: Titled Tuesday 3|2 Blitz
processing: *** Qualifier 4: Titled Tuesday 3|2 Blitz
processing: *** SCC Grand Prix: Titled Tuesday Blitz
processing: *** SCC Grand Prix: Titled Tuesday Blitz
processing: *** SCC Grand Prix: Titled Tuesday Blitz
processing: *** SCC Grand Prix: Titled Tuesday Blitz
processing: *** SCC Grand Prix: Titled Tuesday Blitz
processing: *** SCC Grand Prix: Titled Tuesday B

In [None]:
# Order events from newest to oldest.
# "time" is stored as 12-hour text ("%I:%M %p"); comparing it as a string puts
# "02:00 PM" before "08:00 AM", so same-day events were misordered. Parse date
# and time together and compare real datetimes instead.
events.sort(
    key=lambda x: datetime.strptime(x["date"] + " " + x["time"], "%d.%m.%Y %I:%M %p"),
    reverse=True,
)


In [None]:
# Bucket the events by calendar year, keeping the order in which they appear
# in `events` inside each bucket.
# itertools.groupby only merges *adjacent* items with the same key, so with an
# unsorted `events` list a later run of a year would overwrite an earlier one.
# Accumulating per key gives the same result on sorted input and loses nothing
# on unsorted input.
grouped_data = {}
for event in events:
    year = datetime.strptime(event["date"], "%d.%m.%Y").year
    grouped_data.setdefault(year, []).append(event)


In [13]:
# Persist the per-year metadata as pretty-printed JSON on the mounted Drive.
# The year keys of grouped_data are integers; json.dump writes them as strings.
output_filename = "/content/drive/MyDrive/TTChesscom/Ranks/tournament_meta.json"
# Create the target folder if it does not exist yet, otherwise open() fails.
os.makedirs(os.path.dirname(output_filename), exist_ok=True)
with open(output_filename, 'w', encoding='utf-8') as json_file:
  json.dump(grouped_data, json_file, indent=4)
print("written: " + output_filename)

written: /content/drive/MyDrive/TTChesscom/Ranks/tournament_meta.json


In [14]:
from tabulate import tabulate


In [25]:
# Maps each key of an event record to the column title shown in the
# generated Markdown tables.
custom_headers = dict(
    title="Title",
    num_players="Number of Players",
    date="Date",
    time="Time",
    tourney_link="Tournament Link",
    data="Rank List",
)
# Column titles in display order; dicts preserve insertion order, so this is
# the same sequence as the mapping above.
headers = list(custom_headers.values())

In [40]:
# Write one GitHub-flavoured Markdown table per year to "<year>.md".
# NOTE(review): the files are written relative to the current working
# directory, not to the Drive folder used for the JSON file — confirm intended.
for year, entries in grouped_data.items():
    file_name = f"{year}.md"
    entries_with_custom_names = []
    for entry in entries:
        # Rename record keys to their display titles; unknown keys pass through.
        renamed = {custom_headers.get(col, col): value for col, value in entry.items()}
        if 'Title' in renamed:
            # A raw "|" would be read as a column separator in the Markdown table.
            renamed['Title'] = renamed['Title'].replace("|", "\\|")
        # Render both URL columns as compact Markdown links.
        for link_column in ('Tournament Link', 'Rank List'):
            if link_column in renamed:
                renamed[link_column] = f"[Link]({renamed[link_column]})"
        entries_with_custom_names.append(renamed)
    table = tabulate(entries_with_custom_names, headers="keys", tablefmt="github")
    # Explicit encoding: titles are scraped text and may contain non-ASCII
    # characters, so do not depend on the platform default.
    with open(file_name, "w", encoding="utf-8") as file:
        file.write(table)