# Connections Pilot Notebook

This notebook uses Selenium to query past Connections games and collect their answers.

By: Elsie Wang

Date: 03/18/24

## Overhead

In [1]:
# Imports
import re
import logging
import os
import time
import random
import csv

import pandas as pd
import numpy as np

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
from collections import defaultdict
from tqdm.notebook import tqdm
from selenium.webdriver.common.by import By

from util import *
from matplotlib import pyplot as plt
import scipy.stats as stats

In [2]:
# Setup logger
# Create the log directory in one step; exist_ok avoids the check-then-create
# race of "if not exists: mkdir".
os.makedirs('../logs/', exist_ok=True)

# Touch the log file (append mode never truncates) so it exists even if
# basicConfig below turns out to be a no-op, which happens when the root
# logger already has handlers configured.
with open('../logs/connections-pilot.log', 'a'):
    pass

logging.basicConfig(
    filename='../logs/connections-pilot.log',
    filemode='w',  # start from an empty log file on each configuration
    format='%(asctime)s %(message)s',
    datefmt='%m/%d/%Y %I:%M:%S %p',
    level=logging.DEBUG,
)

In [3]:
# Setup Webdriver Options
options = Options()

# Command-line switches passed to Chrome, added in this order.
# NOTE(review): "--disable-blink-features" is passed without a value here;
# confirm whether a specific feature name was intended.
chrome_switches = (
    "--disable-notifications",
    "--incognito",
    "--start-maximized",
    "--disable-blink-features",
    "--disable-extensions",
    "--no-sandbox",
)
for switch in chrome_switches:
    options.add_argument(switch)

# Experimental options: mark the "Chrome PDF Viewer" plugin as disabled in the
# profile prefs, exclude the "enable-automation" switch, and turn off the
# automation extension.
pdf_viewer_pref = {"enabled": False, "name": "Chrome PDF Viewer"}
options.add_experimental_option(
    "prefs", {"plugins.plugins_list": [pdf_viewer_pref]}
)
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)

# NOTE(review): this path is machine-specific, and binary_location is normally
# the browser executable rather than the driver -- confirm it is correct.
options.binary_location = "/usr/local/bin/webdriver"

In [26]:
# Setup Selenium
# Report connectivity first (internet() is presumably provided by the
# `from util import *` star-import), then start Chrome with `options`.
is_online = internet()
print("Internet connected: ", is_online)
driver = webdriver.Chrome(options=options)

Internet connected:  True


## Collection

In [27]:
# Lookup table: full `class` attribute of a category button -> the colour name
# recorded for that category. Keys must match the attribute value exactly.
color_map = {
    "chakra-button css-1t45e3y": "purple",
    "chakra-button css-1febctu": "yellow",
    "chakra-button css-nlwqpb": "blue",
    "chakra-button css-1p5tzem": "green",
}

In [28]:
def connections_search(query, driver):
    """Load the page for one game in the given driver and return its HTML.

    Args:
        query: Game number; interpolated into the connectionsplus.io game URL.
        driver: Selenium WebDriver used to load the page.

    Returns:
        ``driver.page_source`` after the page load and a 2-second pause, or
        ``None`` (after printing "No internet") when ``internet()`` is falsy.
        If a ``CAPTCHAException`` is raised while loading, the page source at
        that moment is returned instead.
    """
    # Guard clause: nothing to do without connectivity.
    if not internet():
        print("No internet")
        return None

    game_url = f"https://connectionsplus.io/game/{query}"
    try:
        driver.get(game_url)
        time.sleep(2)  # fixed pause before reading the page source
        html = driver.page_source
    except CAPTCHAException:
        # CAPTCHAException is not defined in this notebook (presumably it
        # comes from util); fall back to whatever the driver is showing.
        html = driver.page_source
    return html

In [29]:
def play_game():
    """Submit random four-word guesses until the game ends, then close the dialog.

    Works on the module-level ``driver``. Each round clicks four randomly
    sampled word buttons, clicks Submit, then clicks Deselect All. When the
    loop ends it waits 5.5 seconds and clicks the exit 'X' so the answers
    become visible.
    """
    # Keep guessing while any "css-os1u4x" element is present
    # (presumably the remaining-mistakes indicator -- TODO confirm).
    while driver.find_elements(By.CLASS_NAME, "css-os1u4x"):
        word_tiles = driver.find_elements(By.CLASS_NAME, "css-butdwn")
        for tile in random.sample(word_tiles, 4):
            tile.click()

        # First "css-dhvdts" element is the Submit button.
        submit_button = driver.find_elements(By.CLASS_NAME, "css-dhvdts")[0]
        submit_button.click()

        # Second "css-vpdppp" element is the Deselect All button.
        deselect_button = driver.find_elements(By.CLASS_NAME, "css-vpdppp")[1]
        deselect_button.click()

    time.sleep(5.5)

    # Exit 'X' to see answers.
    close_button = driver.find_elements(By.CLASS_NAME, "css-onkibi")[0]
    close_button.click()

In [30]:
def _csv_line(*fields):
    """Return ``fields`` as a single CSV record terminated by a newline.

    Fields containing commas or quotes (such as the text form of a word list)
    are quoted so each record keeps the column count of its header.
    """
    import io  # local import: io is not part of the notebook's import cell

    buffer = io.StringIO()
    csv.writer(buffer, lineterminator="\n").writerow(fields)
    return buffer.getvalue()


def parse_game(raw_html, query, game_id):
    """Play the currently loaded game to its end and collect its answers.

    Uses the module-level ``driver``; the page must already be loaded.

    Args:
        raw_html: Page source from the search step. Currently unused; kept
            for interface compatibility.
        query: Game identifier, used only in log messages.
        game_id: Value written in the ``Game_ID`` column of every record.

    Returns:
        A ``(content1, content2)`` tuple:
        content1 is a list of CSV records ``Game_ID,Words,Category,Color``,
        one per category that parsed successfully;
        content2 is one CSV record ``Game_ID,Words`` holding all collected
        words in shuffled order.
        Every record ends with a newline. ``Words`` is the text form of a
        Python list, quoted so its embedded commas do not split the column.
    """
    all_words = []
    content1 = []
    play_game()

    buttons = driver.find_elements(By.TAG_NAME, "button")  # all buttons on the page
    categories = driver.find_elements(By.CLASS_NAME, "css-1gxnet")  # category titles
    words = driver.find_elements(By.CLASS_NAME, "css-z9cpgb")  # one element per word group

    # Best effort per category: a failure in one category is logged and
    # skipped so the remaining categories are still collected.
    for i in range(4):
        try:
            category = categories[i].text
            word_answers = words[i].text.split(', ')
            color = color_map[buttons[i].get_attribute("class")]
        except Exception:
            # Log the real cause (with traceback) instead of discarding it.
            logging.exception(
                "Failed to parse category %d of game %s", i, query
            )
            continue
        all_words += word_answers
        content1.append(_csv_line(game_id, word_answers, category, color))

    random.shuffle(all_words)
    content2 = _csv_line(game_id, all_words)
    return content1, content2

In [32]:
# Collect all answers to games from game 1 to game 282
all_games = np.arange(1, 283)


def _is_new_or_empty(path):
    """Return True when ``path`` does not exist yet or has no content."""
    return not os.path.exists(path) or os.path.getsize(path) == 0


# The files are opened in append mode so earlier results are kept across runs.
# Decide BEFORE opening whether each file still needs its header row, so a
# re-run does not insert a second header in the middle of the data.
answers_needs_header = _is_new_or_empty("answers.csv")
prompt_needs_header = _is_new_or_empty("prompt.csv")

with open("answers.csv", "a", encoding="utf-8") as file1, \
        open("prompt.csv", "a", encoding="utf-8") as file2:
    # Write the header only once per file
    if answers_needs_header:
        file1.write("Game_ID,Words,Category,Color\n")
    if prompt_needs_header:
        file2.write("Game_ID,Words\n")

    # Load, play and record each game in turn
    for game in tqdm(all_games):
        query = f"{game}"
        raw_html = connections_search(query, driver)
        content1, content2 = parse_game(raw_html, query, game)

        file1.writelines(content1)  # one record per parsed category
        file2.write(content2)       # one record with the shuffled word list

  0%|          | 0/188 [00:00<?, ?it/s]