In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from sqlalchemy import select
from pastiche import tables
from pastiche.database import SessionLocal
from datetime import date

In [17]:
# Pull the first ten stored games so later cells have sample rows to work with.
with SessionLocal() as db:
    first_ten = db.query(tables.JumbleGame).limit(10)
    solutions = first_ten.all()
    print(solutions[0])

<pastiche.tables.JumbleGame object at 0x13316f990>


In [4]:
import pandas as pd
from pastiche.game import JumbleGame, JumbleGameCollection
from tqdm import tqdm

In [5]:
# JSON file with the jumble answers, given relative to the notebook's working directory.
path = '../data/jumble_answers_data.json'

# Load the games from that file; the result exposes `.games`, which the next cell indexes.
data = JumbleGameCollection.from_jumble_answers(path)

100%|██████████| 1061/1061 [00:00<00:00, 7460.96it/s]


In [7]:
# Take the first loaded game as a sample to inspect.
game = data.games[0]

In [11]:
# Show the compact solution next to its spaced, human-readable form.
game.solution, game.solution_unjumbled

('DAYINDAYOUT', 'DAY IN (AND) DAY OUT')

In [13]:
def sanitize_solution(solution: str, letters: str, sub: str = "*") -> str:
    """
    Mask the letters of a solution while keeping its layout readable.

    Every alphabetic character of ``solution`` that also occurs in ``letters``
    (compared case-insensitively) is replaced by ``sub``. Spaces, punctuation,
    quotes and anything written between parentheses are copied unchanged.
    Nested parentheses are treated as a single protected region.

    Args:
        solution: The human-readable solution, e.g. ``'DAY IN (AND) DAY OUT'``.
        letters: The letters that make up the answer, e.g. ``'DAYINDAYOUT'``.
        sub: Replacement for each masked letter. Use a single character to keep
            the output the same length as ``solution``.

    Returns:
        The masked solution.

    Example:
        >>> sanitize_solution('DAY IN (AND) DAY OUT', 'DAYINDAYOUT')
        '*** ** (AND) *** ***'
    """
    # Upper-case once instead of once per character of the solution.
    letters_upper = letters.upper()
    # Track nesting depth rather than a flag so an inner ')' does not end
    # the protected region of an outer '('.
    depth = 0
    output = []
    for character in solution:
        if character == '(':
            depth += 1
        elif character == ')':
            # Clamp at zero so a stray ')' cannot push us "inside" parentheses.
            depth = max(depth - 1, 0)
        maskable = (
            depth == 0
            and character.isalpha()
            and character.upper() in letters_upper
        )
        output.append(sub if maskable else character)
    return ''.join(output)


In [22]:
# Print each stored answer beside its masked form and check that masking keeps the length.
for s in solutions:
    unjumbled = s.solution_unjumbled
    sanitized = sanitize_solution(unjumbled, s.solution)
    print(unjumbled, "|", sanitized)
    assert len(unjumbled) == len(sanitized)

DAY IN (AND) DAY OUT | *** ** (AND) *** ***
“WADE” AND SEE | “****” *** ***
RUNNING TOTAL | ******* *****
A LITTLE DIFFERENT | * ****** *********
APPLY HIMSELF | ***** *******
TAUGHT TAUT | ****** ****
QUOTATION “MARX” | ********* “****”
GRAB A BITE | **** * ****
NOT “YETI” | *** “****”
TOOK IT (TO) HEART | **** ** (TO) *****


In [24]:
from datetime import timedelta

In [30]:
from itertools import groupby

In [40]:
value_dates = [date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 5), date(2022, 1, 6), date(2022, 1, 10)]

# Every calendar day between the earliest and latest played date (inclusive).
# min/max keep this correct even if value_dates is not sorted.
first_day, last_day = min(value_dates), max(value_dates)
date_range = [first_day + timedelta(days=offset)
              for offset in range((last_day - first_day).days + 1)]

# Pair each day with a flag telling whether it was played (set lookup is O(1)).
played_days = set(value_dates)
dates_to_check = [(day, day in played_days) for day in date_range]

# Group on the played flag only, so adjacent played days fall into the same run.
# Grouping on the whole (date, flag) pair would make every key unique and
# produce one single-day "run" per date.
consecutive_dates = []
for was_played, run in groupby(dates_to_check, key=lambda pair: pair[1]):
    if was_played:
        consecutive_dates.append([day for day, _ in run])

In [77]:
from itertools import groupby

# Flag the played days with 1, then resample to a daily index so the days in
# between appear as rows with a missing value.
df = pd.DataFrame(index=value_dates, columns=['played'])
df['played'] = 1
df.index = pd.to_datetime(df.index)
df = df.resample("D").first()

# True for days that were played, False for the gaps introduced by resampling.
played_dates = ~df['played'].isnull()

# Collect the row positions of each run of consecutive played days.
consecutive_dates = []
for k, g in groupby(enumerate(played_dates), lambda x: x[1]):
    # A groupby group is a one-shot iterator: materialise it once and reuse the
    # list, otherwise printing it leaves nothing to append afterwards.
    run = list(g)
    print(k)
    print(run)
    if k:
        consecutive_dates.append([position for position, _ in run])

True
[(0, True), (1, True)]
False
[(2, False), (3, False)]
True
[(4, True), (5, True)]
False
[(6, False), (7, False), (8, False)]
True
[(9, True)]


In [74]:
# NOTE(review): `consecutive_missing` is not assigned in any visible cell — presumably left over
# from an earlier kernel session; confirm whether `consecutive_dates` was intended here.
consecutive_missing

[[], [], []]

In [None]:
# Manual corrections: each key is a solution phrase and each value is the same phrase
# with one word wrapped in parentheses.
# NOTE(review): presumably meant for the commented-out `.replace()` call below — confirm.
replace_dict = {
        "“PURR” HIS REQUEST": "“PURR” (HIS) REQUEST",
        "ASSIGNED THE JOB": "ASSIGNED (THE) JOB",
        "FOR THE BIRDS": "FOR (THE) BIRDS",
        "GO HAND IN HAND": "GO HAND (IN) HAND",
        "ALL IN A DAY’S WORK": "ALL IN (A) DAY’S WORK",
        "LOST HIS SHIRT": "LOST (HIS) SHIRT",
        "HOT UNDER THE COLLAR": "HOT UNDER (THE) COLLAR",
        "BORE THE BRUNT OF IT": "BORE (THE) BRUNT OF IT",
        "BREAK IT TO HER": "BREAK (IT) TO HER",
        "FILLED THE BILL": "FILLED (THE) BILL",
        "ROAD TO RECOVERY": "ROAD (TO) RECOVERY",
        "COURT FOR THE COURT": "COURT FOR (THE) COURT",
        "MADE A NAME FOR HIMSELF": "MADE A NAME (FOR) HIMSELF",
        "“WRITE” FOR THE JOB": "“WRITE” FOR (THE) JOB",
        "LEFT THE PREMISES": "LEFT (THE) PREMISES",
        "DOWN TO A TRICKLE": "DOWN (TO) A TRICKLE",
        "END OF THE “RODE”": "END OF (THE) “RODE”",
        "RAISING HER CHILDREN": "RAISING (HER) CHILDREN",
        "BANNED THE BAND": "BANNED (THE) BAND",
        "HAD THE UPPER HAND": "HAD (THE) UPPER HAND",
        "BEAR TO THE RIGHT": "BEAR TO (THE) RIGHT",
        "SEEING WAS BELIEVING": "SEEING (WAS) BELIEVING",
        "THROUGH THE ROOF": "THROUGH (THE) ROOF",
        "HEART OF THE CITY": "HEART OF (THE) CITY",
        "KEEP AN OPEN MIND": "KEEP (AN) OPEN MIND",
        "HAS A “FLARE” FOR IT": "HAS (A) “FLARE” FOR IT",
        "LOST HIS BALANCE": "LOST (HIS) BALANCE",
        "DOWN THE HATCH": "DOWN (THE) HATCH",
        "REFUSE THE REFUSE": "REFUSE (THE) REFUSE",
        "TO SAY THE “LEASED”": "TO SAY (THE) “LEASED”",
        "“WAY” THEIR OPTIONS": "“WAY” (THEIR) OPTIONS"
     }

# df['solution_unjumbled'] = df['solution_unjumbled'].replace()

# OpenAI exploration

In [None]:
import openai
from pydantic import Field
from openai_function_call import OpenAISchema

In [None]:
# class JumbleClue(OpenAISchema):
#     """Class representing a sentence used as a clue for a Jumble game"""
#     clue_sentence: str = Field(..., description="Sentence with gaps to be filled.")
#     solution: str = Field(..., description="The words to use to fill the gaps in the clue-sentence. The solution can be made of multiple words and these words are not separated by a space.")
#     full_sentence: str = Field(..., description="The full sentence with all words.")
    
# class JumbleCluesCollection(OpenAISchema):
#     """List of jumble clues"""
#     clues: list[JumbleClue]
    
# prompt1 = """
# Consider the data below: '\n{data}'. 
#                 For each entry in the list, follow the following steps:
#                 1. identify where are the gaps in the `clue_sentence`.
#                 2. reconstruct the correct words using the `solution` by adding missing spaces
#                 3. use the reconstructed solution to create a `full_sentence` by filling the gaps in the `clue_sentence`
# """


In [None]:
# One record exchanged with the model: a run-together string and its spaced-out reconstruction.
# NOTE(review): the docstring still talks about a "sentence used as a clue" although the fields
# are `word` / `word_with_spaces`. OpenAISchema presumably forwards the docstring and the Field
# descriptions to the model as part of the function schema, so confirm before rewording them.
class JumbleClue(OpenAISchema):
    """Class representing a sentence used as a clue for a Jumble game"""
    word: str = Field(..., description="A list of letters defining one or multiple words without any space")
    # NOTE(review): typed `str | None`, yet `Field(...)` presumably still marks it as required — confirm.
    word_with_spaces: str | None = Field(..., description="list of words recreated from `word` with the missing spaces added")
    
# Container schema holding a list of JumbleClue entries; its `openai_schema` is what
# generate_jumble_clues sends as the function definition.
class JumbleCluesCollection(OpenAISchema):
    """List of jumble clues"""
    clues: list[JumbleClue]
   
# Prompt template with a `{data}` placeholder (a plain string, not an f-string).
# NOTE(review): not referenced by any visible cell — generate_jumble_clues builds its prompt inline.
prompt2 = """
Consider the data below: '\n{data}'. For each word, reconstruct the correct words by adding missing spaces.
"""

In [None]:
def generate_jumble_clues(
    data: str,
    *,
    model: str = "gpt-4",
    temperature: float = 0.1,
    max_tokens: int = 1000,
) -> JumbleCluesCollection:
    """
    Ask the chat model to split run-together solution strings into words.

    The request forces a function call that matches
    ``JumbleCluesCollection.openai_schema`` and the reply is parsed with
    ``JumbleCluesCollection.from_response``.

    Args:
        data: Text interpolated verbatim into the prompt.
        model: Name of the chat model to call.
        temperature: Sampling temperature passed to the API.
        max_tokens: Upper bound on the number of tokens in the reply.

    Returns:
        The collection parsed from the model's function-call response.
    """
    prompt = f"Consider the data below: '\n{data}'. For each word, reconstruct the correct words by adding missing spaces. If you can't find a match please return None."
    completion = openai.ChatCompletion.create(
        model=model,
        temperature=temperature,
        functions=[JumbleCluesCollection.openai_schema],
        # Force the model to answer through the schema instead of free text.
        function_call={"name": JumbleCluesCollection.openai_schema["name"]},
        messages=[
            {
                "role": "user",
                "content": prompt,
            },
        ],
        max_tokens=max_tokens,
    )
    return JumbleCluesCollection.from_response(completion)

In [None]:
# Send the solution strings themselves, one per line. Interpolating the ORM rows directly
# would put their default object repr ("<pastiche.tables.JumbleGame object at 0x...>")
# into the prompt instead of the letters to split.
results = generate_jumble_clues(
    data="\n".join(game.solution for game in solutions[:10])
)

In [None]:
# Display the collection returned by generate_jumble_clues.
results