In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [46]:
from pastiche import tables
from pastiche.database import SessionLocal
from datetime import date
from pastiche import config

In [53]:
# Load up to 100 JumbleGame rows, ordered by their value_date, into `solutions`.
with SessionLocal() as db:
    solutions = (
        db.query(tables.JumbleGame)
        .order_by(tables.JumbleGame.value_date)
        .limit(100)
        .all()
    )

# image generation

In [26]:
import requests
import shutil
from openai import OpenAI


def generate_dall_e_prompt(client: "OpenAI", clue_sentence: str, solution: str, model: str = 'gpt-4'):
    """Ask a chat model to write a text-to-image prompt for a Jumble clue.

    Args:
        client: OpenAI client whose ``chat.completions.create`` is called.
        clue_sentence: Clue sentence of the game, embedded in the instructions.
        solution: Solution of the game; the instructions ask the model to
            hint at it without depicting it directly.
        model: Name of the chat model to use.

    Returns:
        The message content of the first choice in the completion.
    """
    prompt = f"""
    Generate a prompt for text-to-image generation following the guidelines below:
    \n
    1. Identify Key Elements: Focus on the main elements of both the clue-sentence and the solution. This could be objects, actions, or themes.
    2. Visual Representation: Translate these elements into visual cues that can be illustrated. For example, if the solution is a play on words, I consider how to represent that pun visually.
    3. Avoid Direct Depiction of Solution: Make sure the image suggests the solution without explicitly showing it. This often involves using metaphors or related imagery.
    4. Context and Setting: Provide a setting or context that aligns with the clue-sentence, enhancing the overall theme.
    5. Detail and Description: The prompt is detailed and descriptive to guide the AI in generating an image that closely matches the intended idea.
    6. To the point: only return the prompt and nothing else
    \n
    Clue-sentence: '{clue_sentence}'\nSolution: '{solution}'
    """.strip()

    # The whole instruction text is sent as a single user message.
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        model=model,
    )

    return chat_completion.choices[0].message.content

def generate_image(
    client: "OpenAI",
    prompt: str,
    model: str = 'dall-e-3',
    size: str = "1024x1024",
    quality: str = "standard",
):
    """Generate a single image from a text prompt and return its URL.

    Args:
        client: OpenAI client whose ``images.generate`` is called.
        prompt: Text description of the image to generate.
        model: Name of the image model to use.
        size: Requested image dimensions, forwarded unchanged to the API.
        quality: Requested image quality, forwarded unchanged to the API.

    Returns:
        The ``url`` attribute of the first item in the response data.
    """
    response = client.images.generate(
        model=model,
        prompt=prompt,
        size=size,
        quality=quality,
        n=1,  # exactly one image is requested, so data[0] is the only result
    )

    return response.data[0].url



def download_image(url, filename, timeout: float = 30):
    """Download the image at ``url`` and write it to ``filename``.

    Args:
        url: Address of the image to fetch.
        filename: Local path the image bytes are written to (opened in 'wb' mode).
        timeout: Seconds to wait for the server before ``requests`` gives up,
            so an unresponsive host cannot block the notebook indefinitely.

    Returns:
        True when the image was written, False when the server answered with
        a status code other than 200 (a message is printed in both cases).
    """
    # Stream the body so it is copied to disk without being loaded into memory;
    # the context manager releases the connection even if writing fails.
    with requests.get(url, stream=True, timeout=timeout) as r:
        if r.status_code != 200:
            print('Image Couldn\'t be retreived')
            return False

        # Let urllib3 decode gzip/deflate transfer encodings; otherwise the
        # downloaded image file's size will be zero.
        r.raw.decode_content = True

        with open(filename, 'wb') as f:
            shutil.copyfileobj(r.raw, f)

    print('Image sucessfully Downloaded: ', filename)
    return True

In [27]:
client = OpenAI()

# Select the game explicitly: `s` was previously only bound by a loop in a later
# cell, so this cell failed on a fresh kernel.
# NOTE(review): the first loaded game is an arbitrary choice — pick another index if needed.
s = solutions[0]

# Two API calls: first a chat model writes the image prompt, then the image is generated.
image_prompt = generate_dall_e_prompt(client, s.clue_sentence, s.solution_unjumbled)

image_url = generate_image(client, image_prompt)

In [47]:
# Save the generated image under the project's data directory.
# NOTE(review): "tst.jpg" looks like a scratch filename — confirm before relying on it.
filename = config.DATA_DIR / "images/tst.jpg"
download_image(image_url, filename)

Image sucessfully Downloaded:  /Users/badrbenmbarek/Documents/work/perso/pastiche/data/images/tst.jpg


In [55]:
from google.cloud import storage


def upload_blob(bucket_name, source_file_name, destination_blob_name, if_generation_match=0):
    """Upload a local file to a Google Cloud Storage bucket.

    Args:
        bucket_name: ID of the GCS bucket, e.g. "your-bucket-name".
        source_file_name: Path of the local file to upload, e.g. "local/path/to/file".
        destination_blob_name: ID of the GCS object to create, e.g. "storage-object-name".
        if_generation_match: Generation-match precondition forwarded to the upload.
            The default of 0 only allows the upload when the destination object
            does not exist yet, which guards against races and accidental
            overwrites. Pass the existing object's generation number to replace
            it, or None to upload without a precondition.
    """
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    # The request is aborted if the object's generation number does not match
    # the precondition.
    blob.upload_from_filename(source_file_name, if_generation_match=if_generation_match)

    print(
        f"File {source_file_name} uploaded to {destination_blob_name}."
    )


In [57]:
# Upload one locally stored image to the "pastiche-images" bucket as "v1/1.jpg".
# NOTE(review): upload_blob uploads with if_generation_match=0, which its own
# comments describe as the precondition for a destination object that does not
# exist yet — re-running this cell once the object exists presumably aborts.
upload_blob(
    bucket_name="pastiche-images",
    source_file_name="../data/images/v1/1.jpg",
    destination_blob_name="v1/1.jpg"
)

File ../data/images/v1/1.jpg uploaded to v1/1.jpg.


In [None]:
import pandas as pd
from pastiche.game import JumbleGame, JumbleGameCollection
from tqdm import tqdm

In [None]:
# Build a JumbleGameCollection from the Jumble answers JSON file
# (path is relative to the notebook's working directory).
path = '../data/jumble_answers_data.json'

data = JumbleGameCollection.from_jumble_answers(path)

In [None]:
# Take the first game of the collection for manual inspection.
game = data.games[0]

In [None]:
game.solution, game.solution_unjumbled

In [None]:
def sanitize_solution(solution: str, letters: str, sub: str = "*"):
    """
    Mask the letters of a solution while keeping its layout readable.

    Every alphabetic character of `solution` that also occurs in `letters`
    (compared case-insensitively) is replaced by `sub`. Spaces, punctuation and
    everything inside parentheses, including nested parentheses, are kept as is.
    With a single-character `sub` the result has the same length as `solution`.

    Args:
        solution: The readable solution, e.g. 'DAY IN (AND) DAY OUT'.
        letters: The letters that may be masked, e.g. 'DAYINDAYOUT'.
        sub: Replacement written in place of each masked letter.

    Returns:
        The masked solution string.

    Example:
        >>> sanitize_solution('DAY IN (AND) DAY OUT', 'DAYINDAYOUT')
        '*** ** (AND) *** ***'
    """
    # Upper-case the reference letters once instead of once per character.
    letters_upper = letters.upper()
    output = []
    # Nesting depth rather than a flag, so an inner ')' does not end the
    # protection of the enclosing parenthesised group.
    depth = 0
    for character in solution:
        if character == '(':
            depth += 1
        elif character == ')':
            # A stray ')' never drives the depth below zero.
            depth = max(depth - 1, 0)
        maskable = (
            depth == 0
            and character.isalpha()
            and character.upper() in letters_upper
        )
        output.append(sub if maskable else character)
    return ''.join(output)


In [None]:
# Print every solution next to its masked form and check that masking
# preserves the string length.
for s in solutions:
    original = s.solution_unjumbled
    sanitized = sanitize_solution(original, s.solution)
    print(original, "|", sanitized)
    assert len(sanitized) == len(original)

In [None]:
from datetime import timedelta

In [None]:
from itertools import groupby

In [None]:
# Dates on which a game was played (sorted ascending).
value_dates = [date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 5), date(2022, 1, 6), date(2022, 1, 10)]

# Set for constant-time membership checks below.
played = set(value_dates)

# Every calendar day from the first to the last played date, inclusive.
date_range = [value_dates[0] + timedelta(days=x)
              for x in range((value_dates[-1] - value_dates[0]).days + 1)]

# Pair each day with a flag telling whether it was played.
dates_to_check = [(t, t in played) for t in date_range]

# Group on the played flag only: grouping on the whole (date, flag) pair makes
# every key unique, so no run of consecutive days would ever be merged.
consecutive_dates = []
for was_played, run in groupby(dates_to_check, key=lambda pair: pair[1]):
    if was_played:
        consecutive_dates.append([day for day, _ in run])

In [None]:
# Sample play records: a display-formatted date plus an elapsed time per record.
value_dates = [
    {'valueDate': 'Thursday, November 09 2023', 'elapsedTime': 4163},
    {'valueDate': 'Thursday, November 09 2023', 'elapsedTime': 6661},
    {'valueDate': 'Thursday, November 09 2023', 'elapsedTime': 4921},
]

In [None]:
from datetime import datetime
from pastiche import config
from itertools import groupby

In [None]:
# Turn the play records into a frame and parse the display-formatted dates.
df = pd.DataFrame(value_dates)
df['valueDate'] = df['valueDate'].apply(
    lambda raw: datetime.strptime(raw, config.DISPLAY_DATE_FORMAT)
)
fastest_time = df['elapsedTime'].min()

# One row per calendar day (first record of each day); days without a record
# end up with a missing elapsedTime.
df = df.set_index('valueDate')
df = df.resample("D").first()
df = df.sort_index()

played_dates = df['elapsedTime'].notnull()

# Each entry is the list of row positions forming one run of played days.
consecutive_dates = []
for was_played, run in groupby(enumerate(played_dates), lambda pair: pair[1]):
    if was_played:
        consecutive_dates.append([position for position, _ in run])

streak_lengths = [len(streak) for streak in consecutive_dates]
max_streak = max(streak_lengths)
current_streak = streak_lengths[-1]

In [None]:
fastest_time

In [None]:
def prettify_elapsed_time():
    """Placeholder for turning an elapsed time into a display string.

    TODO: not implemented yet.
    NOTE(review): the input (and the unit of `elapsedTime`) still has to be
    decided; add it as a parameter when implementing.

    Raises:
        NotImplementedError: always, until the formatting is implemented.
    """
    raise NotImplementedError("prettify_elapsed_time is not implemented yet")

In [None]:
# Experiment: format the fastest elapsed time with `millify`.
# NOTE(review): `millify` is not imported in any visible cell — confirm where it
# comes from and that a string is a valid value for `prefixes`.
millify(fastest_time, prefixes="min", precision=2)

In [None]:
fastest_time

In [None]:
# Manual corrections: each key is a solution phrase, each value is the same
# phrase with one of its words wrapped in parentheses.
replace_dict = {
    "“PURR” HIS REQUEST": "“PURR” (HIS) REQUEST",
    "ASSIGNED THE JOB": "ASSIGNED (THE) JOB",
    "FOR THE BIRDS": "FOR (THE) BIRDS",
    "GO HAND IN HAND": "GO HAND (IN) HAND",
    "ALL IN A DAY’S WORK": "ALL IN (A) DAY’S WORK",
    "LOST HIS SHIRT": "LOST (HIS) SHIRT",
    "HOT UNDER THE COLLAR": "HOT UNDER (THE) COLLAR",
    "BORE THE BRUNT OF IT": "BORE (THE) BRUNT OF IT",
    "BREAK IT TO HER": "BREAK (IT) TO HER",
    "FILLED THE BILL": "FILLED (THE) BILL",
    "ROAD TO RECOVERY": "ROAD (TO) RECOVERY",
    "COURT FOR THE COURT": "COURT FOR (THE) COURT",
    "MADE A NAME FOR HIMSELF": "MADE A NAME (FOR) HIMSELF",
    "“WRITE” FOR THE JOB": "“WRITE” FOR (THE) JOB",
    "LEFT THE PREMISES": "LEFT (THE) PREMISES",
    "DOWN TO A TRICKLE": "DOWN (TO) A TRICKLE",
    "END OF THE “RODE”": "END OF (THE) “RODE”",
    "RAISING HER CHILDREN": "RAISING (HER) CHILDREN",
    "BANNED THE BAND": "BANNED (THE) BAND",
    "HAD THE UPPER HAND": "HAD (THE) UPPER HAND",
    "BEAR TO THE RIGHT": "BEAR TO (THE) RIGHT",
    "SEEING WAS BELIEVING": "SEEING (WAS) BELIEVING",
    "THROUGH THE ROOF": "THROUGH (THE) ROOF",
    "HEART OF THE CITY": "HEART OF (THE) CITY",
    "KEEP AN OPEN MIND": "KEEP (AN) OPEN MIND",
    "HAS A “FLARE” FOR IT": "HAS (A) “FLARE” FOR IT",
    "LOST HIS BALANCE": "LOST (HIS) BALANCE",
    "DOWN THE HATCH": "DOWN (THE) HATCH",
    "REFUSE THE REFUSE": "REFUSE (THE) REFUSE",
    "TO SAY THE “LEASED”": "TO SAY (THE) “LEASED”",
    "“WAY” THEIR OPTIONS": "“WAY” (THEIR) OPTIONS",
}

# df['solution_unjumbled'] = df['solution_unjumbled'].replace()

# image

# OpenAI exploration

In [None]:
import openai
from pydantic import Field
from openai_function_call import OpenAISchema

In [None]:
# class JumbleClue(OpenAISchema):
#     """Class representing a sentence used as a clue for a Jumble game"""
#     clue_sentence: str = Field(..., description="Sentence with gaps to be filled.")
#     solution: str = Field(..., description="The words to use to fill the gaps in the clue-sentence. The solution can be made of multiple words and these words are not separated by a space.")
#     full_sentence: str = Field(..., description="The full sentence with all words.")
    
# class JumbleCluesCollection(OpenAISchema):
#     """List of jumble clues"""
#     clues: list[JumbleClue]
    
# prompt1 = """
# Consider the data below: '\n{data}'. 
#                 For each entry in the list, follow the following steps:
#                 1. identify where are the gaps in the `clue_sentence`.
#                 2. reconstruct the correct words using the `solution` by adding missing spaces
#                 3. use the reconstructed solution to create a `full_sentence` by filling the gaps in the `clue_sentence`
# """


In [None]:
# One entry of the structured answer: the space-less letters (`word`) and the
# same text with the missing spaces restored (`word_with_spaces`, may be None).
# NOTE(review): the docstring below speaks of a "sentence used as a clue", which
# does not match these fields. OpenAISchema presumably exposes the docstring in
# the generated function schema, so confirm before rewording it.
class JumbleClue(OpenAISchema):
    """Class representing a sentence used as a clue for a Jumble game"""
    word: str = Field(..., description="A list of letters defining one or multiple words without any space")
    word_with_spaces: str | None = Field(..., description="list of words recreated from `word` with the missing spaces added")
    
# Container schema: a list of JumbleClue entries. Its `openai_schema` and
# `from_response` are what generate_jumble_clues uses.
class JumbleCluesCollection(OpenAISchema):
    """List of jumble clues"""
    clues: list[JumbleClue]
   
# Prompt template with a `{data}` placeholder. It is a plain string, not an
# f-string, so the placeholder is only filled if a caller formats it.
# NOTE(review): no visible cell references `prompt2`; generate_jumble_clues
# builds its own, slightly longer prompt inline.
prompt2 = """
Consider the data below: '\n{data}'. For each word, reconstruct the correct words by adding missing spaces.
"""

In [None]:
def generate_jumble_clues(data: str) -> JumbleCluesCollection:
    """Ask gpt-4 to restore the missing spaces in space-less Jumble solutions.

    The model is forced to answer through the JumbleCluesCollection function
    schema, and the completion is parsed back into that class.

    NOTE(review): `openai.ChatCompletion` is the module-level API of older
    openai releases, while other cells of this notebook use the `OpenAI`
    client class — confirm which openai version this cell targets.

    Args:
        data: Text listing the words to process; interpolated into the prompt.

    Returns:
        The JumbleCluesCollection built from the model's response.
    """
    schema = JumbleCluesCollection.openai_schema
    user_message = {
        "role": "user",
        "content": f"Consider the data below: '\n{data}'. For each word, reconstruct the correct words by adding missing spaces. If you can't find a match please return None.",
    }
    completion = openai.ChatCompletion.create(
        model="gpt-4",
        temperature=0.1,
        functions=[schema],
        # Naming the function forces the model to reply with that function call.
        function_call={"name": schema["name"]},
        messages=[user_message],
        max_tokens=1000,
    )
    return JumbleCluesCollection.from_response(completion)

In [None]:
# generate_jumble_clues expects text, so pass the space-less solution strings,
# one per line, rather than the list of database rows itself.
results = generate_jumble_clues(
    data="\n".join(game.solution for game in solutions[:10])
)

In [None]:
results