In [27]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [51]:
import functools
import pandas as pd
import re

from IPython.display import display

from cryptic.evaluation.evaluations import MetroQA
from cryptic.models.oai import OpenAIQA
from cryptic.models.selectors import SelfConsistencySelector
from cryptic.models.validators import SubWordConsistencyValidator
from cryptic.prompts.interface import PromptInterface

In [52]:
JSON_FILENAME = "website/crosswordData.json"
DUMMY_CLUE = "A farmer's son"
NUM_CLUES = None
NUM_ANSWERS = 10

Check that injection of clue and num letters into prompt template is working correctly:

In [53]:
prompt_interface = PromptInterface("cryptic.prompts.cot.ben_v1")
prompt = prompt_interface.inject_prompt(DUMMY_CLUE, 5)
print(prompt)

I will answer some clues:
    1. Boy captures the work of Shakespeare, for example (7)
    Answer: SONNETS (examples of works of Shakespeare) - SON (boy) + NETS (captures)
    2. Country folk appearing in operatic piece (7)
    Answer: ARMENIA (country) - ARIA (operatic piece) + MEN (folk)
    3. Where men are on board ship beside revolutionary (5)
    Answer: CHESS (where men are on board) - CHE (Revolutionary, Che Guevara) + SS (designation for a ship)
    4. Amputate limbs to remove weapons (6)
    Answer: DISARM (Remove weapons  - DIS-ARM (amputating limbs)
    5. Accuse one politician apiece (7)
    Answer: IMPEACH (accuse) - I (one) + MP (politician) + EACH (apiece)
    6. Orchestra was taken aback by cutting machine (7)
    Answer: BANDSAW (cutting machine) - BAND (orchestra) + SAW (was, 'taken aback' i.e. backwards)
    7. Concerning drama in second game after draw (6)
    Answer: REPLAY (a second game, typically following a drawn result in the first) - RE (meaning concerning) 

Check extraction of answer is working correctly

In [54]:
out = prompt_interface.prompt_templates["example_output"]

In [55]:
res = prompt_interface.extract_answer(out)
res

{'response': 'Answer: REPLAY (a second game, typically following a drawn result in the first) - RE (meaning concerning) + PLAY (a type of drama)',
 'answer': 'REPLAY',
 'predicted_definition': 'a second game, typically following a drawn result in the first',
 'wordplay': 'RE (meaning concerning) + PLAY (a type of drama)'}

In [56]:
prompt_interface.decompose(res["wordplay"])

[{'answer': 'RE', 'definition': 'meaning concerning'},
 {'answer': 'PLAY', 'definition': 'a type of drama'}]

In [57]:
validator = SubWordConsistencyValidator(prompt_interface)

In [58]:
evaluation = MetroQA(JSON_FILENAME, num_answers=NUM_ANSWERS, num_clues=NUM_CLUES)

In [59]:
model = OpenAIQA(
    prompt_interface=prompt_interface,
    model_name="text-davinci-003",
    max_tokens=64,
    validators=[validator],
    validator_names=["consistent_decomposition"]
)

In [60]:
df = evaluation.qa_frame.df
df.head(3)

Unnamed: 0,clue,answer,startx,starty,position,orientation,num_letters,rowid
0,Liquor of particular make needed by youth lead...,xxxxxx,1,1,1,across,6,0
1,Not the HQ of Britain's capital stockbreeding ...,xxxxxx,1,1,1,down,6,1
2,Amputate limbs to remove weapons (6),xxxxxx,8,1,4,across,6,2


First feed a single clue through the API to look at model output and check that we are correctly parsing responses.

This might need to be tweaked: e.g. removal of newline / space chars etc,
to convert into a standard answer format

In [61]:
clue, num_letters = evaluation.qa_frame.sample(1).df.iloc[0][["clue", "num_letters"]]

In [62]:
response = model.get_response(clue, num_letters, num_answers=2)
response

<OpenAIObject text_completion id=cmpl-759ubvbaXSAF8ZJ94cGGVGD8R21h3 at 0x10f0b0d60> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "text": "\nAnswer: TRENDY (stylish) - TRY (trying) + END (in a way) + Y (way)."
    },
    {
      "finish_reason": "stop",
      "index": 1,
      "logprobs": null,
      "text": "\nAnswer: FASHIONABLE (trying to be stylish in a way) - FASH (in a way) + ION (stylish) + ABLE (trying)."
    }
  ],
  "created": 1681464097,
  "id": "cmpl-759ubvbaXSAF8ZJ94cGGVGD8R21h3",
  "model": "text-davinci-003",
  "object": "text_completion",
  "usage": {
    "completion_tokens": 68,
    "prompt_tokens": 365,
    "total_tokens": 433
  }
}

In [63]:
out = response["choices"][0]["text"]

In [64]:
res = prompt_interface.extract_answer(response["choices"][0]["text"])
res

{'response': '\nAnswer: TRENDY (stylish) - TRY (trying) + END (in a way) + Y (way).',
 'answer': 'TRENDY',
 'predicted_definition': 'stylish',
 'wordplay': 'TRY (trying) + END (in a way) + Y (way).'}

In [65]:
prompt_interface.decompose(res["wordplay"])

[{'answer': 'TRY', 'definition': 'trying'},
 {'answer': 'END', 'definition': 'in a way'},
 {'answer': 'Y', 'definition': 'way'}]

In [66]:
validator.validate(res["answer"], res["predicted_definition"], res["wordplay"])

False

Once we're confident that answers are being extracted correctly we can run a whole set of clues through

In [67]:
metrics, answer_df = evaluation.run(model)

24it [01:19,  3.31s/it]


In [68]:
model.validator_names

['consistent_decomposition']

In [69]:
pred_cols = ["prediction", "predicted_definition", "wordplay"]
vis_cols = pred_cols + model.validator_names + ["prediction_satisfies_constraints"]

In [47]:
for i in range(NUM_ANSWERS):
    display(answer_df[["clue", "answer"] + [c + f"-{i}" for c in vis_cols]])

Unnamed: 0,clue,answer,prediction-0,predicted_definition-0,wordplay-0,consistent_decomposition-0,prediction_satisfies_constraints-0
0,Liquor of particular make needed by youth lead...,xxxxxx,MOONSHINE,liquor of particular make,MOON (youth leader) + SHINE (needed),True,False
1,Not the HQ of Britain's capital stockbreeding ...,xxxxxx,,,,True,False
2,Amputate limbs to remove weapons (6),xxxxxx,,,,True,False
3,"Trying to be stylish in a way (5,1,7)",xxxxxyzzzzzzz,PREENING,trying to be stylish,PREEN (in a way) + IN + G (style),False,False
4,"Temporary leader in dramatic game (6,7)",xxxxxxyyyyyyy,INTERIMS,temporary leader,INTER (dramatic game) + IMS (leader),True,False
5,Material that could be mined (5),xxxxx,,,,True,False
6,Sweetmeat of male injured by vehicle (7),xxxxxxx,CANDIED,sweetmeat,CAND (male) + IED (injured by vehicle),True,True
7,Accuse one politician apiece (7),xxxxxxx,IMPEACH,accuse,I (one) + MP (politician) + EACH (apiece),True,True
8,Twitch before at the top of the house (5),xxxxx,ATTIC,top of the house,AT (before) + TIC (twitch),True,True
9,A length being planned (5),xxxxx,SPAN,a length,S (being) + PAN (planned),True,False


Unnamed: 0,clue,answer,prediction-1,predicted_definition-1,wordplay-1,consistent_decomposition-1,prediction_satisfies_constraints-1
0,Liquor of particular make needed by youth lead...,xxxxxx,MOJITO,liquor,MO (make) + JIT (youth leader) + O (particular),True,True
1,Not the HQ of Britain's capital stockbreeding ...,xxxxxx,,,,True,False
2,Amputate limbs to remove weapons (6),xxxxxx,,,,True,False
3,"Trying to be stylish in a way (5,1,7)",xxxxxyzzzzzzz,VOGUEING,trying to be stylish,VOGUE (in a way) + ING (trying),True,False
4,"Temporary leader in dramatic game (6,7)",xxxxxxyyyyyyy,INTERIMS,temporary leader,INT (dramatic) + ERIMS (game),True,False
5,Material that could be mined (5),xxxxx,OREAL,material that could be mined,ORE (mined material) + AL (could be),True,True
6,Sweetmeat of male injured by vehicle (7),xxxxxxx,CANDIES,sweetmeat,CAN (vehicle) + DIES (male injured),True,True
7,Accuse one politician apiece (7),xxxxxxx,IMPEACH,accuse,I (one) + MP (politician) + EACH (apiece),True,True
8,Twitch before at the top of the house (5),xxxxx,ATTIC,top of the house,AT (before) + TIC (twitch),True,True
9,A length being planned (5),xxxxx,SPAN,a length,S (being) + PAN (planned),True,False


Unnamed: 0,clue,answer,prediction-2,predicted_definition-2,wordplay-2,consistent_decomposition-2,prediction_satisfies_constraints-2
0,Liquor of particular make needed by youth lead...,xxxxxx,TEENER,youth leader,"TEEN (youth) + ER (make, as in brand or manufa...",True,True
1,Not the HQ of Britain's capital stockbreeding ...,xxxxxx,POULTRY,stockbreeding centre,HQ (headquarters) + LONDON (Britain's capital),False,False
2,Amputate limbs to remove weapons (6),xxxxxx,,,,True,False
3,"Trying to be stylish in a way (5,1,7)",xxxxxyzzzzzzz,FASHIONABLE,trying to be stylish in a way,FASHION (in a way) + ABLE (trying),True,False
4,"Temporary leader in dramatic game (6,7)",xxxxxxyyyyyyy,,,,True,False
5,Material that could be mined (5),xxxxx,,,,True,False
6,Sweetmeat of male injured by vehicle (7),xxxxxxx,CARAMEL,sweetmeat,CAR (vehicle) + MEL (male) - EL (injured),False,True
7,Accuse one politician apiece (7),xxxxxxx,IMPEACH,accuse,I (one) + MP (politician) + EACH (apiece),True,True
8,Twitch before at the top of the house (5),xxxxx,ATTIC,top of the house,AT (Twitch) + TIC (before),True,True
9,A length being planned (5),xxxxx,SPAN,a length,S (being) + PAN (planned),True,False


Unnamed: 0,clue,answer,prediction-3,predicted_definition-3,wordplay-3,consistent_decomposition-3,prediction_satisfies_constraints-3
0,Liquor of particular make needed by youth lead...,xxxxxx,BRANDY,liquor,"BRAN (youth leader) + DY (make, as in a partic...",True,True
1,Not the HQ of Britain's capital stockbreeding ...,xxxxxx,,,,True,False
2,Amputate limbs to remove weapons (6),xxxxxx,,,,True,False
3,"Trying to be stylish in a way (5,1,7)",xxxxxyzzzzzzz,FASHIONABLE,trying to be stylish in a way,FASH (in a way) + ION (trying) + ABLE (stylish).,True,False
4,"Temporary leader in dramatic game (6,7)",xxxxxxyyyyyyy,INTERIM,temporary leader,INTER (dramatic game) + IM (in),True,False
5,Material that could be mined (5),xxxxx,,,,True,False
6,Sweetmeat of male injured by vehicle (7),xxxxxxx,CANDIES,sweetmeat,"CAN (vehicle) + DIES (male, past tense of 'die...",False,True
7,Accuse one politician apiece (7),xxxxxxx,IMPEACH,accuse,I (one) + MP (politician) + EACH (apiece),True,True
8,Twitch before at the top of the house (5),xxxxx,,,,True,False
9,A length being planned (5),xxxxx,SPAN,a length,S (being) + PLAN (planned),False,False


Unnamed: 0,clue,answer,prediction-4,predicted_definition-4,wordplay-4,consistent_decomposition-4,prediction_satisfies_constraints-4
0,Liquor of particular make needed by youth lead...,xxxxxx,MALTED,liquor,"MADE (particular make) + LT (youth leader, Lie...",True,True
1,Not the HQ of Britain's capital stockbreeding ...,xxxxxx,,,,True,False
2,Amputate limbs to remove weapons (6),xxxxxx,,,,True,False
3,"Trying to be stylish in a way (5,1,7)",xxxxxyzzzzzzz,,,,True,False
4,"Temporary leader in dramatic game (6,7)",xxxxxxyyyyyyy,INTERIMS,temporary leader,INTER (dramatic game) + IMS (leader),True,False
5,Material that could be mined (5),xxxxx,,,,True,False
6,Sweetmeat of male injured by vehicle (7),xxxxxxx,BONBON,sweetmeat,BON (male) + BON (injured) + V (vehicle),False,False
7,Accuse one politician apiece (7),xxxxxxx,IMPEACH,accuse,I (one) + MP (politician) + EACH (apiece),True,True
8,Twitch before at the top of the house (5),xxxxx,ATTIC,top of the house,"AT (Twitch, as in the verb) + TIC (before)",True,True
9,A length being planned (5),xxxxx,SPAN,a length,S (being) + PAN (planned),True,False


Unnamed: 0,clue,answer,prediction-5,predicted_definition-5,wordplay-5,consistent_decomposition-5,prediction_satisfies_constraints-5
0,Liquor of particular make needed by youth lead...,xxxxxx,BRANDY,liquor,"BR (abbreviation for 'youth leader', Boy/Girl ...",False,True
1,Not the HQ of Britain's capital stockbreeding ...,xxxxxx,LIVERY,not the HQ of Britain's capital stockbreeding ...,LI (not) + VERY (HQ),True,True
2,Amputate limbs to remove weapons (6),xxxxxx,,,,True,False
3,"Trying to be stylish in a way (5,1,7)",xxxxxyzzzzzzz,PREENING,trying to be stylish,PRE (in a way) + ENING (trying),True,False
4,"Temporary leader in dramatic game (6,7)",xxxxxxyyyyyyy,,,,True,False
5,Material that could be mined (5),xxxxx,,,,True,False
6,Sweetmeat of male injured by vehicle (7),xxxxxxx,CANDIES,sweetmeat,CAN (vehicle) + DIES (male injured),True,True
7,Accuse one politician apiece (7),xxxxxxx,IMPEACH,accuse,I (one) + MP (politician) + EACH (apiece),True,True
8,Twitch before at the top of the house (5),xxxxx,ROOFT,top of the house,RT (twitch) + OOF (before),True,True
9,A length being planned (5),xxxxx,SPAN,length,S (being) + PLAN (planned),False,False


Unnamed: 0,clue,answer,prediction-6,predicted_definition-6,wordplay-6,consistent_decomposition-6,prediction_satisfies_constraints-6
0,Liquor of particular make needed by youth lead...,xxxxxx,BRANDY,liquor,BRAND (particular make) + Y (youth leader),True,True
1,Not the HQ of Britain's capital stockbreeding ...,xxxxxx,LIVERY,not the HQ of Britain's capital,LI (stockbreeding centre) + VERY (not),True,True
2,Amputate limbs to remove weapons (6),xxxxxx,,,,True,False
3,"Trying to be stylish in a way (5,1,7)",xxxxxyzzzzzzz,,,,True,False
4,"Temporary leader in dramatic game (6,7)",xxxxxxyyyyyyy,INTERIMS,temporary leader,INTER (dramatic) + IMS (game),True,False
5,Material that could be mined (5),xxxxx,,,,True,False
6,Sweetmeat of male injured by vehicle (7),xxxxxxx,MARZIPAN,sweetmeat,"MAN (male) + RZ (injured by vehicle, i.e. 'RZ'...",False,False
7,Accuse one politician apiece (7),xxxxxxx,IMPEACH,accuse,I (one) + MP (politician) + EACH (apiece),True,True
8,Twitch before at the top of the house (5),xxxxx,ROOFT,top of the house,"OOF (Twitch, backwards) + RT (Before, backwards)",True,True
9,A length being planned (5),xxxxx,SPAN,length,S (being) + PLAN (planned),False,False


Unnamed: 0,clue,answer,prediction-7,predicted_definition-7,wordplay-7,consistent_decomposition-7,prediction_satisfies_constraints-7
0,Liquor of particular make needed by youth lead...,xxxxxx,MOONSHINE,liquor,MOON (youth leader) + SHINE (make),True,False
1,Not the HQ of Britain's capital stockbreeding ...,xxxxxx,,,,True,False
2,Amputate limbs to remove weapons (6),xxxxxx,,,,True,False
3,"Trying to be stylish in a way (5,1,7)",xxxxxyzzzzzzz,FASHIONABLE,trying to be stylish,FASHION (in a way) + ABLE (able),True,False
4,"Temporary leader in dramatic game (6,7)",xxxxxxyyyyyyy,,,,True,False
5,Material that could be mined (5),xxxxx,,,,True,False
6,Sweetmeat of male injured by vehicle (7),xxxxxxx,MARMALAD,sweetmeat,MAL (male) + RAM (vehicle) + AD (injured),True,False
7,Accuse one politician apiece (7),xxxxxxx,IMPEACH,accuse,I (one) + MP (politician) + EACH (apiece),True,True
8,Twitch before at the top of the house (5),xxxxx,,,,True,False
9,A length being planned (5),xxxxx,SPAN,length,S (being) + PAN (planned),True,False


Unnamed: 0,clue,answer,prediction-8,predicted_definition-8,wordplay-8,consistent_decomposition-8,prediction_satisfies_constraints-8
0,Liquor of particular make needed by youth lead...,xxxxxx,BRANDY,liquor,BRAND (particular make) + Y (youth leader),True,True
1,Not the HQ of Britain's capital stockbreeding ...,xxxxxx,,,,True,False
2,Amputate limbs to remove weapons (6),xxxxxx,,,,True,False
3,"Trying to be stylish in a way (5,1,7)",xxxxxyzzzzzzz,PREENING,trying to be stylish,PREEN (in a way) + ING (suffix),True,False
4,"Temporary leader in dramatic game (6,7)",xxxxxxyyyyyyy,INTERIMP,Temporary leader,INTER (in) + IMP (dramatic game),True,False
5,Material that could be mined (5),xxxxx,,,,True,False
6,Sweetmeat of male injured by vehicle (7),xxxxxxx,MARMALAD,sweetmeat,MAL (male) + AD (vehicle) + ARM (injured),True,False
7,Accuse one politician apiece (7),xxxxxxx,IMPEACH,accuse,I (one) + MP (politician) + EACH (apiece),True,True
8,Twitch before at the top of the house (5),xxxxx,,,,True,False
9,A length being planned (5),xxxxx,SCHEME,a length being planned,SCHE (a length) + ME (being planned),True,False


Unnamed: 0,clue,answer,prediction-9,predicted_definition-9,wordplay-9,consistent_decomposition-9,prediction_satisfies_constraints-9
0,Liquor of particular make needed by youth lead...,xxxxxx,MOJITO,liquor,MO (make needed) + JIT (youth leader) + O (par...,True,True
1,Not the HQ of Britain's capital stockbreeding ...,xxxxxx,,,,True,False
2,Amputate limbs to remove weapons (6),xxxxxx,,,,True,False
3,"Trying to be stylish in a way (5,1,7)",xxxxxyzzzzzzz,PREENING,trying to be stylish,PRE (in a way) + EN (trying) + ING (way).,True,False
4,"Temporary leader in dramatic game (6,7)",xxxxxxyyyyyyy,INTERIMS,temporary leader,INTER (dramatic game) + IMS (leader),True,False
5,Material that could be mined (5),xxxxx,OREAL,material that could be mined,ORE (mined material) + AL (could be),True,True
6,Sweetmeat of male injured by vehicle (7),xxxxxxx,CANDIES,sweetmeat,MAN (male) + DIES (injured by vehicle),False,True
7,Accuse one politician apiece (7),xxxxxxx,IMPEACH,accuse,I (one) + MP (politician) + EACH (apiece),True,True
8,Twitch before at the top of the house (5),xxxxx,ATTIC,top of the house,AT (before) + TIC (twitch),True,True
9,A length being planned (5),xxxxx,SPAN,length,S (being) + PLAN (planned),False,False


I want to somehow use the validator columns to prioritise answers.

I think it might be useful to push the dataframe to long form.

In [70]:
pred_cols = ["prediction", "predicted_definition", "wordplay"] + model.validator_names + ["prediction_satisfies_constraints"]

In [92]:
class SelfConsistencySelector:

    def __init__(self, validator_names):
        self.validator_names = validator_names

    def make_long_df(self, answer_df):
        pred_cols = [
            "prediction",
            "predicted_definition",
            "wordplay",
            "response",
        ] + self.validator_names + ["prediction_satisfies_constraints"]
        long_df = pd.wide_to_long(answer_df, pred_cols, i="rowid", j="index", sep="-")
        return long_df

    def select(self, answer_df):
        long_df = self.make_long_df(answer_df)
        query = ' & '.join(self.validator_names + ["prediction_satisfies_constraints"])
        filtered_df = long_df.query(query).reset_index()
        selected_df = filtered_df.groupby("rowid").head(1)
        return selected_df

In [188]:
for i in range(10):
    _def = answer_df[f'predicted_definition-{i}'].replace("\n", "").apply(lambda x: f" ({x}) - ")
    answer_df[f"output-{i}"] = answer_df[f"prediction-{i}"] +_def+ answer_df[f"wordplay-{i}"]

In [189]:
row_ids = [7, 0, 8, 20]
wp_cols = [c for c in answer_df.columns if c.startswith("output")][4:7]
wordplay_cols = [c for c in answer_df.columns if c.startswith("wordplay")][4:7]
prediction_cols = [c for c in answer_df.columns if c.startswith("prediction")][4:7]

In [192]:
ix = 1
pd.set_option('max_colwidth', 500)

display(answer_df[answer_df["rowid"] == row_ids[ix]][["clue"] + wp_cols])

Unnamed: 0,clue,output-4,output-5,output-6
0,Liquor of particular make needed by youth leader (6),"MALTED (liquor) - MALT (particular make) + ED (youth leader, e.g. Edward)",BOURBON (liquor) - BON (particular make) + OUR (needed by youth leader),BRANDY (liquor) - BRAND (particular make) + Y (youth leader)


In [121]:
pd.set_option('max_colwidth', 800)
answer_df[answer_df["rowid"].isin([7, 0, 8, 20])][["clue"] + wordplay_cols]

Unnamed: 0,clue,wordplay-4,wordplay-5,wordplay-6
0,Liquor of particular make needed by youth leader (6),"MALT (particular make) + ED (youth leader, e.g. Edward)",BON (particular make) + OUR (needed by youth leader),BRAND (particular make) + Y (youth leader)
7,Accuse one politician apiece (7),I (one) + MP (politician) + EACH (apiece),I (one) + MP (politician) + EACH (apiece).,I (one) + MP (politician) + EACH (apiece)
8,Twitch before at the top of the house (5),AT (before) + TIC (twitch),AT (before) + TIC (twitch),AT (before) + TIC (twitch)
20,Woman with hat made to look like sword case (6),SCAR (woman) + BARD (hat),"SCAB (woman) + BARD (hat, as in the 'bard' of a hat)","SHE (woman) + ATH (made to look like, 'hat' backwards)"


In [93]:
selector = SelfConsistencySelector(model.validator_names)

In [128]:
long_df = selector.make_long_df(answer_df)
query = ' & '.join(model.validator_names + ["prediction_satisfies_constraints"])
filtered_df = long_df.query(query).reset_index()

In [144]:
cutoff = 9

In [145]:
gdf = pd.DataFrame(filtered_df.groupby("rowid")["prediction"].value_counts()).sort_values(["count", "rowid"], ascending=False)

In [146]:
gdf

Unnamed: 0_level_0,Unnamed: 1_level_0,count
rowid,prediction,Unnamed: 2_level_1
12,BANDSAW,10
8,ATTIC,10
7,IMPEACH,10
21,ABASE,9
15,REPLAY,9
19,TRIMMER,8
14,HOLDUP,8
17,STRIKE,4
0,BRANDY,4
20,SHEATH,3


In [152]:
selected_rows = gdf[gdf["count"]>=cutoff].reset_index()["rowid"].values

In [153]:
selected_rows

array([12,  8,  7, 21, 15])

In [94]:
selected = selector.select(answer_df)

In [115]:
selected

Unnamed: 0,rowid,index,starty,num_letters,clue,answer,startx,num_correct,orientation,position,correct,prediction,predicted_definition,wordplay,response,consistent_decomposition,prediction_satisfies_constraints
0,7,0,1,7,Accuse one politician apiece (7),IMPEACH,9,0,down,5,False,IMPEACH,accuse,I (one) + MP (politician) + EACH (apiece),Answer: IMPEACH (accuse) - I (one) + MP (polit...,True,True
1,8,0,5,5,Twitch before at the top of the house (5),ATTIC,9,0,across,11,False,ATTIC,twitch before,AT (at) + TIC (top of the house),\nAnswer: ATTIC (twitch before) - AT (at) + TI...,True,True
2,12,0,9,7,Orchestra was taken aback by cutting machine (7),BANDSAW,7,0,across,19,False,BANDSAW,cutting machine,"BAND (orchestra) + SAW (was, 'taken aback' i.e...",\nAnswer: BANDSAW (cutting machine) - BAND (or...,True,True
3,14,0,13,6,Cause delay in the one nearer the back (6),HOLDUP,1,0,across,22,False,HOLDUP,cause delay,HOLD (the one nearer the back) + UP (back),\nAnswer: HOLDUP (cause delay) - HOLD (the one...,True,True
4,15,0,13,6,Concering drama in second game after draw (6),REPLAY,8,0,across,23,False,REPLAY,"a second game, typically following a drawn res...",RE (meaning concerning) + PLAY (a type of drama),"\nAnswer: REPLAY (a second game, typically fol...",True,True
5,21,0,8,5,Humiliate because in Lincoln (5),ABASE,7,0,down,16,False,ABASE,humiliate,"A (because) + BASE (Lincoln, a city in the UK)",\nAnswer: ABASE (humiliate) - A (because) + BA...,True,True
6,0,1,1,6,Liquor of particular make needed by youth lead...,BRANDY,1,0,across,1,False,BRANDY,liquor,BRAN (youth leader) + DY (particular make),\nAnswer: BRANDY (liquor) - BRAN (youth leader...,True,True
7,1,1,1,6,Not the HQ of Britain's capital stockbreeding ...,AVIARY,1,0,down,1,False,AVIARY,not the HQ of Britain's capital,A (not) + VI (Roman numeral for 6) + ARY (stoc...,\nAnswer: AVIARY (not the HQ of Britain's capi...,True,True
8,5,1,1,5,Material that could be mined (5),OREAL,5,0,down,3,False,OREAL,Material that could be mined,ORE (mined) + AL (could be),\nAnswer: OREAL (Material that could be mined)...,True,True
14,17,1,1,6,Threat to workers by one in suit (6),STRIKE,13,0,down,7,False,STRIKE,threat to workers,S (one) + TR (suit) + IKE (threat),\nAnswer: STRIKE (threat to workers) - S (one)...,True,True


In [100]:
output_cols = ["clue", "answer", "startx", "starty", "position", "orientation"]
selected["answer"] = selected["prediction"].apply(lambda x: x.replace(" ", ""))
selected[output_cols].to_json("website/answers1.json", orient="records", indent=4)

In [37]:
len(answer_df), answer_df["correct"].sum()

(23, 8)

In [44]:
pd.read_json("website/crosswordData.json")

Unnamed: 0,clue,answer,startx,starty,position,orientation
0,Liquor of particular make needed by youth leader,xxxxxx,1,1,1,across
1,Not the HQ of Britain's capital stockbreeding ...,xxxxxx,1,1,1,down
2,Amputate limbs to remove weapons,xxxxxx,8,1,4,across
3,Trying to be stylish in a way,xxxxxyzzzzzzz,3,1,2,down
4,Temporary leader in dramatic game,xxxxxxyyyyyyy,1,3,9,across
5,Material that could be mined,xxxxx,5,1,3,down
6,Sweetmeat of male injured by vehicle,xxxxxxx,1,5,10,across
7,Accuse one politician apiece,xxxxxxx,9,1,5,down
8,Twitch before at the top of the house,xxxxx,9,5,11,across
9,A length being planned,xxxxx,2,7,12,across


In [38]:
answer_df

Unnamed: 0,rowid,clue,answer,definition,clue_number,puzzle_date,puzzle_name,source_url,source,num_letters,...,prediction_satisfies_constraints-1,prediction_satisfies_constraints-2,prediction_satisfies_constraints-3,prediction_satisfies_constraints-4,prediction_satisfies_constraints-5,prediction_satisfies_constraints-6,prediction_satisfies_constraints-7,prediction_satisfies_constraints-8,prediction_satisfies_constraints-9,correct
0,88,Intrepid daughter missing parent's sister (9),DAUNTLESS,Intrepid,1a,2020-09-29,Times Quick Cryptic 1711,https://times-xwd-times.livejournal.com/241434...,times_xwd_times,9,...,False,False,False,True,True,True,False,True,False,True
2,90,Training device transforming Liam's tour (9),SIMULATOR,Training device,8a,2020-09-29,Times Quick Cryptic 1711,https://times-xwd-times.livejournal.com/241434...,times_xwd_times,9,...,True,True,True,True,True,True,True,True,True,True
3,91,Plant serving sauce with meat (5),OXLIP,Plant,9a,2020-09-29,Times Quick Cryptic 1711,https://times-xwd-times.livejournal.com/241434...,times_xwd_times,5,...,True,True,True,True,True,False,True,True,True,False
4,92,Clothing submerged in northern river? (9),UNDERWEAR,Clothing,10a,2020-09-29,Times Quick Cryptic 1711,https://times-xwd-times.livejournal.com/241434...,times_xwd_times,9,...,False,False,False,False,False,False,False,False,False,False
5,93,"Popular act, without doubt (6)",INDEED,without doubt,12a,2020-09-29,Times Quick Cryptic 1711,https://times-xwd-times.livejournal.com/241434...,times_xwd_times,6,...,True,False,True,False,True,True,False,False,True,False
6,94,Position of rubbish in South America (6),STATUS,Position,13a,2020-09-29,Times Quick Cryptic 1711,https://times-xwd-times.livejournal.com/241434...,times_xwd_times,6,...,False,False,False,False,False,False,False,False,False,False
7,95,Element Mendeleev originally found in flowerin...,GERMANIUM,Element,16a,2020-09-29,Times Quick Cryptic 1711,https://times-xwd-times.livejournal.com/241434...,times_xwd_times,9,...,True,True,True,True,True,True,True,True,True,False
8,96,"Head of police called, revealing accident (5)",PRANG,accident,18a,2020-09-29,Times Quick Cryptic 1711,https://times-xwd-times.livejournal.com/241434...,times_xwd_times,5,...,False,True,False,False,False,False,False,False,False,False
9,97,Switch posts near ground (9),TRANSPOSE,Switch,19a,2020-09-29,Times Quick Cryptic 1711,https://times-xwd-times.livejournal.com/241434...,times_xwd_times,9,...,False,False,False,False,False,False,False,False,False,False
10,98,"With cunning, son locates youth leaders twice (5)",SLYLY,With cunning,21a,2020-09-29,Times Quick Cryptic 1711,https://times-xwd-times.livejournal.com/241434...,times_xwd_times,5,...,False,True,False,True,False,False,False,False,False,False
