In [2]:
!pip install openai
import openai
import numpy as np
import pandas as pd
import os
import sys
from sentence_transformers import SentenceTransformer, util
import torch
import time
import json



  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Specify the path to your JSON file
file_path = "/Users/ishan//Desktop/cs224n/02.json"

# Check if the file exists and is not empty
if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
    try:
        # Open the JSON file and load its data
        with open(file_path, 'r') as file:
            data = json.load(file)
        
        # Now 'data' contains the JSON data as a Python dictionary
        print(data)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
    except Exception as e:
        print(f"An error occurred: {e}")
else:
    print("File does not exist or is empty.")

{'acrossmap': None, 'admin': False, 'answers': {'across': ['HERB', 'CROW', 'HAVE', 'AMOR', 'HOPE', 'TOLET', 'LIBERATED', 'INLET', 'ELEVATOR', 'LOIRE', 'RYDER', 'ASTERN', 'EASTER', 'IVAN', 'ASP', 'FLEECE', 'NANA', 'DELAYED', 'UNAGING', 'IRAN', 'GUARDS', 'NAY', 'TEST', 'ACCESS', 'TENREC', 'ETAPE', 'STELE', 'REASONER', 'TOROS', 'TELLSOVER', 'ADEPT', 'ETAL', 'LIVE', 'RODE', 'DENY', 'SLED'], 'down': ['HALER', 'EMILY', 'ROBED', 'BREVE', 'CHAT', 'ROTO', 'OPERATE', 'WED', 'HONORING', 'ALLINVAIN', 'VEER', 'ETTE', 'TILE', 'RAREFY', 'SECURE', 'TRENDS', 'ALEGAR', 'SEDUCE', 'ANNA', 'NAGY', 'ADIT', 'SERE', 'PLASTERED', 'ANTELOPE', 'ASSESS', 'ACCRETE', 'NEST', 'TOOLS', 'ANVIL', 'PEEVE', 'ERRED', 'STAR', 'TODO', 'ELAN', 'ALLY', 'TED']}, 'author': 'Martha J. DeWitt', 'autowrap': None, 'bbars': None, 'circles': None, 'clues': {'across': ['1. Fennel or sweet cicely', '5. Eat ___ (suffer humiliation)', '9. "To ___ and to Hold," Johnston novel', '13. Cupid', '14. Lange from Conn.', '15. House sign', '16. W

In [4]:
class Crossword:
    def __init__(self, data):
        self.data = data
        self.across_clues = {}
        self.down_clues = {}
        self.clue_to_positions = {}
        self.solution_dict = {}
        self.clue_grid = None
        self.neighbors = {}

    def initialize_solution_map(self):
        # first do across
        clues = self.data['clues']['across']
        answers = self.data['answers']['across']
        for i, clue in enumerate(clues):
            period_idx = clue.find('.')
            num, rest = clue[:period_idx], clue[period_idx+1:]
            self.solution_dict[f"{num}A"] = answers[i]

        # now do down
        clues = self.data['clues']['down']
        answers = self.data['answers']['down']
        for i, clue in enumerate(clues):
            period_idx = clue.find('.')
            num, rest = clue[:period_idx], clue[period_idx+1:]
            self.solution_dict[f"{num}D"] = answers[i]


    def initialize_clues(self):
        """
        Take in dictionary representing crossword and fill in dictionaries that hole clue codes (i.e. 1a/3d/18a/etc) 
        and map to corresppnding clue.
        """
        for clue in self.data['clues']['across']:
            period_idx = clue.find('.')
            num, rest = clue[:period_idx], clue[period_idx+1:]
            self.across_clues[f"{num}A"] = rest

        for clue in self.data['clues']['down']:
            period_idx = clue.find('.')
            num, rest = clue[:period_idx], clue[period_idx+1:]
            self.down_clues[f"{num}D"] = rest

    def initialize_clue_positions_mapping(self):
        """
        Take clue dictionary from self.across_clues and self.down_clues in the form {'1A': clue, etc ...}, 
        build a dictionary that maps clue ID to coordinates in grid
        """
        # first do across
        for clue in self.across_clues:
            num = int(clue[:-1])
            answer_len = len(self.solution_dict[clue])
            start = list(self.data['gridnums']).index(num)
            row, col = start // 15, start % 15 # convert from 1D array index to grid coord
            # this is across, so now that we have a start index, add corresponding coord to map
            coords = []
            for i in range(answer_len):
                coords.append((row, col + i))
            self.clue_to_positions[clue] = coords

        # now do down
        for clue in self.down_clues:
            num = int(clue[:-1])
            answer_len = len(self.solution_dict[clue])
            start = list(self.data['gridnums']).index(num)
            row, col = start // 15, start % 15 # convert from 1D array index to grid coord
            # this is across, so now that we have a start index, add corresponding coord to map
            coords = []
            for i in range(answer_len):
                coords.append((row + i, col))
            self.clue_to_positions[clue] = coords
    

    def initialize_clue_grid(self):
        """
        Represent a grid in the form of each cell being filled into to show what clue it corresponds to.
        For example:
        grid = [[('1A, 1D'), ('1A, 2D')],
                [('2A, 1D'), ('2A, 2D')]]
        """

        grid = [
            [[None, None] for _ in range(15)] for _ in range(15)
        ]
        
        for clue in self.across_clues.keys():
            coords = self.clue_to_positions[clue]
            for (x, y) in coords:
                grid[x][y][0] = clue

        for clue in self.down_clues.keys():
            coords = self.clue_to_positions[clue]
            for (x, y) in coords:
                grid[x][y][1] = clue

        self.clue_grid = grid


    def initialize(self):
        self.initialize_clues()
        self.initialize_solution_map()
        self.initialize_clue_positions_mapping()
        self.initialize_clue_grid()



In [6]:
trial = Crossword(data)
trial.initialize()

In [7]:
solutions = trial.solution_dict
down_answers = {}
across_answers = {}
for item in solutions:
    if item[-1] == 'D':
        down_answers[item] = solutions[item]
    else:
        across_answers[item] = solutions[item]
down_clues = trial.down_clues
across_clues = trial.across_clues

In [8]:
inputs = []

for item in across_clues:
    clue = across_clues[item]
    ans = across_answers[item]
    length = len(ans)
    input_text = str(clue) + ',' + ' ' + str(length) + ','
    inputs.append(input_text)

for item in down_clues:
    clue = down_clues[item]
    ans = down_answers[item]
    length = len(ans)
    input_text = str(clue) + ',' + ' ' + str(length) + ','
    inputs.append(input_text)



print(inputs)

[' Fennel or sweet cicely, 4,', ' Eat ___ (suffer humiliation), 4,', ' "To ___ and to Hold," Johnston novel, 4,', ' Cupid, 4,', ' Lange from Conn., 4,', ' House sign, 5,', ' What NOW wants women to be, 9,', ' Ocean arm, 5,', ' Follower of grain or freight, 8,', " Orleans's river, 5,", ' ___ Cup (golf prize), 5,', " Boatman's backward, 6,", ' March 26, 1978, 6,', ' Pavlov, 4,', ' Relative of a daboia, 3,', ' Defraud, 6,', ' Pram pusher, 4,', ' Put off, 7,', ' Describing eternal youth, 7,', " Pahlavi's country, 4,", ' Cerberus et al., 6,', ' Aye neutralizer, 3,', ' Put to the ___, 4,', ' Passageway, 6,', ' Madagascar mammal, 6,', ' Storehouse of a sort, 5,', ' Inscribed pillar, 5,', ' Newscaster, 8,', ' Bulls, in Barcelona, 5,', ' Repeats a report, 9,', ' Proficient, 5,', ' Abbr. often used on deeds, 4,', ' Kind of wire, 4,', ' Harassed, 4,', ' Abjure, 4,', ' Pung or monoski, 4,', " Item in a Czech's wallet, 5,", ' Girl in "Our Town", 5,', ' Togate, 5,', ' Longest modern musical note, 5,

In [14]:
answers = list(trial.solution_dict.values())

In [22]:
# input text: inputs
# solutions: answers

generated_answers = []
# Logistics and loading in model
!pip install openai==0.28

openai.api_key = 'sk-proj-8oLvnNGJLnlgW4SQOoHwT3BlbkFJ8c24SWE59CoO4sTxlDC7'

with open('/Users/ishan/Desktop/cs224n/fine_tuned_model_name.txt', 'r') as f:
    fine_tuned_model = f.read().strip()





In [23]:
def generate_top_answer(prompt, model, max_tokens=50):
    response = openai.ChatCompletion.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=max_tokens,
        n=1, 
        stop=None,
        temperature=0.9,
        top_p=0.9
    )
    top_answer = response['choices'][0]['message']['content'].strip()
    return top_answer



In [None]:
generated_answers = []
for i in range(len(inputs)):
    cur_answer = generate_top_answer(inputs[i], fine_tuned_model)
    generated_answers.append(cur_answer)
generated_answers

In [66]:
def extract_input_text(file_path):
    input_texts = []
    with open(file_path, 'r') as file:
        lines = file.readlines()
        for line in lines[:2500]:
            parts = line.strip().rsplit(', ', 2)  # Split into three parts from the right
            if len(parts) == 3:
                clue, length, _ = parts
                input_text = f"{clue}, {length},"
                input_texts.append(input_text)
    return input_texts

In [67]:
filepath = '/Users/ishan/Desktop/cs224n/gpt_3.5_test.txt'  
input_texts = extract_input_text(filepath)
input_texts

['Half of a stock market index name, 5,',
 'Tarkenton of three Super Bowls, 4,',
 'Former N.H.L. great, 3,',
 'Instruments used by the Beatles, 6,',
 '"I said ___!", 3,',
 'Backbone of a boat, 4,',
 'Linoleum cover, 3,',
 'Submarine, 5,',
 'Bartender?Æs supply, 6,',
 'Sched. uncertainty, 3,',
 'A commitment must be made here, 15,',
 'Kind of note, 4,',
 'Verizon competitor, 6,',
 'See 45-Across, 4,',
 'Gets concrete results?, 5,',
 'Veep from Tennessee, 6,',
 'Familia members, 4,',
 '___ deck, 4,',
 'Having attractive gams, 5,',
 'Condition, 5,',
 'Atlantic City attraction, 4,',
 'Super 8 alternative, 7,',
 'Private line?, 6,',
 'Part of a circle, 3,',
 '"Don\'t Tell ___" ("Cabaret" song), 4,',
 'Winter Olympics vehicle, 4,',
 'Abrogate a peace treaty, maybe, 5,',
 'Retirement nest eggs, 4,',
 'Card holder at a casino, 4,',
 '&#9794; and &#9792;, 5,',
 'Treasure of the Sierra Madre, 3,',
 'Sister of Calliope, 6,',
 'Makes fun of, in a way, 5,',
 '20-, 39- or 53-Across, say, 3,',
 'Hair

In [None]:
# Testing on 625 lines
testing_inputs = []
import random
if len(input_texts) < 625:
    raise ValueError("The list does not contain enough items.")

random_lines = random.sample(input_texts, 625)

# Print or use the random lines as needed
for line in random_lines:
    print(line.strip())
    testing_inputs.append(line)
len(testing_inputs)



In [None]:
generated_answers = []
for i in range(len(testing_inputs)):
    cur_answer = generate_top_answer(testing_inputs[i], fine_tuned_model)
    generated_answers.append(cur_answer)
generated_answers

In [None]:
len(generated_answers)

In [73]:
import random
import openai

# Function to read the file and extract lines
def read_file(file_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()
    return lines

# Function to extract clues and answers from lines
def extract_clues_answers(lines):
    clues = []
    answers = []
    for line in lines:
        parts = line.rsplit(',', 2)
        if len(parts) == 3:
            clue = parts[0].strip() + ',' + parts[1].strip() + ','
            answer = parts[2].strip()
            clues.append(clue)
            answers.append(answer)
    return clues, answers

# Function to generate solutions using the language model
def generate_top_answer(prompt, model, max_tokens=50):
    response = openai.ChatCompletion.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=max_tokens,
        n=1,  # Request only one completion
        stop=None,
        temperature=0.9,
        top_p=0.9
    )
    top_answer = response['choices'][0]['message']['content'].strip()
    return top_answer

# Function to get generated solutions for a list of clues
def get_generated_solutions(clues, model):
    generated_solutions = []
    for clue in clues:
        generated_solution = generate_top_answer(clue, model)
        generated_solutions.append(generated_solution)
    return generated_solutions

# Function to calculate accuracy
def calculate_accuracy(actual_solutions, generated_solutions):
    correct = 0
    for actual, generated in zip(actual_solutions, generated_solutions):
        if actual.lower() == generated.lower():
            correct += 1
    accuracy = correct / len(actual_solutions)
    return accuracy

# Main function
def main(file_path, model):
    lines = read_file(file_path)
    
    if len(lines) < 625:
        raise ValueError("The file does not contain enough lines.")
        
    random_lines = random.sample(lines, 625)
    clues, actual_solutions = extract_clues_answers(random_lines)
    generated_solutions = get_generated_solutions(clues, model)
    accuracy = calculate_accuracy(actual_solutions, generated_solutions)
    
    print(f'Accuracy: {accuracy:.2%}')
    return actual_solutions, generated_solutions, accuracy

# Replace with your actual file path and model name
file_path = '/Users/ishan/Desktop/cs224n/gpt_3.5_test.txt'
model = fine_tuned_model

# Execute the main function
actual_solutions, generated_solutions, accuracy = main(file_path, model)

# Optionally print actual vs generated solutions for inspection
for actual, generated in zip(actual_solutions, generated_solutions):
    print(f'Actual: {actual}, Generated: {generated}')


Accuracy: 40.00%
Actual: ATSEA, Generated: ONTOP
Actual: CRIMEAN, Generated: CRIMEAN
Actual: AREARUG, Generated: THROWRUG
Actual: ABOUTTHAT, Generated: REMINDME
Actual: SAFE, Generated: NOD
Actual: ORE, Generated: ORE
Actual: REDTAPE, Generated: REDTAPE
Actual: POSIT, Generated: BETON
Actual: TONER, Generated: TONER
Actual: IKE, Generated: TRIC
Actual: SUD, Generated: SUH
Actual: AMIS, Generated: ILES
Actual: INLETS, Generated: DELTAE
Actual: GASP, Generated: EXCL
Actual: LAURA, Generated: LAURA
Actual: ENTER, Generated: ENTER
Actual: MAINS, Generated: YJOINTS
Actual: FREELY, Generated: EASILY
Actual: OOLALA, Generated: OOLALA
Actual: SEW, Generated: BAD
Actual: PAR, Generated: RAT
Actual: STAKED, Generated: STAKED
Actual: RETAINER, Generated: FLOSSESP
Actual: PARINGS, Generated: ZESTERS
Actual: ORAN, Generated: ORAN
Actual: KHAN, Generated: EMIR
Actual: RESEDA, Generated: CELDON
Actual: EXES, Generated: YORE
Actual: OSO, Generated: OBR
Actual: OFFENSE, Generated: OFFENSE
Actual: RPI, 

In [74]:
actual_solutions

['ATSEA',
 'CRIMEAN',
 'AREARUG',
 'ABOUTTHAT',
 'SAFE',
 'ORE',
 'REDTAPE',
 'POSIT',
 'TONER',
 'IKE',
 'SUD',
 'AMIS',
 'INLETS',
 'GASP',
 'LAURA',
 'ENTER',
 'MAINS',
 'FREELY',
 'OOLALA',
 'SEW',
 'PAR',
 'STAKED',
 'RETAINER',
 'PARINGS',
 'ORAN',
 'KHAN',
 'RESEDA',
 'EXES',
 'OSO',
 'OFFENSE',
 'RPI',
 'RITEAID',
 'LAIT',
 'APORT',
 'BCC',
 'EMS',
 'ATWORST',
 'PEA',
 'BOZ',
 'OLIVE',
 'DONG',
 'MOANA',
 'FONTAL',
 'ONEBYONE',
 'LAMBS',
 'SOFA',
 'MRI',
 'ALARUM',
 'NEA',
 'EDAM',
 'LASTLEG',
 'CEASES',
 'LOST',
 'ARENA',
 'KEEL',
 'OILER',
 'TYPEA',
 'TOW',
 'AIL',
 'BALE',
 'LEX',
 'SCAR',
 'NOSIR',
 'CHIPS',
 'PERU',
 'SRO',
 'SUGARCOAT',
 'INITS',
 'YET',
 'PLOTS',
 'STAIR',
 'TEEN',
 'ETHER',
 'ROUE',
 'HANGTEN',
 'SADNESS',
 'MITE',
 'ODORS',
 'INTRO',
 'AMUCK',
 'NIGH',
 'POPCULTURE',
 'ABAR',
 'UKRAINE',
 'ALARM',
 'SSE',
 'GOT',
 'DABBA',
 'STILL',
 'CUE',
 'COROLLA',
 'SITARS',
 'EARTH',
 'BASEL',
 'SORER',
 'VINE',
 'FLU',
 'VSIGNS',
 'PEARY',
 'COGITO',
 'GAO',
 'B

In [75]:
def accuracy(answers, generated_answers):
    array1 = np.array(answers)
    array2 = np.array(generated_answers)

    # Compare the arrays element-wise and count the matches
    matches = np.sum(array1 == array2)

    return matches / len(array1)

In [76]:
accuracy(actual_solutions, generated_solutions)

0.4

In [35]:
# calculate by letter accuracy
def letter_accuracy(words, guesses):
    correct_letters, total_letters = 0, 0
    for i in range(len(guesses)):
        word, guess = words[i], guesses[i]
        # null guesses
        if guess == "NULL":
            total_letters += len(word)
        else:
            # correct guess
            if word == guess:
                correct_letters += len(word)
                total_letters += len(word)

            else:
                # Case 1: guess too short, adding padding
                if len(word) > len(guess):
                    while len(guess) < len(word):
                        guess += '!'
                # Case 2: guess too long, crop to len(word)
                elif len(word) < len(guess):
                    guess = guess[:len(word)]

                # Word, Guess now guaranteed to be same length
                for i in range(len(word)):
                    if word[i] == guess[i]:
                        total_letters += 1
                        correct_letters += 1
                    else:
                        total_letters += 1
    return correct_letters, total_letters

In [77]:
correct, total = letter_accuracy(actual_solutions, generated_solutions)
print(f"Accuracy is {correct/total}")

Accuracy is 0.45619624084103216


In [78]:
set_word_len = set([len(word) for word in actual_solutions])
for length in set_word_len:
    idxs = [i for i in range(len(actual_solutions)) if len(actual_solutions[i]) == length]
    subgroup_words = [actual_solutions[i] for i in idxs]
    subgroup_guesses = [generated_solutions[i] for i in idxs]
    correct, total = letter_accuracy(subgroup_words, subgroup_guesses)
    print(f"Correct {length}-letter prediction accuracy: {correct / total}")

Correct 3-letter prediction accuracy: 0.5478260869565217
Correct 4-letter prediction accuracy: 0.5472222222222223
Correct 5-letter prediction accuracy: 0.5153284671532846
Correct 6-letter prediction accuracy: 0.43248945147679324
Correct 7-letter prediction accuracy: 0.4211822660098522
Correct 8-letter prediction accuracy: 0.2413793103448276
Correct 9-letter prediction accuracy: 0.18518518518518517
Correct 10-letter prediction accuracy: 0.14
Correct 11-letter prediction accuracy: 1.0
Correct 13-letter prediction accuracy: 1.0
Correct 15-letter prediction accuracy: 0.13333333333333333


In [79]:
!pip install gensim

# importing all necessary modules
from gensim.models import Word2Vec
from gensim.models import KeyedVectors

def load_embedding_model():
    """ Load GloVe Vectors
        Return:
            wv_from_bin: All 400000 embeddings, each length 200
    """
    import gensim.downloader as api
    wv_from_bin = api.load("glove-wiki-gigaword-200")
    print("Loaded vocab size %i" % len(list(wv_from_bin.index_to_key)))
    return wv_from_bin
wv_from_bin = load_embedding_model()

Loaded vocab size 400000


In [53]:
def cos_similarity_incl_null(words, guesses):
    cos_sim = []
    for i in range(len(words)):
        try:
            cos_sim.append(wv_from_bin.distance(words[i].lower(), guesses[i].lower()))
        except:
            cos_sim.append(0)
    return sum(cos_sim) / len(cos_sim)

def cos_similarity_excl_null(words, guesses):
    cos_sim = []
    for i in range(len(words)):
        try:
            cos_sim.append(wv_from_bin.distance(words[i].lower(), guesses[i].lower()))
        except:
            pass
    if len(cos_sim):
        return sum(cos_sim) / len(cos_sim)
    else:
        return 0

In [80]:
print(f"Cosine similarity w/ 0s for null guesses: {cos_similarity_incl_null(actual_solutions, generated_solutions)}")
print(f"Cosine similarity excluding null guesses: {cos_similarity_excl_null(actual_solutions, generated_solutions)}")

Cosine similarity w/ 0s for null guesses: 0.30846804798096417
Cosine similarity excluding null guesses: 0.4293820266995604


In [81]:
for length in set_word_len:
    idxs = [i for i in range(len(answers)) if len(actual_solutions[i]) == length]
    subgroup_words = [actual_solutions[i] for i in idxs]
    subgroup_guesses = [actual_solutions[i] for i in idxs]
    print(f"Cosine similarity {length}-letter w/ 0s for null guesses: {cos_similarity_incl_null(subgroup_words, subgroup_guesses)}")
    print(f"Cosine similarity {length}-letter excluding null guesses: {cos_similarity_excl_null(subgroup_words, subgroup_guesses)}")
    print()

Cosine similarity 3-letter w/ 0s for null guesses: 6.622738308376736e-09
Cosine similarity 3-letter excluding null guesses: 6.622738308376736e-09

Cosine similarity 4-letter w/ 0s for null guesses: 7.450580596923828e-09
Cosine similarity 4-letter excluding null guesses: 7.450580596923828e-09

Cosine similarity 5-letter w/ 0s for null guesses: 3.311369154188368e-09
Cosine similarity 5-letter excluding null guesses: 4.584972675030048e-09

Cosine similarity 6-letter w/ 0s for null guesses: 7.450580596923828e-09
Cosine similarity 6-letter excluding null guesses: 9.934107462565104e-09

Cosine similarity 7-letter w/ 0s for null guesses: 0.0
Cosine similarity 7-letter excluding null guesses: 0.0

Cosine similarity 8-letter w/ 0s for null guesses: 0.0
Cosine similarity 8-letter excluding null guesses: 0.0

Cosine similarity 9-letter w/ 0s for null guesses: 2.9802322387695312e-08
Cosine similarity 9-letter excluding null guesses: 5.960464477539063e-08



ZeroDivisionError: division by zero

In [82]:
def is_correct_length(actual_answers, generated_answers):
    acc_array = np.array(actual_answers)
    gen_array = np.array(generated_answers)
    matches = np.sum([len(acc_array[i]) == len(gen_array[i]) for i in range(len(acc_array))])
    return matches / len(acc_array)

is_correct_length(actual_solutions, generated_solutions)

0.856