## Build hash map

In [1]:
from string import ascii_lowercase

# Map every lower-case letter to its 1-based alphabet position: a = 1, b = 2, ..., z = 26
letter_scores = {letter: position for position, letter in enumerate(ascii_lowercase, start=1)}

In [12]:
def clean_word(word):
    """Return an anagram-friendly version of ``word``: trimmed and lower-cased.

    Args:
        word: the string to normalise. Leading/trailing whitespace is ignored.

    Returns:
        The trimmed word in lower case.

    Raises:
        ValueError: if, after trimming, the word is empty or contains any
            character for which ``str.isalpha()`` is False (digits, spaces,
            punctuation, ...).
    """
    # Trim *before* validating: str.isalpha() is False for any string that
    # contains whitespace, so stripping after the check could never have an
    # effect and padded input such as " peach " was rejected outright.
    trimmed = word.strip()
    if not trimmed.isalpha():
        raise ValueError("Words must be alphabetic strings.")
    return trimmed.lower()

def compute_word_score(word):
    """Return the sum of the per-letter hash values of ``word``.

    The word is normalised with ``clean_word`` first, so a ValueError from
    that helper propagates for non-alphabetic input. Each letter's value is
    looked up in ``letter_scores``.
    """
    total = 0
    for letter in clean_word(word):
        total += letter_scores[letter]
    return total

In [13]:
# word score tests
# Each expectation is asserted as well as printed, so a wrong score stops the
# notebook instead of relying on someone eyeballing the output.

test_word_1 = "abcd" # sum should be 1 + 2 + 3 + 4 = 10
score_1 = compute_word_score(test_word_1)
assert score_1 == 10, "expected 'abcd' to score 10, got " + str(score_1)
print("1st word score: " + str(score_1))

# "ee" is not an anagram of "abcd" but has the same score: equal scores only
# make two words *candidate* anagrams.
test_word_2 = "ee" # sum should be 5 + 5 = 10
score_2 = compute_word_score(test_word_2)
assert score_2 == 10, "expected 'ee' to score 10, got " + str(score_2)
print("2nd word score: " + str(score_2))

1st word score: 10
2nd word score: 10


In [15]:
# Load the dictionary, one word per line. Blank lines are skipped: clean_word()
# rejects the empty string, so a single stray blank line would abort scoring.
with open('scrabble_words.txt', encoding="utf-8") as f:
    scrabble_words = [word for word in (line.strip() for line in f) if word]

# score all words in scrabble dictionary, then reduce to unique sums
# (compute_word_score() already cleans its argument, so it is not cleaned twice)
word_scores = [compute_word_score(word) for word in scrabble_words]

scores_unique = set(word_scores)

In [7]:
def get_words_with_score(score):
    """Return the words in ``scrabble_words`` whose score equals ``score``.

    Words keep the order they have in ``scrabble_words``; an empty list is
    returned when no word matches.
    """
    matches = []
    for word in scrabble_words:
        if compute_word_score(clean_word(word)) == score:
            matches.append(word)
    return matches

In [18]:
# collect all words that have the same score into lists
#
# Group in a single pass over the dictionary. Re-scoring every word once per
# unique score (one full scan per score) produces the same lists but does
# len(scores_unique) times as much work.
words_by_score = {}
for word in scrabble_words:
    words_by_score.setdefault(compute_word_score(word), []).append(word)

# Kept in scores_unique iteration order so words_with_scores still lines up
# with scores_unique element for element.
words_with_scores = [words_by_score[score] for score in scores_unique]

scores_to_words = dict(zip(scores_unique, words_with_scores))

In [19]:
def assert_anagrams(word_1, word_2):
    """Return True if the two words are anagrams of each other, ignoring case.

    Args:
        word_1: first word to compare.
        word_2: second word to compare.

    Returns:
        True when both words consist of exactly the same letters, else False.

    Raises:
        ValueError: if either word is not an alphabetic string.
    """
    # `not a and not b` parses as `(not a) and (not b)`, which is only true
    # when BOTH words are invalid. The guard must fire when EITHER is invalid,
    # i.e. `not (a and b)`.
    if not (word_1.isalpha() and word_2.isalpha()):
        raise ValueError("Both words must be alphabetic strings.")
    clean_word_1 = clean_word(word_1)
    clean_word_2 = clean_word(word_2)
    # Anagrams have identical letters once those letters are put in order.
    return sorted(clean_word_1) == sorted(clean_word_2)

In [20]:
# assert_anagrams tests
# Every case is asserted as well as printed, so a regression fails the cell
# instead of silently printing the wrong answer.

anagram_cases = [
    ("peach", "cheap", True),
    ("peach", "beach", False),
    ("Peach", "Cheap", True),   # comparison ignores case
    ("Peach", "Beach", False),
    ("Harry", "Harry", True),   # a word is an anagram of itself
]

for word_1, word_2, expected in anagram_cases:
    result = assert_anagrams(word_1, word_2)
    assert result == expected, word_1 + " & " + word_2 + ": expected " + str(expected)
    print(word_1 + " & " + word_2 + ": " + str(result))

# non-alphabetic input must be rejected with a ValueError
try:
    print("1234 & Dip1: " + str(assert_anagrams("1234", "Dip1")))
except ValueError:
    print("1234 & Dip1: " + "threw an error!")
else:
    raise AssertionError("assert_anagrams accepted non-alphabetic input")

peach & cheap: True
peach & beach: False
Peach & Cheap: True
Peach & Beach: False
Harry & Harry: True
1234 & Dip1: threw an error!


In [21]:
def get_anagrams(word):
    """Return the words in the dictionary that are anagrams of ``word``.

    Args:
        word: a single alphabetic word; case is ignored.

    Returns:
        The matching dictionary words (the word itself included when it is in
        the dictionary), or an empty list when there are none.

    Raises:
        ValueError: if ``word`` is rejected by ``clean_word``.
    """
    # Normalise once and reuse the result for both scoring and comparison.
    cleaned = clean_word(word)
    score = compute_word_score(cleaned)
    # A score that no dictionary word has simply means "no anagrams"; use
    # .get() so that case returns [] instead of raising KeyError.
    possible_anagrams = scores_to_words.get(score, [])
    # Equal scores are necessary but not sufficient, so confirm each candidate.
    return [anagram for anagram in possible_anagrams if assert_anagrams(cleaned, anagram)]

In [None]:
# multi-word anagrammer (work in progress)
# input is string of words separated by spaces
def get_anagrams(input_words):
    """Unfinished multi-word variant of the anagram lookup.

    Splits ``input_words`` on whitespace, cleans each word, and computes the
    per-word lengths and the combined score. Nothing is done with those
    values yet and there is no return statement, so the call returns None.

    NOTE(review): this re-uses the name ``get_anagrams``; running this cell
    replaces the single-word function defined in an earlier cell.
    """
    words = input_words.split()
    cleaned_words = list(map(clean_word, words))
    word_lengths = list(map(len, cleaned_words))
    total_score = sum(map(compute_word_score, words))

In [26]:
# Show the anagram lookup result for "peach".
print("Anagrams of 'peach:' ", get_anagrams("peach"), sep="")

Anagrams of 'peach:' ['CHAPE', 'CHEAP', 'PEACH']


In [27]:
# Show the anagram lookup result for "hostile".
print("Anagrams of 'hostile:' ", get_anagrams("hostile"), sep="")

Anagrams of 'hostile:' ['EOLITHS', 'HOLIEST', 'HOSTILE']


### Next steps:

1. handle multiple words
2. ~~sanitize inputs~~
3. build UI

In [None]:
from string import ascii_lowercase

class Anagrammer:
    """Look up single-word anagrams in a word list.

    Every word gets a score equal to the sum of its letters' alphabet
    positions (a = 1, ..., z = 26). Anagrams always share a score, so the
    words are indexed by score and a lookup only has to check the words in
    one bucket.
    """

    def __init__(self, words=None, word_file='scrabble_words.txt'):
        """Build the score -> words index.

        Args:
            words: optional iterable of words to index. When omitted, the
                words are read from ``word_file``, one per line.
            word_file: path of the word list that is read when ``words`` is
                None.

        Raises:
            KeyError: if a lower-cased word contains a character that is not
                one of the 26 letters in ``ascii_lowercase``.
        """
        # letter -> 1-based alphabet position
        self.letter_scores = dict(zip(ascii_lowercase, range(1, 27)))

        if words is None:
            with open(word_file, encoding="utf-8") as f:
                words = [line.strip() for line in f]

        # Trim every entry and drop empty ones (e.g. blank lines in the file).
        trimmed_words = (word.strip() for word in words)
        self.scrabble_words = [word for word in trimmed_words if word]

        # collect all words that have the same score into lists
        # One pass over the instance's own word list: each word is scored
        # once and appended to its bucket, rather than re-scoring the whole
        # list for every distinct score.
        self.scores_to_words = {}
        for word in self.scrabble_words:
            score = self.compute_word_score(word.lower())
            self.scores_to_words.setdefault(score, []).append(word)

    def clean_word(self, word):
        """Return ``word`` trimmed and lower-cased.

        Raises:
            ValueError: if the trimmed word is empty or not purely alphabetic.
        """
        # Trim before validating; isalpha() is False for anything containing
        # whitespace, so stripping afterwards would never have an effect.
        trimmed = word.strip()
        if not trimmed.isalpha():
            raise ValueError("Words must be alphabetic strings.")
        return trimmed.lower()

    def compute_word_score(self, word):
        """Return the sum of the letter scores of ``word``.

        ``word`` is used as given (no cleaning); a character that is not a
        key of ``self.letter_scores`` raises KeyError.
        """
        return sum(self.letter_scores[letter] for letter in word)

    def assert_anagrams(self, word_1, word_2):
        """Return True if the two words are anagrams, ignoring case.

        Raises:
            ValueError: if either word is not an alphabetic string.
        """
        # `not a and b` parses as `(not a) and b`, which is why the plain
        # `and` keyword appeared not to work; the intended test is
        # `not (a and b)`, spelled out here with explicit parentheses.
        if not (word_1.isalpha() and word_2.isalpha()):
            raise ValueError("Both words must be alphabetic strings.")
        return sorted(word_1.lower()) == sorted(word_2.lower())

    def get_anagrams(self, word):
        """Return the indexed words that are anagrams of ``word``.

        Returns an empty list when no indexed word matches.

        Raises:
            ValueError: if ``word`` is rejected by ``clean_word``.
        """
        cleaned = self.clean_word(word)
        score = self.compute_word_score(cleaned)
        # A score with no bucket means "no anagrams", not an error.
        possible_anagrams = self.scores_to_words.get(score, [])
        # Equal scores are necessary but not sufficient; confirm each one.
        return [anagram for anagram in possible_anagrams
                if self.assert_anagrams(cleaned, anagram)]

    def get_words_with_score(self, score):
        """Return the words in ``self.scrabble_words`` whose score is ``score``.

        Scans the whole word list and cleans each word before scoring it.
        """
        return [word for word in self.scrabble_words
                if self.compute_word_score(self.clean_word(word)) == score]

In [None]:
# Build the anagrammer once; constructing it opens 'scrabble_words.txt' and indexes every word by score.
anagrammer = Anagrammer()

In [None]:
# Bare last expression, so the notebook displays the returned list of anagrams.
anagrammer.get_anagrams("peach")