In [1]:
# Run this cell to widen the current notebook to 90% of the browser window.
# This saves you from scrolling sideways when a code line is too long.
# The width is changed by injecting a small CSS rule through an HTML display object.

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [2]:
import random
from collections import defaultdict

# [+1] for using class
# [+1] for using CapitalisedWord naming convention for class
# [+1] for using a constructor, i.e., the __init__ method
# [+1] for initialising variables within the __init__ method 
#      (number of variables here may vary from my solution)
# [+1] for using self as first parameter in all instance methods, i.e, functions within the class
# [+1] for using the with context manager to open files

class BookRecommendations:
    """Interactive book recommender backed by ``books.txt`` and ``ratings.txt``.

    Books are identified by their 1-based line number in ``books.txt`` (stored
    as a string). Every user has one rating per book; a rating of 0 means the
    user has not read the book.
    """
    # no class variables

    def __init__(self):
        self.books = {} # {id_1:(author,title), ... ,id_55:(author,title)}
        self.ratings = {} # {user_a:{id_1:rating, ... ,id_55:rating}, user_b:{...}, ...}

    def load_books(self):
        """ load books.txt into self.books
        Each line is split on "," and stored as a tuple under its 1-based
        line number (as a string).
        return: None
        """
        with open("books.txt") as f:
            # start=1 so that book ids count from 1 instead of 0
            for book_id, book_info in enumerate(f.read().splitlines(), start=1):
                self.books[str(book_id)] = tuple(book_info.split(","))
        # test that all books loaded successfully
        assert len(self.books) == 55

    def load_ratings(self):
        """ load ratings.txt into self.ratings
        > initially, there are 85 users in ratings.txt
        The file alternates between a username line and a line of
        space-separated integer ratings (one per book, in book-id order).
        Blank lines are ignored so that a stray or trailing empty line cannot
        shift the username/ratings pairing.
        return: None
        """
        with open("ratings.txt") as f:
            lines = [line for line in f.read().splitlines() if line.strip()]
        for i in range(0, len(lines), 2): # every even index holds a username
            username = lines[i].title() # change case to title so that sorting works as expected
            ratings = map(int, lines[i+1].split())
            self.ratings[username] = {str(idx): rating for idx, rating in enumerate(ratings, start=1)}
        # we can't assert total number of users,
        # because this can be changed dynamically

    def similarity(self, user_a, user_b):
        """
        params: user_a and user_b
        return: similarity score (as integer) between user_a and user_b,
                i.e. the sum over all books of rating_a * rating_b
        complexity: O(m), where m = len(self.books)
        """
        user_a_ratings = self.ratings[user_a]
        user_b_ratings = self.ratings[user_b]
        return sum(
            rating * user_b_ratings[book_id]
            for book_id, rating in user_a_ratings.items() # O(m)
        )

    def all_similarities(self, user_a):
        """
        params: user_a
        return: a sorted list of similarity scores between user_a and
                all other users => [(username, similarity_score), ...]
                in descending order of similarity.
                If two users have the same similarity score,
                the tie is broken by descending order of username.
        complexity: O(nm), where n = len(self.ratings) and m = len(self.books)
        """
        all_similarity = [
            (user_b, self.similarity(user_a, user_b)) # O(m)
            for user_b in self.ratings # O(n)
            if user_b != user_a
        ]
        # reverse=True applies to both parts of the key: score first, then username
        return sorted(all_similarity, key=lambda x: (x[1], x[0]), reverse=True)

    def get_rating(self):
        """
        this method gets rating from a user,
        and it keeps asking until the user enters a valid rating

        return: user rating (one of -5, -3, 0, 1, 3, 5)
        complexity: O(1)
        """
        valid_ratings = {-5, -3, 0, 1, 3, 5}
        while True:
            try:
                # int() raises ValueError itself for non-numeric input
                rating = int(input("How would you rate this book? "))
                if rating not in valid_ratings:
                    raise ValueError(f"{rating} is not a valid rating. See valid options above!")
            except ValueError as v_error:
                print(f"ValueError: {v_error}")
            else:
                return rating

    def new_user(self, username):
        """
        this method adds a new user to self.ratings and ratings.txt
        it also asks the user to rate 20% of all books, selected at random;
        every other book gets the rating 0 (not read)

        return: None
        complexity: O(m), where m = len(self.books)
        """
        # random.sample needs a sequence (a dict view raises TypeError on
        # Python 3.11+), so sample from a list of the book ids.
        # With 55 books this picks int(0.2*55) = 11 unique books.
        sample_size = int(0.2*len(self.books))
        books_to_rate = set(random.sample(list(self.books), sample_size)) # O(m)
        user_rating = {}
        print()
        print("-"*40)
        print(
            f"Welcome to our book recommendation page, {username}!\n"
            "As a new user, you have to tell us your opinion on a few books.\n"
            "If you have not read the book, answer 0 but otherwise use this scale.\n\n"
            " -5:  Hated it. \n"
            " -3:  Didn't like it. \n"
            "  1:  OK. \n"
            "  3:  Liked it. \n"
            "  5:  Really liked it. \n"
        )
        print("-"*40)
        for book_id, book in self.books.items(): # O(m)
            if book_id in books_to_rate:   # O(1) -- set membership check
                print(f">>> {book[1]}, written by {book[0]}")
                user_rating[book_id] = self.get_rating()  # O(1)
                print("~"*20)
            else:
                user_rating[book_id] = 0
        self.ratings[username] = user_rating # O(1) -- add new user to self.ratings

        # write new user to ratings.txt
        # complexity analysis of writing not necessary
        ratings_line = " ".join(str(user_rating[book_id]) for book_id in self.books)
        # Only start with a newline when the existing file does not already end
        # with one; otherwise a blank line would end up between two records.
        try:
            with open("ratings.txt") as existing:
                content = existing.read()
        except FileNotFoundError:
            content = ""
        separator = "" if (not content or content.endswith("\n")) else "\n"
        with open("ratings.txt", "a") as out_rating:
            out_rating.write(f"{separator}{username}\n{ratings_line}\n")

    def make_recommendations(self, user_a, num_recs):
        """
        params: user_a and number of (unique) books to be recommended;
                num_recs may be an int, a numeric string, or "" (defaults to 10)
        return: (list of all similarities between user_a and other users,
                dict mapping each recommending user to the list of
                (author, title) tuples they recommended)
        complexity: O(nm), where n = len(self.ratings) and m = len(self.books)
        """
        # if num_rec is an empty string, default to 10
        # otherwise, convert their entry to integer
        num_recs = 10 if num_recs == "" else int(num_recs)
        all_similarity = self.all_similarities(user_a) # O(nm)
        recommendations_by_book = {} # {(book_author,book_title):user, ...}
        recommendations_by_user = defaultdict(list) # {user_1: [(book_author,book_title), ...], ...}
        user_a_ratings = self.ratings[user_a]
        # we go through the other users, most similar first
        for user_b, _similarity_score in all_similarity: # O(n) -- total number of users
            # stop as soon as we have enough recommendations; checking *before*
            # adding means num_recs == 0 yields no recommendations at all
            if len(recommendations_by_book) >= num_recs:
                break
            user_b_ratings = self.ratings[user_b]
            # we go through user_a's ratings of all books and compare with user_b
            for book_id, book_rating in user_a_ratings.items(): # O(m)
                if len(recommendations_by_book) >= num_recs:
                    break
                # the book must be unread by user_a and rated highly by user_b
                if book_rating != 0 or user_b_ratings[book_id] not in {3, 5}:
                    continue
                book = self.books[book_id]
                # skip books already recommended by a more similar user
                if book not in recommendations_by_book: # O(1)
                    recommendations_by_book[book] = user_b
                    recommendations_by_user[user_b].append(book)

        return all_similarity, recommendations_by_user

    def get_user_input(self):
        """
        this method retrieves user input to run the whole program,
        and keeps asking until the number of recommendations is valid

        return: (username in title case, number of recommendations as the
                raw string the user typed -- possibly "")
        complexity: O(1)
        """
        while True:
            username = input("What is your name? ").title()
            num_recs = input("How many recommendations? ")
            # accept an empty string or decimal digits only; isdecimal() is used
            # because isdigit() also accepts characters such as "²" that int() rejects
            if num_recs == "" or num_recs.isdecimal():
                return username, num_recs
            print("Number of recommendation should be an integer or an empty string")

    def write_recommendations_to_file(self, user, num_recs, all_similarity, recommendations):
        """
        this method writes the recommendations for a specific user to output.txt
        (the file is overwritten on every call)

        params: user -- the user the recommendations are for
                num_recs -- the number of recommendations as entered by the user
                all_similarity -- [(username, similarity_score), ...]
                recommendations -- {recommending_user: [(author, title), ...], ...}
        return: None
        complexity: Not required
        """
        with open("output.txt", "w") as out_file:
            print(f"What is your name? {user}", file=out_file)
            print(f"How many recommendations? {num_recs}", file=out_file)
            # write similarities
            print(f"\nSimilarity of other users with {user}:", file=out_file)
            for other_user, similarity in all_similarity:
                print(f"\t {other_user}: {similarity}", file=out_file)
            # write recommendations
            print("\nRecommending based on the similarity algorithm.", file=out_file)
            print("+"*47, file=out_file)
            for recommender, books in recommendations.items():
                print(f"Books recommended by {recommender}:", file=out_file)
                for book in books:
                    print(f"\t {book[1]}, written by {book[0]}", file=out_file)

    def main(self):
        """
        method that brings everything together
        <ObjectName>.main() is used to interact with this book recommendation program
        return: None
        complexity: O(nm), where n = len(self.ratings) and m = len(self.books)
        """
        # load books and ratings
        self.load_books()
        self.load_ratings()
        user, num_recs = self.get_user_input()  # O(1)
        if user not in self.ratings: # O(1)
            self.new_user(user)  # O(m)
        all_similarity, recommendations = self.make_recommendations(user, num_recs)  # O(nm)
        self.write_recommendations_to_file(user, num_recs, all_similarity, recommendations)
        

## Lab exam 2020

## Rules

* **Read the rules carefully before you begin.**

<div class="alert alert-danger">
    
* This submission must be 100% your own work. Do not discuss, share or collaborate on any part of this exam with any other person. 
    
</div>

* This is an **open book** exam. You **may** consult external references (books, Stack Overflow, etc.) but you **must not** copy and paste code verbatim; nor may you reveal or discuss any aspect of the exam with anyone in an online (or offline) medium.
* You must submit the correct file on Moodle by the posted deadline. There are no extensions.
* You may not import *any* libraries beyond those already imported for you. You do not need to use a library just because it is imported!

---

### Marking
* This exam is marked out of 80.
* The division of marks is listed by each task.
* The three parts of this exam are *independent*. If you cannot complete one, this will not affect your ability to complete the others.
* The parts increase in difficulty. Remember: partial solutions will get credit. You do not have to have completely working code, or implement all of the features requested, to get most of the marks. Make an attempt if you can.
* Be warned that spending excess time is unlikely to increase your grade, but it will increase your stress levels. 
   

## Important
Please enter the following statement into the cell marked **[STATEMENT HERE]** below:

> I, [your name], have read and understand the rules governing this lab exam and I will abide by them.
    
(double-click a cell to edit it).


[STATEMENT HERE]

---

# Tasks

In [393]:
import numpy as np
import math, random, functools

# Background
A **word search puzzle** is a square grid of letters, where some sequences of letters make up words, and the rest are random letters. The puzzle is to find a list of known words in the grid. Words may be written horizontally, and sometimes vertically or even diagonally. In some variations, words can be reversed. 

The puzzle below contains the words `block`, `spain`, `sugar`, `beans`:

        S V B S B
        P Q L U E
        A I O G A
        I C C A N
        N W K R S

## Part 1: Reading words

You are building a system to help design word search puzzles. To do this, you have been asked to process some dictionary data to produce suitable word sets.

### (a) Read in words

Write a function `read_lines(fname)`. This should read a file named `fname` and return a list of lines, which should be stripped of newlines. There are 32 lines in the file `test.txt` and 19438 lines in the file `dictionary.txt`, and 0 lines in `empty.txt`. Write tests to make sure this function works. 

**[5 marks]**

In [394]:
## Solution

# [+1] for def
# [+1] for using with
# [+1] for stripping
# [+1] for returning a list
# [+1] for testing length of output

def read_lines(fname):
    """Read the file `fname` and return its lines as a list of strings.

    Each line has its leading and trailing whitespace (including the
    newline) stripped. An empty file yields an empty list.
    """
    with open(fname) as handle:
        # iterating the file object yields the same lines as readlines()
        return [raw.strip() for raw in handle]


# Check read_lines against the three fixture files, whose line counts are
# given in the task statement (0, 32 and 19438 lines respectively).
assert len(read_lines("empty.txt"))==0
assert len(read_lines("test.txt"))==32
assert len(read_lines("dictionary.txt"))==19438

### (b) Clean up
You want to read the file `dictionary.txt` using the function above. 

Unfortunately, the dictionary has been transcribed poorly. Each line has several words in it, separated by one or more spaces. Letters may be in upper or lower case. The words are in random order. Occasionally a page number appears mixed in with the words, which must be ignored. Words never contain page numbers; a page number is always separated from the words by a space.

For example, part of `dictionary.txt` reads:

    Jags dunks
    uncoated CHAUFFEUSE drudgery
    249 ACCELERATIONS
    alabama
    Mellows sealed
    
The 249 is a page number and should be ignored. Valid words for our purposes only consist of alphabetic letters. These would be, in this case,

    jags
    dunks
    uncoated
    chauffeuse
    drudgery
    accelerations
    alabama
    mellows
    sealed
    
Any contractions or hyphenated words, like `don't` or `can't` or `topsy-turvy` should *also* be removed, as they have non-alphabetic characters.
  
* Write a function `clean_line(line)` that will take *one* line and return the valid words in it, all lowercase, **as a list**.
* Write tests to make sure `clean_line(line)` works correctly.
* Write a function `clean_all_lines(lines)` that will take a list like the return value of `read_lines` and return a list of words, cleaned by `clean_line` and **sorted into alphabetic order.**
* Write tests to make sure `clean_all_lines` works correctly.
* After cleaning and sorting, the words from `dictionary.txt` begin with "a" and end with "zwolle". There are 65152 words. 

**[12 marks]**

In [395]:
## Solution

# [+1] for lowercasing
# [+1] for splitting
# [+1] for alpha test

# [+1] for testing with page number
# [+1] for testing with case
# [+1] for testing with at least one other example

def clean_line(line):
    """Return the valid words of one dictionary line, lower-cased, as a list.

    The line is split on single spaces; a token is kept only if it consists
    entirely of alphabetic characters, which drops page numbers, contractions,
    hyphenated words and the empty tokens produced by repeated spaces.
    """
    tokens = line.lower().split(" ")
    return list(filter(str.isalpha, tokens))


# clean_line tests: empty input, a single word, several words,
# mixed case, and a page number that must be dropped.
assert clean_line("")==[]    
assert clean_line("barn")==["barn"]    
assert clean_line("one two three")==["one", "two", "three"]    
assert clean_line("one two THREE")==["one", "two", "three"]    
assert clean_line("123 baby")==["baby"]    

# [+1] for iterating over lines
# [+1] for joining lists together
# [+1] for sorting

# [+1] for testing sortedness
# [+1] for testing start/end words
# [+1] for testing length

def clean_all_lines(lines):
    """Clean every line with `clean_line` and return all words sorted.

    `lines` is a list of raw lines (such as the return value of `read_lines`).
    The result is a single flat list of the valid words, in sorted order;
    duplicates are kept.
    """
    return sorted(word for line in lines for word in clean_line(line))

# clean_all_lines tests on small inputs: sorting across lines, dropping a
# page number, an empty list, and a single word.
assert clean_all_lines(["alpha charlie bravo", "beta gamma"]) == ["alpha", "beta", "bravo", "charlie", "gamma"]
assert clean_all_lines(["baby barn 123", "bread"]) == ["baby", "barn", "bread"]
assert clean_all_lines([]) == []
assert clean_all_lines(["brain"]) == ["brain"]

# Load the real dictionary; `words` is the cleaned, sorted word list.
words = clean_all_lines(read_lines("dictionary.txt"))


# Expected first word, last word and total count, as given in the task statement.
assert words[0]=='a'
assert words[-1]=='zwolle'
assert len(words)==65152


### (c) Choose good words
Dictionary words aren't all good choices for a word search. 

The file `common_words.txt` has the 1000 most common words in English in it, one word per line. 

* Use the functions you defined above to read this file. All words are 1 to 13 characters long.
* Write tests to validate you have read it correctly.

#### Filtering the good words
* Write a function `good_words(dictionary, common_words)` that will take the dictionary words from `clean_all_lines()` and the common words you just loaded, and apply all of the following rules to select words from the dictionary that are good for a word search. Return the "good" words as a list, sorted in order of word length, shortest word first. A word is good if it:
    * is not a common word;
    * is three to eight characters long;
    * has at least one vowel;
    * is not equal to itself reversed (e.g. `naan` reversed is `naan` and would be excluded);
    * if two words in the dictionary are the same *except* one has an `s` at the end, the word without the `s` should be kept, and the other discarded. For example, "cats" and "cat" should become just "cat"; "burger" and "burgers" should become "burger".
* Write tests to check `good_words` works OK  

For the last two marks, make your solution *reasonably* efficient -- in particular, it should avoid executing in O(N^2) time, where N is the number of words in the dictionary. As a very rough guide, an efficient implementation might take 100-2000ms running the timing test on most machines, but not *much* more.

There are around 20000-40000 good words. `yourself`, `zucchini`, `galaxies`, `ant`, `ape`, `ark` are all "good words".

**[20 marks]**

In [329]:
## Solution
# [+1] for reading *using* the existing function!
# [+1] for testing length of common words==1000
# [+1] for testing min, max word length

# Read the common-word list with the existing reader (one word per line),
# and rebuild the cleaned dictionary word list.
common_words = read_lines("common_words.txt")
words = clean_all_lines(read_lines("dictionary.txt"))

# The task statement says there are 1000 common words, each 1 to 13 characters long.
assert max([len(word) for word in common_words])==13
assert min([len(word) for word in common_words])==1
assert len(common_words)==1000

# [+1] for writing def
# [+1] for checking vowel count
# [+1] for plural test
# [+1] for palindrome test
# [+2] for not searching a list over and over again (e.g. using dictionaries or sets)
# [+1] for testing word lengths
# [+1] for excluding common words
# [+1] for combining all tests
# [+1] for returning a list of words
# [+1] for sorting with the right key!

def good_words(dictionary, common_words):
    """Select the dictionary words that are suitable for a word search.

    A word is kept when it is not a common word, is 3 to 8 characters long,
    is not a palindrome, contains at least one of the vowels a/e/i/o/u, and
    is not the "+s" form of another dictionary word.

    Returns a list sorted by word length (shortest first). Duplicates are
    removed, and words of equal length keep the order in which they first
    appear in `dictionary`.
    """
    # hash-based lookups keep every membership test O(1), avoiding N^2 comparisons
    excluded = set(common_words)
    known = dict.fromkeys(dictionary)  # de-duplicates while preserving input order

    def is_good(candidate):
        if candidate in excluded:
            return False
        if not 3 <= len(candidate) <= 8:
            return False
        if candidate == candidate[::-1]:  # palindrome
            return False
        if not any(vowel in candidate for vowel in 'aeiou'):
            return False
        # plural: drop "cats" when "cat" is also in the dictionary
        return not (candidate.endswith('s') and candidate[:-1] in known)

    kept = [word for word in known if is_good(word)]
    kept.sort(key=len)  # stable sort, so equal-length words keep their order
    return kept
            


# [+1] for testing removal of common words
# [+1] for testing removal of too short/long
# [+1] for testing vowel count
# [+1] for testing plural removal
# [+1] for testing palindrome removal
# [+1] for testing sorting

# good_words tests, one rule at a time: no filtering, common-word removal,
# no-vowel removal, plural removal, too-short / too-long / empty words,
# palindromes, and finally ordering by length.
assert good_words(["baby", "barn", "brain"], [])==["baby", "barn", "brain"]
assert good_words(["baby", "barn", "brain"], ["brain"])==["baby", "barn"]
assert good_words(["baby", "barn", "brn"], ["brain"])==["baby", "barn"]
assert good_words(["baby", "barn", "barnacle", "barnacles"], ["brain"])==["baby", "barn", "barnacle"]
assert good_words(["baby", "barn", "barnacle", "bs", "b"], ["brain"])==["baby", "barn", "barnacle"]
assert good_words(["baby", "barn", "barnacle", "toolongforthistest"], ["brain"])==["baby", "barn", "barnacle"]
assert good_words(["baby", "barn", "barnacle", ""], ["brain"])==["baby", "barn", "barnacle"]
assert good_words(["baby", "barn", "barnacle", "nun"], ["brain"])==["baby", "barn", "barnacle"]
assert good_words(["baby", "barn", "barnacle", "naan"], ["brain"])==["baby", "barn", "barnacle"]
assert good_words(["baby", "kraken", "barn", "barnacle", "naan"], ["brain"])==["baby", "barn", "kraken", "barnacle"]
    

In [370]:
%%timeit -n 10 -r 2
## Timing test
good_words(dictionary, common_words)

76.8 ms ± 3.46 ms per loop (mean ± std. dev. of 2 runs, 10 loops each)


## Part 2: A word search finder

Word search puzzles use random letters to hide the letter patterns.

For example: find the words "spleen, matrix, coding, lambda, basil" in the 8x8 grid below:

    G S P L E E N D
    S P G H C F R M
    T P Z L O C A B
    I L D Z D V J A
    M A T R I X B S
    A J C G N X D I
    A A B S G R Z L
    V L A M B D A L
    
The solution can be seen in the lower case characters below:

    G s p l e e n D
    S P G H c F R M
    T P Z L o C A b
    I L D Z d V J a
    m a t r i x B s
    A J C G n X D i
    A A B S g R Z l
    V l a m b d a L
    
### Find words
Write a function `find_words(puzzle, words)` that takes a word search puzzle as a string, and a list of potential words, and returns each of the words found in the puzzle in a list. This should find words hidden horizontally (left-to-right) or vertically (top-to-bottom). The search should ignore case, and always return the found words in lower case. It should only ever return a detected word at most *once*. It should ignore any blank lines.

Note: if you choose to only detect horizontal words, you will lose five of the possible marks.

[**18 marks**]


In [330]:
## Solution

# [+1] for a def line
# [+2] for splitting apart the string somehow
# [+1] for case insensitivity
# [+1] for skipping blank lines
# [+3] for matching rows
# [+5] for matching columns
# [+1] for collecting results into a list
# [+2] for removing duplicates in the result
# [+1] for lowercasing the return values
# [+1] for returning something (not printing it!)


def find_words(word_search, words):
    """Find which of `words` appear in the puzzle `word_search`.

    params: word_search -- the puzzle as a string, one row per line, letters
                           optionally separated by spaces; blank lines are ignored
            words -- the candidate words (any case)
    return: list of the words found left-to-right in a row or top-to-bottom
            in a column, in lower case. Each word appears at most once, in the
            order of its first occurrence in `words`. Empty words are never
            reported.
    """
    # Keep every non-blank row (a one-letter row is still a row, so that
    # single-column puzzles can be searched vertically).
    rows = [row.strip().upper().replace(" ", "") for row in word_search.splitlines() if row.strip()]
    # zip(*rows) transposes the grid, turning columns into strings
    columns = ["".join(col) for col in zip(*rows)]
    lines = rows + columns

    found = {}  # dict used as an ordered set to drop duplicates
    for word in words:
        target = word.strip().upper()
        if target and any(target in line for line in lines):
            found[target.lower()] = True
    return list(found)

In [304]:
## Tests

# The 8x8 example puzzle from the task statement, used by the tests below.
search = """
G S P L E E N D
S P G H C F R M
T P Z L O C A B
I L D Z D V J A
M A T R I X B S
A J C G N X D I
A A B S G R Z L
V L A M B D A L
"""

# compare two lists, ignoring order
def unordered_test(a,b):
    """Return True when `a` and `b` hold the same elements, in any order."""
    left, right = list(a), list(b)
    left.sort()
    right.sort()
    return left == right

# Tests on the 8x8 puzzle: absent words, an empty word list, and
# case-insensitive matching of horizontal and vertical words.
assert unordered_test(find_words(search, ["lada", "bail"]), [])
assert unordered_test(find_words(search, []),[])
assert unordered_test(find_words(search, ["sbs"]),[])
assert unordered_test(find_words(search, ["matrix"]), ["matrix"])
assert unordered_test(find_words(search, ["lambda", "basil"]),["lambda", "basil"])
assert unordered_test(find_words(search, ["LAMBDA", "basil"]),["lambda", "basil"])
assert unordered_test(find_words(search, ["lAmbda", "BaSiL"]),["lambda", "basil"])
    
# A mixed-case puzzle: the search must ignore the case of the grid too.
search_2 = """
F B S B s
G L P E u
B O A A g
P C I N a
O K N S r
"""

assert unordered_test(find_words(search_2, ["lada", "bail"]),[])
assert unordered_test(find_words(search_2, ["spain", "bail"]),["spain"])
assert unordered_test(find_words(search_2, ["spain", "sugar"]),["spain", "sugar"])
assert unordered_test(find_words(search_2, ["spainish"]),[])

# A puzzle in which words occur more than once: each must be reported only once.
search_3 = """
B S B S S S
E P E U U P
A A A G G A
N I N A A I
S N S R R N
S U G A R R
"""

assert unordered_test(find_words(search_3, ["lada", "bail"]),[])
assert unordered_test(find_words(search_3, ["spain"]),["spain"])
assert unordered_test(find_words(search_3, ["spain", "beans", "sugar"]),["spain", "beans", "sugar"])

## Adapt for reversed words

Adapt your solution to re-define a new version of `find_words(puzzle, words)` that also detects **reversed** words (both horizontal and vertical). For example

    M P F A O
    A I R J L
    P E O A L
    S C M M E
    Z E P Z H


contains "hello", but written bottom-to-top, and "pez" written right-to-left, as well as "jam" written top-to-bottom and "air" written left-to-right. The return value 
of `find_words(search, ["hello", "pez", "jam", "air"])` should be `["hello", "pez", "jam", "air"]` (in any order).

[**5 marks**]

In [307]:
## Solution

# [+1] for redefining find_words (in any way)
# [+2] for reversing check
# [+2] for returning the word in the right order

def find_words(word_search, words):
    """Find which of `words` appear in the puzzle, forwards or reversed.

    params: word_search -- the puzzle as a string, one row per line, letters
                           optionally separated by spaces; blank lines are ignored
            words -- the candidate words (any case)
    return: list of the words found in a row or a column, reading in either
            direction, in lower case and in their normal (unreversed)
            spelling. Each word appears at most once -- even when the puzzle
            contains it both forwards and reversed -- in the order of its
            first occurrence in `words`. Empty words are never reported.
    """
    # Keep every non-blank row (a one-letter row is still a row, so that
    # single-column puzzles can be searched vertically).
    rows = [row.strip().upper().replace(" ", "") for row in word_search.splitlines() if row.strip()]
    # zip(*rows) transposes the grid, turning columns into strings
    columns = ["".join(col) for col in zip(*rows)]
    lines = rows + columns

    found = {}  # dict used as an ordered set to drop duplicates
    for word in words:
        target = word.strip().upper()
        if not target:
            continue
        backwards = target[::-1]
        # each word is tested on its own, so two words that are reverses of
        # each other (e.g. "pez" and "zep") are both reported
        if any(target in line or backwards in line for line in lines):
            found[target.lower()] = True
    return list(found)

In [306]:
# The 5x5 example puzzle from the task statement; it contains "hello"
# bottom-to-top, "pez" right-to-left, "jam" top-to-bottom and "air" left-to-right.
search_4 = """
M P F A O
A I R J L
P E O A L
S C M M E
Z E P Z H
"""

# Absent words must not be reported; present words are found in either direction.
assert find_words(search_4, [])==[]
assert find_words(search_4, ["lada", "bail"])==[]
assert find_words(search_4, ["spain", "bail"])==[]
assert find_words(search_4, ["hello"]) == ["hello"]
assert unordered_test(find_words(search_4, ["hello", "spam", "from", "me", "cats"]), ["hello", "spam", "from", "me"])
assert unordered_test(find_words(search_4, ["hello", "jam", "air", "pez"]), ["hello", "jam", "air", "pez"])

## Part 3: Generating a word search

Write a function `generate_word_search(n, words, horizontal=True, vertical=False, reversed=False)` that will generate a word search of size `n x n` containing all the words in `words` and return it as a string, one row per line. **If it is impossible to fit the words in the grid because they are too long, an error should occur.**

You do not need to consider the case where there are too many words to fit into a word search. You do not have to deal with overlapping words, but you must make sure that every word in `words` appears correctly in the final puzzle.

Each parameter `horizontal`, `vertical` should enable or disable embedding words in that orientation -- **at least one of these must be True, or an error should occur**; `reversed` will allow reversed words in all enabled orientations. If multiple directions are enabled, the direction of each word should be set randomly.

Every entry in the output word search should be an upper case letter. Passing an input word with a non-letter **should cause an error**.

Note:

* `random.choice(l)` chooses a random element of a list `l`
* `random.randint(0, n)` chooses a random number between 0 and (including) `n`

Note: the easiest (but not only) way to approach this is to initialise a grid of random letters and make *random attempts* to place words in it.



Every letter in a row in the returned string should be separated by spaces, as in the examples.

For example, 

    grid = generate_word_search(5, ["hello", "from", "me"])   

might produce:

    N I U R I
    Y X F J I
    H E L L O
    F R O M A
    H I M E Z
    
(note that the "background" letters are chosen *randomly*)
    
and
    
    grid = generate_word_search(8, ["hello", "from", "my", "secret", "lair"], horizontal=True, vertical=True)   
    
might produce:

    S C D D L G C R
    E E F H A M J J
    C T Q E I H F Q
    R K Y L R H E A
    E W C L T H H X
    T D A O A S K G
    Q P F R O M E P
    M Y L Y K C R G
    
while 

    grid = generate_word_search(4, ["hello", "from", "my", "secret", "lair"], horizontal=True, vertical=True)   
    
would produce an error (`secret` is more than 4 characters long), as would:

    grid = generate_word_search(5, ["hello", "from", "CS1P"])   

(as 1 is not a letter).


**[20 marks]**

In [261]:
## Solution

# [+1] def is present
# [+1] n, words as parameters
# [+1] correct optional parameters
# [+1] word length test
# [+1] valid direction test

# [+1] non-letter test
# [+1] initial grid creation
# [+2] some way of choosing words

# [+2] some way of testing that words don't corrupt previous words
# *or equivalently* that all words in the original list of words make it in to the
# final output
# [+1] for reversing words randomly (when reversed True)
# [+1] for horizontal mode
# [+3] for vertical mode

# [+1] for ensuring uppercase letters
# [+1] for generating string in correct format
# [+1] for returning something!


def generate_word_search(n, words, horizontal=True, vertical=False, reversed=False):
    """Generate an n x n word search containing every word in `words`.

    params: n -- side length of the square grid
            words -- the words to hide; this list is NOT modified
            horizontal, vertical -- enable left-to-right / top-to-bottom
                placement; when both are enabled the direction of each
                word is chosen at random
            reversed -- when True, each word is randomly written backwards
    return: the grid as a string, one row per line, upper-case letters
            separated by single spaces
    raises: AssertionError if a word is longer than n, if no direction is
            enabled, or if a word contains a non-letter;
            RuntimeError if the words could not all be placed within the
            attempt budget (e.g. there are too many words for the grid)

    Words are placed by random attempts into a grid of random letters; a word
    is only written where it does not touch a cell used by another word.
    """
    # upper-case once, up front; this also works on a copy, so the caller's
    # list is left untouched
    remaining = [word.upper() for word in words]

    # check pre-conditions
    assert all(len(word) <= n for word in remaining), "Word cannot fit in this grid"
    assert any([horizontal, vertical]), "Must have at least one valid direction!"
    assert all(c.isalpha() for word in words for c in word), "Non-letter in word!"

    # create a random letter grid, plus a mask of cells that belong to a word
    letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    grid = [[random.choice(letters) for _ in range(n)] for _ in range(n)]
    fixed = [[False] * n for _ in range(n)]

    # each direction is a (row step, column step) pair
    directions = []
    if horizontal:
        directions.append((0, 1))
    if vertical:
        directions.append((1, 0))

    # bound the number of random attempts so an impossible layout raises
    # an error instead of looping forever
    attempts_left = 10_000 * max(1, len(remaining))
    while remaining:
        if attempts_left == 0:
            raise RuntimeError("Could not place all words in the grid")
        attempts_left -= 1

        word = random.choice(remaining)
        k = len(word)
        if k == 0:
            # an empty word is trivially "placed"
            remaining.remove(word)
            continue

        row_step, col_step = random.choice(directions)
        # choose a start so that the last letter lands on index n-1 at most;
        # every valid start position (0 .. n-k along the word's axis) is possible
        r = random.randint(0, n - 1 - (k - 1) * row_step)
        c = random.randint(0, n - 1 - (k - 1) * col_step)

        # flip randomly, if enabled
        written_word = word[::-1] if reversed and random.randint(0, 1) == 1 else word

        # if we won't overwrite another word, then write it in and remove it
        cells = [(r + i * row_step, c + i * col_step) for i in range(k)]
        if any(fixed[row][col] for row, col in cells):
            continue
        for (row, col), letter in zip(cells, written_word):
            grid[row][col] = letter
            fixed[row][col] = True
        remaining.remove(word)

    return "\n".join([" ".join(row) for row in grid])
            

    

B S B S S S
E P E U U P
A A A G G A
N I N A A I
S N S R R N
S U G A R R


In [309]:
# Demo: the same five words placed horizontally only (the default),
# in both directions, and again with vertical placement enabled.
print(generate_word_search(8, ["coding", "lambda", "matrix", "spleen", "basil"]))
print()
print(generate_word_search(8, ["coding", "lambda", "matrix", "spleen", "basil"], horizontal=True, vertical=True))
print()
print(generate_word_search(8, ["coding", "lambda", "matrix", "spleen", "basil"],  vertical=True))
print()

G B A S I L I F
S P L E E N L T
D Y D W G D P B
L M A T R I X G
X F E B K T P X
H Y T P X P V D
U L A M B D A H
N C O D I N G J

T Y M L V B N A
C Y A A P R J L
O X T M X P P A
D J R B Y R Z Z
I H I D H E E D
N B X A S S S L
G S P L E E N S
B A S I L E V L

K R P Q L X B S
I J Q C A M A P
E W V O M A S L
Z U S D B T I E
A I K I D R L E
I U U N A I H N
K S B G H X T W
J J M G V U V V



In [388]:
# Demo: three 4-letter words in a 4x4 grid, so each word fills a whole row.
print(generate_word_search(4, ["beta", "pram", "nimo"]))


N I M O
P R A M
W L M V
B E T A


In [392]:
## Tests

def fails(expr):
    """Return True if calling `expr()` raises an exception, False otherwise.

    Only `Exception` subclasses count as a failure; KeyboardInterrupt and
    SystemExit are allowed to propagate so the notebook can still be stopped.
    """
    try:
        expr()
    except Exception:
        return True
    return False

def n_lines(s):
    """Return the number of lines in the string `s`."""
    return sum(1 for _ in s.splitlines())

def elt_check(s, n):
    """Return True when every line of `s` has exactly `n` whitespace-separated entries."""
    for line in s.splitlines():
        if len(line.split()) != n:
            return False
    return True

# the grid must have n lines ...
assert n_lines(generate_word_search(8, ["matrix", "spleen", "basil"]))==8
assert n_lines(generate_word_search(6, ["matrix", "spleen", "basil"]))==6
assert n_lines(generate_word_search(12, ["matrix", "spleen", "basil"]))==12

# ... and n entries on every line, including a completely full grid and an empty word list
assert elt_check(generate_word_search(12, ["matrix", "spleen", "basil"]), 12)
assert elt_check(generate_word_search(8, ["matrix", "spleen", "basil"]), 8)
assert elt_check(generate_word_search(6, ["matrix", "spleen", "basil"]), 6)
assert elt_check(generate_word_search(3, ["tea", "bee", "elk"]), 3)
assert n_lines(generate_word_search(3, ["tea", "bee", "elk"])) ==  3
assert n_lines(generate_word_search(6, []))==6
assert elt_check(generate_word_search(6, []),6)


# invalid calls must raise: words too long for the grid, non-letters, and no
# enabled direction. Each call is wrapped in fails(); asserting a bare lambda
# would always pass, because a function object is truthy.
assert fails(lambda :generate_word_search(4, ["coding", "lambda", "matrix", "spleen", "basil"])) and not fails(lambda:generate_word_search(4, ["code", "lamb", "matt"]))
assert fails(lambda :generate_word_search(8, ["127", "9", "matrix", "spleen", "basil"]))
assert fails(lambda :generate_word_search(8, ["back{}", "span's", "matrix", "spleen", "'''"]))
assert fails(lambda :generate_word_search(8, ["coding", "lambda", "matrix", "spleen", "basil"], horizontal=False)) and not fails(lambda :generate_word_search(8, ["coding", "lambda", "matrix", "spleen", "basil"]))


# END OF EXAM

Please:

* Take a break.
* Make sure you read each question carefully. The number one reason to lose marks is to not read the question!
* Check that each of your cells run as you expect. Try `Kernel/Restart and Run All` to make sure.
* Submit your solution on Moodle 
* And then relax :) 

---

