# Nile 

We can put any text we want here!

### Support Code

In [None]:
# A named text file. parse-book-records splits `content` on newline
# characters to obtain the book titles recorded in the file.
data File:
  | file(name :: String, content :: String)
end

# The result type of recommend and popular-pairs: `names` lists the
# recommended entries and `count` is the number of appearances those
# entries share.
data Recommendation:
  | recommendation(count :: Number, names :: List<String>)
end

FailureResult([object Object])

### Import Statements

In [2]:
import lists as L

"<Unknown value: details logged to console>"

### Data Definitions

In [3]:
# Two book titles that were found in the same File. pairs-match treats
# a Pair and its title-swapped counterpart as the same Pair.
data Pair:
  | pair(title1 :: String, title2 :: String)
end

"<Unknown value: details logged to console>"

# Implementation

In [4]:
fun recommend(title :: String, book-records :: List<File>) 
  -> Recommendation:
  doc: ```Consumes a book title and a list of Files and produces a
       Recommendation naming the book(s) that share a File with title
       most often, together with how many times they do so.```

  # Every File becomes the List<String> of titles it records; only the
  # lists that mention `title` are kept.
  records-with-title = L.filter(
    lam(titles :: List<String>) -> Boolean: list-contains(titles, title) end,
    L.map(parse-book-records, book-records))

  # Every title left in those lists was recorded alongside `title`, so
  # merging them into one bag and dropping `title` itself leaves one entry
  # per co-occurrence.
  companion-titles = L.filter(
    lam(candidate :: String) -> Boolean: candidate <> title end,
    list-flatten(records-with-title))

  # The most frequent entries of the bag are the recommendation.
  generate-recommendations(companion-titles)
end

fun generate-recommendations(bag-of-titles :: List<String>) 
  -> Recommendation:
  doc: ```generates a Recommendation from the given List<String> 
       bag-of-titles: the names field holds every title that appears the
       greatest number of times in bag-of-titles, ordered by first
       appearance, and the count field holds that number of appearances;
       an empty bag-of-titles produces recommendation(0, empty)```

  # The counts computed below line up, index for index, with
  # distinct-titles. They do NOT line up with bag-of-titles, which may
  # repeat a title, so every lookup by index must go through
  # distinct-titles.
  distinct-titles = list-distinct(bag-of-titles)

  fun find-most-frequent-titles(
      counts :: List<Number>, 
      current-high-count :: Number, 
      current-list :: List<String>, 
      current-index :: Number) -> Recommendation:
    doc: ```walks the remaining counts, where current-index is the position
         in distinct-titles of the first remaining count, and builds a
         Recommendation from the highest count seen and the titles that
         reached it; current-list is accumulated in reverse order```
    cases (List) counts:
      | empty => 
        recommendation(current-high-count, list-reverse(current-list))
      | link(f, r) =>
        if f > current-high-count:
          # A strictly higher count discards everything collected so far.
          find-most-frequent-titles(
            r, 
            f, 
            [list: list-get(distinct-titles, current-index)], 
            current-index + 1
          )
        else if f == current-high-count:
          # A tie with the current best joins the collected titles.
          find-most-frequent-titles(
            r, 
            current-high-count, 
            link(list-get(distinct-titles, current-index), current-list), 
            current-index + 1
          )
        else:
          find-most-frequent-titles(
            r, 
            current-high-count, 
            current-list, 
            current-index + 1
          )
        end
    end
  end

  bag-count = vectorize-list(bag-of-titles, distinct-titles)
  find-most-frequent-titles(bag-count, 0, empty, 0)
end

fun vectorize-list(lst :: List<String>, bag-of-words :: List<String>) 
  -> List<Number>:
  doc: ```produces a List<Number> with one entry per element of the given
       List<String> bag-of-words, in the same order, where each entry is
       the number of times that element occurs in the given List<String>
       lst```
  L.map(
    lam(word :: String) -> Number: list-count-instances(lst, word) end,
    bag-of-words)
end

fun popular-pairs(records :: List<File>) -> Recommendation:
  doc: ```Consumes a list of Files and produces a Recommendation naming
       the pair(s) of books that occur together in the most Files of
       records, along with that number of occurrences.```

  # Turn every File into the Pairs that can be built from its titles.
  pairs-per-file = L.map(
    lam(record :: File) -> List<Pair>:
      make-pairs-from-titles(parse-book-records(record))
    end,
    records)

  # Merge the per-File lists into one List<Pair> and tally it, starting
  # from "no best count yet" (0) and no best Pairs.
  find-recommended-pairs(list-flatten(pairs-per-file), 0, empty)
end
# popular-pairs tests at the bottom of the file

fun find-recommended-pairs(
    list-of-pairs :: List<Pair>, 
    highest-instances :: Number, 
    highest-pairs :: List<Pair>) -> Recommendation:
  doc: ```consumes the List<Pair> list-of-pairs still to be examined, the
       highest number of appearances found so far (highest-instances)
       and the Pairs that reached it (highest-pairs), and returns a
       Recommendation holding the final highest count and the
       "title1+title2" names of the Pairs that appear that many times;
       two Pairs count as the same Pair when they hold the same titles
       in either order```

  cases (List) list-of-pairs:
    | empty =>
      # Nothing left to tally: report the best Pairs by name.
      recommendation(
        highest-instances,
        L.map(concat-pair-titles, highest-pairs))
    | link(candidate, _) =>
      # How often the candidate (in either title order) occurs in what is
      # left; this is at least 1 because the candidate matches itself.
      occurrences = L.fold(
        lam(total :: Number, other :: Pair) -> Number:
          if pairs-match(other, candidate): total + 1 else: total end
        end,
        0,
        list-of-pairs)

      # Every occurrence of the candidate is dropped before moving on, so
      # each distinct Pair is tallied exactly once.
      remaining = L.filter(
        lam(other :: Pair) -> Boolean: not(pairs-match(other, candidate)) end,
        list-of-pairs)

      if occurrences > highest-instances:
        find-recommended-pairs(remaining, occurrences, [list: candidate])
      else if occurrences == highest-instances:
        find-recommended-pairs(
          remaining,
          highest-instances,
          link(candidate, highest-pairs))
      else:
        find-recommended-pairs(remaining, highest-instances, highest-pairs)
      end
  end
end

fun concat-pair-titles(p :: Pair) -> String:
  doc: ```returns the two titles of the given Pair p joined into one
       String, title1 first, with a plus sign between them```
  cases (Pair) p:
    | pair(first-title, second-title) => first-title + "+" + second-title
  end
end

fun pairs-match(pair1 :: Pair, pair2 :: Pair) -> Boolean:
  doc: ```returns true when the two given Pairs hold the same two titles,
       either in the same fields or with title1 and title2 swapped;
       otherwise, returns false```
  same-order =
    (pair1.title1 == pair2.title1) and (pair1.title2 == pair2.title2)
  swapped-order =
    (pair1.title2 == pair2.title1) and (pair1.title1 == pair2.title2)
  same-order or swapped-order
end

fun make-pairs-from-titles(list-of-titles :: List<String>) -> List<Pair>:
  doc: ```consumes a List<String> list-of-titles and returns a List<Pair>
       in which every title is paired once with each title that comes
       after it in list-of-titles, so no two positions are paired a
       second time in the opposite order```
  
  fun pair-title-with-others(
      title :: String, 
      other-titles :: List<String>) -> List<Pair>:
    doc: ```returns a List<Pair> with one Pair per element of the 
         List<String> other-titles, in the same order, each holding the
         given String title as title1 and that element as title2```
    cases (List) other-titles:
      | empty => empty
      | link(f, r) => 
        link(pair(title, f), pair-title-with-others(title, r))
    end
  end

  fun pair-up-titles(titles :: List<String>):
    doc: ```builds the not-yet-flattened result for list-of-titles: the
         Pairs for its first title as one nested List<Pair>, followed by
         the individual Pairs for the remaining titles```
    # NOTE(review): the match below is on the enclosing list-of-titles and
    # not on the titles parameter. The only call passes list-of-titles, so
    # both names refer to the same list.
    cases (List) list-of-titles:
      | empty => empty
      | link(f, r) => 
        # Pairs whose title1 is the first title.
        new-pairs = pair-title-with-others(f, r)
        # The recursion goes through the outer function, so next-pairs is
        # an already-flattened List<Pair> for the remaining titles.
        next-pairs = make-pairs-from-titles(r)
        # Result: a List<Pair> as the first element, bare Pairs after it.
        link(new-pairs, next-pairs)
    end
  end

  # list-flatten splices the nested new-pairs list in and then links each
  # bare Pair onto the FRONT of what it has built so far, so the Pairs for
  # the remaining titles come out reversed and ahead of the first title's
  # Pairs.
  # NOTE(review): the expected name order in the popular-pairs check block
  # appears to depend on this ordering -- confirm before restructuring.
  list-flatten(pair-up-titles(list-of-titles))
end

fun list-count-instances<T>(lst :: List<T>, element :: T) -> Number:
  doc: "returns how many items of the List lst are equal to element"
  L.fold(
    lam(total, item):
      if item == element: total + 1 else: total end
    end,
    0,
    lst)
where:
  list-count-instances(empty, 1) is 0
  list-count-instances([list: 1], 1) is 1
  list-count-instances([list: 2], 1) is 0
  list-count-instances([list: 1, 1, 1], 1) is 3
  list-count-instances([list: 2, 3, 4], 1) is 0
  list-count-instances([list: 1, 2, 3, 2, 1], 2) is 2
end

fun list-length<T>(lst :: List<T>) -> Number:
  doc: "returns how many elements the List<T> lst holds"
  cases (List) lst:
    | empty => 0
    | link(_, others) => 1 + list-length(others)
  end
where:
  list-length(empty) is 0
  list-length([list: 1]) is 1
  list-length([list: 2]) is 1
  list-length([list: 1, 1, 1]) is 3
  list-length([list: 2, 3, 4]) is 3
  list-length([list: 1, 2, 3, 2, 1]) is 5
end

fun list-get<T>(lst :: List<T>, index :: Number) -> T:
  doc: ```gets the element in the List lst at the zero-based Number index;
       raises an error when lst has no element at that index (the index
       is negative, not a whole number, or at least the length of lst)```
  cases (List) lst:
    # Running out of list before index reaches 0 means the index was not
    # a valid position. Signal that instead of handing back `empty`,
    # which is not an element of lst.
    | empty => raise("list-get: index out of range")
    | link(f, r) =>
      if index == 0:
        f
      else:
        list-get(r, index - 1)
      end
  end
where:
  list-get([list: 1, 2, 3, 4], 0) is 1
  list-get([list: 1, 2, 3, 4], 1) is 2
  list-get([list: 1, 2, 3, 4], 2) is 3
  list-get([list: 1, 2, 3, 4], 3) is 4
  list-get(empty, 0) raises "index out of range"
  list-get([list: 1, 2], 2) raises "index out of range"
  list-get([list: 1, 2], -1) raises "index out of range"
end

fun list-distinct<T>(lst :: List<T>) -> List<T>:
  doc: ```returns the values of the List lst in their original order,
       keeping only the first occurrence of every value```

  fun keep-first-occurrences(remaining :: List, kept-reversed :: List) 
    -> List:
    doc: ```walks remaining from the front, collecting unseen values onto
         kept-reversed (most recent first), and returns the collected
         values in their original order once remaining is used up```
    cases (List) remaining:
      | empty => list-reverse(kept-reversed)
      | link(item, others) =>
        if list-contains(kept-reversed, item):
          keep-first-occurrences(others, kept-reversed)
        else:
          keep-first-occurrences(others, link(item, kept-reversed))
        end
    end
  end

  keep-first-occurrences(lst, empty)
where:
  list-distinct(empty) is empty
  list-distinct([list: 1]) is [list: 1]
  list-distinct([list: 1, 1]) is [list: 1]
  list-distinct([list: 1, 2]) is [list: 1, 2]
  list-distinct([list: 1, 2, 3]) is [list: 1, 2, 3]
  list-distinct([list: 1, 2, 3, 2, 1, 4]) is [list: 1, 2, 3, 4]
end

fun list-reverse<T>(lst :: List<T>) -> List<T>:
  doc: "returns a List holding the elements of lst from last to first"

  fun move-onto(remaining :: List, reversed :: List) -> List:
    doc: ```moves the elements of remaining, front first, onto the front
         of reversed```
    cases (List) remaining:
      | empty => reversed
      | link(item, others) => move-onto(others, link(item, reversed))
    end
  end

  move-onto(lst, empty)
where:
  list-reverse(empty) is empty
  list-reverse([list: 1]) is [list: 1]
  list-reverse([list: 1, 2]) is [list: 2, 1]
  list-reverse([list: 1, 2, 3]) is [list: 3, 2, 1]
  list-reverse([list: 1, 2, 3, 4, 5]) is [list: 5, 4, 3, 2, 1]
end

fun list-contains<T>(lst :: List<T>, element :: T) -> Boolean:
  doc: ```returns true when some item of the List lst is equal to the
       given element, and false when none is```
  cases (List) lst:
    | empty => false
    | link(item, others) =>
      # `or` stops at the first match, so the rest of the list is only
      # searched when this item differs.
      (item == element) or list-contains(others, element)
  end
where:
  list-contains(empty, 1) is false
  list-contains([list: 1], 1) is true
  list-contains([list: 1], 2) is false
  list-contains([list: 1, 1, 1], 1) is true
  list-contains([list: 2, 2, 2], 1) is false
  list-contains([list: 1, 2, 3], 3) is true
  list-contains([list: 1, 2, 3], 4) is false
end

fun list-flatten<T>(nested-list :: List<T>) -> List<T>:
  doc: ```removes one level of nesting from the given List nested-list:
       the elements of every nested list are appended, in order, to the
       result built so far, and empty nested lists contribute nothing.
       An element that is not itself a list is linked onto the FRONT of
       the result built so far.```
  
  fun absorb(flattened :: List, item :: Any) -> List:
    doc: ```folds one element of nested-list into the result built so
         far```

    if is-empty(item):
      flattened
    else if is-link(item):
      flattened.append(item)
    else:
      # A bare element goes in front of what has been built so far.
      link(item, flattened)
    end
  end

  L.foldl(absorb, empty, nested-list)
where:
  list-flatten(empty) 
    is empty
  list-flatten([list: 1, [list: 2], [list: 3]]) 
    is [list: 1, 2, 3]
  list-flatten([list: 1, empty, [list: 2], [list: 3], empty]) 
    is [list: 1, 2, 3]
  list-flatten([list: [list: 1], [list: 2], [list: 3]]) 
    is [list: 1, 2, 3]
end

# END List library

# START parser for Files -> List<String> function definitions:
fun parse-book-records(record :: File) -> List<String>:
  doc: ```returns the content of the given File record split at newline
       characters, as a List<String> with one entry per line```
  cases (File) record:
    | file(_, content) => parse-document-words(content, "\n")
  end
where:
  parse-book-records(file("a.txt", "")) is [list:]
  parse-book-records(file("b.txt", "test")) is [list: "test"]
  parse-book-records(file("c.txt", "a\nb\nc")) is [list: "a", "b", "c"]
  parse-book-records(file("d.txt", "Do\ncs\nfor\nshriram")) 
    is [list: "Do", "cs", "for", "shriram"]
end

fun parse-document-words(
    document :: String, 
    char :: String) -> List<String>:
  doc: ```splits the given String document wherever the one-character
       String char occurs and returns the pieces, in order, as a
       List<String>; the occurrences of char themselves are dropped and
       an empty document produces an empty list```

  fun is-delimiter(code :: Number) -> Boolean:
    doc: "returns true when code is the code point of char"
    code == string-to-code-point(char)
  end

  fun take-word(codes :: List<Number>) -> List<Number>:
    doc: ```returns the code points of codes that come before the first
         delimiter, or all of codes when there is no delimiter```
    cases (List) codes:
      | empty => empty
      | link(code, others) =>
        if is-delimiter(code): empty else: link(code, take-word(others)) end
    end
  end

  fun drop-word(codes :: List<Number>) -> List<Number>:
    doc: ```returns the code points of codes that come after the first
         delimiter, or an empty list when there is no delimiter```
    cases (List) codes:
      | empty => empty
      | link(code, others) =>
        if is-delimiter(code): others else: drop-word(others) end
    end
  end

  fun split-codes(codes :: List<Number>) -> List<String>:
    doc: ```turns codes into the List<String> of delimiter-separated
         words it spells```
    if is-empty(codes):
      empty
    else:
      link(
        string-from-code-points(take-word(codes)),
        split-codes(drop-word(codes)))
    end
  end

  split-codes(string-to-code-points(document))
where:
  parse-document-words("", "\n") is [list:]
  parse-document-words("test", "\n") is [list: "test"]
  parse-document-words("dog\ncat", "\n") is [list: "dog", "cat"]
  parse-document-words("a\nb\nc", "\n") is [list: "a", "b", "c"]
  parse-document-words("Do\ncs\nfor\nshriram", "\n") 
    is [list: "Do", "cs", "for", "shriram"]
end

"<Unknown value: details logged to console>"

In [5]:
check ```popular-pairs handles some titles appearing the most across all
      Files but that do not appear in the most frequent pairs```:

  # "TAPS0260" is recorded in more Files than any other title, yet no
  # Pair containing it is ever the most frequent Pair.
  four-titles   = file("custom_1.txt", "TAPS0260\nJAPN0100\nCSCI0190\nAPMA1655")
  three-titles  = file("custom_2.txt", "JAPN0100\nCSCI0190\nAPMA1655")
  taps-engl     = file("custom_3.txt", "TAPS0260\nENGL0950")
  russ-taps     = file("custom_4.txt", "RUSS1200\nTAPS0260")
  csci-taps-apma = file("custom_5.txt", "CSCI0190\nTAPS0260\nAPMA1655")

  # The three Pairs shared by the first two Files, in the order that
  # popular-pairs reports them.
  tied-names = [list: 
    "CSCI0190+APMA1655", 
    "JAPN0100+CSCI0190", 
    "JAPN0100+APMA1655"]

  popular-pairs([list: four-titles, three-titles, taps-engl]) 
    is recommendation(2, tied-names)
  popular-pairs([list: four-titles, three-titles, taps-engl, russ-taps]) 
    is recommendation(2, tied-names)
  popular-pairs([list: four-titles, three-titles, taps-engl, csci-taps-apma]) 
    is recommendation(3, [list: "CSCI0190+APMA1655"])
end

"<Unknown value: details logged to console>"

In [6]:
# Sample Files bound at the top level so later cells can pass them to
# popular-pairs; each content String holds newline-separated titles.
c1 = file("custom_1.txt", "TAPS0260\nJAPN0100\nCSCI0190\nAPMA1655")
c2 = file("custom_2.txt", "JAPN0100\nCSCI0190\nAPMA1655")
c3 = file("custom_3.txt", "TAPS0260\nENGL0950")
c4 = file("custom_4.txt", "RUSS1200\nTAPS0260")
c5 = file("custom_5.txt", "CSCI0190\nTAPS0260\nAPMA1655")

"<Unknown value: details logged to console>"

In [7]:
# Run popular-pairs on the first four sample Files.
popular-pairs([list: c1, c2, c3, c4]) 

"<Unknown value: details logged to console>"

In [8]:
# Scratch cell: bind a Number and evaluate a product that uses it.
x = 1
x * 4

4

In [9]:
# Scratch cell: bind a three-element list of Numbers.
y = [list: 1, 2, 3]

"<Unknown value: details logged to console>"

In [10]:
# Scratch cell: read the first element of y.
y.first

1

In [11]:
# Scratch cell: evaluate a Boolean literal.
true

true

In [12]:
# Scratch cell: evaluate a Number literal.
2

2

In [13]:
# Scratch cell: evaluate the popular-pairs function value without calling it.
popular-pairs

"<Unknown value: details logged to console>"