# HW3

### Importing Libraries 

In [2]:
import requests
from bs4 import BeautifulSoup as bs
import os
import pickle
import numpy as np
import time
import datetime as dt
import csv
import pandas as pd
import nltk
import re
from nltk.corpus import stopwords
import nltk
import string
import heapq

In [121]:
# nltk.download('stopwords')
# nltk.download('punkt')

[nltk_data] Downloading package punkt to /Users/hassan/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

## 1. Data collection

### 1.1.

In [3]:
URL = "https://myanimelist.net/topanime.php"
urls = [] # list for storing urls of all the anime

def get_urls():
    
    """get_urls() returns the list of the urls for each anime"""
    
    for lim in range(0, 20000, 50):
        r = requests.get(URL, params={"limit": lim})

        if r.status_code == 404: # in case page is inaccessable
            print("Unfortunately, page {} is inaccessable. We're interrupting the operation and returning the pages found.".format(lim))

        soup = bs(r.content, 'html5lib')

        for res in soup.find_all('a', class_='hoverinfo_trigger fl-l ml12 mr8'):
            url = res['href']
            if url not in urls:
                urls.append(url)

    return urls

In [3]:
filename = 'urls.txt'

if filename not in os.listdir(): # create file if not already created
    with open(filename, 'w') as f:
        f.write('\n'.join(list(map(str, urls))))

else: # load file
    with open(filename, 'r', encoding="utf8") as f:
        urls = f.read().split("\n")
        print("urls.txt loaded.")

urls.txt loaded.


In [None]:
get_urls()

In [4]:
print(len(urls)) # number of urls loaded

19218


### 1.2

In [1]:
def crawl_animes(urls_):
    
    """crawl_animes function fetches html of every anime found by the get_url() method. It then
    saves them in an 'htmls' directory. Inside 'htmls' directory, it saves htmls wrt to the page folder
    it belongs to with the fashion 'htmls/page_rank_i/article_j.html'. In order to avoid repeatedly
    downloading the htmls file, a binary file named as 'counter' is created to start from where
    we left off in case of any interruption."""
    
    if 'counter' not in os.listdir(): # initialize counter in case not already created
        start = 0
    else:
        with open('counter', 'rb') as c: # load counter
            start = pickle.load(c) + 1
    print("Starting from anime no. {}".format(start))

    for i in range(start, len(urls_)):
        page_rank = str(int(np.floor(i/50)))
        
        if i%50 == 0 or f"page_rank_{page_rank}" not in os.listdir('./htmls'):
            os.mkdir('htmls/page_rank_{}'.format(page_rank))

        html = requests.get(urls_[i])
        sleep = 20

        while html.status_code != 200:
            print("Waiting {} seconds as we reach request limit while retrieving page no. {}.\n".format(sleep, i))
            html.close()
            time.sleep(sleep)
            html = requests.get(urls_[i])
            sleep += 5

        with open("htmls/page_rank_{}/article_{}.html".format(page_rank, i), "w", encoding="utf-8") as f:
            f.write(html.text)

        with open("counter", "wb") as c:
            pickle.dump(i, c)

In [15]:
if 'htmls' not in os.listdir():
  os.mkdir('htmls')

In [15]:
crawl_animes(urls)

Starting from anime no. 19218


### 1.3

In [1]:
def parse_pages(i_, folder_name="anime_tsvs"):
    
    """This routine parses the htmls we downloaded and fetches the information we are required in the homework
    and saves them in an article_i.tsv file inside anime_tsvs directory."""
    
    print("Working on page {}".format(i_))
    page_rank = str(int(np.floor(i_/50)))
    article_path = "htmls/page_rank_{}/article_{}.html".format(page_rank, i_)

    with open(article_path, 'r', encoding='utf-8') as f:
        article = bs(f.read(), 'html.parser')

    animeTitle = article.find("h1", {"class":"title-name h1_bold_none"}).string
    # print(animeTitle)

    animeType = article.find("span", {"class":"information type"}).string
    # print(animeType)

    contents = article.find_all('div', {'class': "spaceit_pad"})        
    for c in contents:
        span_ = c.find('span', {'class': "dark_text"})
        if span_ is not None:
            if span_.string == "Episodes:":
                if c.contents[2] != '\n  Unknown\n  ':
                    animeNumEpisode = int(c.contents[2])
                else:
                    animeNumEpisode = ''       
    #             print(animeNumEpisode)

            if span_.string == "Aired:":
                dates_ = c.contents[2].string.replace('\n', '').strip().split(' to ')
    #             print(dates_)
                if dates_[0] == 'Not available':
                    releaseDate = ''
                    endDate = ''
                else:
                    if len(dates_) == 2 and '?' not in dates_:                
                        releaseDate = dates_[0]
                        endDate = dates_[1]

                        if len(releaseDate.split(' ')) == 3:
                            releaseDate = dt.datetime.strptime(releaseDate, "%b %d, %Y") # Datetime conversion

                        elif len(releaseDate.split(' ')) == 2:
                            releaseDate = dt.datetime.strptime(releaseDate, "%b %Y")

                        else:
                            releaseDate = print(dt.datetime.strptime(releaseDate, "%Y"))

                        if len(endDate.split(' ')) == 3:
                            endDate = dt.datetime.strptime(endDate, "%b %d, %Y")

                        elif len(endDate.split(' ')) == 2:
                            endDate = dt.datetime.strptime(endDate, "%b %Y")

                        else:
                            endDate = dt.datetime.strptime(endDate, "%Y")
                    else:
                        endDate = ''
                        releaseDate = dates_[0]

                        if len(releaseDate.split(' ')) == 3:
                            releaseDate = dt.datetime.strptime(releaseDate, "%b %d, %Y")

                        elif len(releaseDate.split(' ')) == 2:
                            releaseDate = dt.datetime.strptime(releaseDate, "%b %Y")

                        else:
                            releaseDate = dt.datetime.strptime(releaseDate, "%Y")

    animeNumMembers = int(article.find("span", {"class": "numbers members"}).contents[1].string.replace(',', ''))
    # print(animeNumMembers)

    if article.find("div", {"class": "score-label score-9"}) is not None:
        animeScore = float(article.find("div", {"class": "score-label score-9"}).contents[0])
    else:
        animeScore = ''
    # print(animeScore)

    if article.find("span", {"itemprop": {"ratingCount"}}) is not None:
        animeUsers = int(article.find("span", {"itemprop": {"ratingCount"}}).contents[0])
    else:
        animeUsers = ''
    # print(animeUsers)

    if (article.find("span", {"class": "numbers ranked"}) is not None):
        try:
            animeRank = int(article.find("span", {"class": "numbers ranked"}).contents[1].string[1:])
        except:
            animeRank = ''
    else:
        animeRank = ''
    # print(animeRank)

    if article.find("span", {"class": "numbers popularity"}) is not None:
        animePopularity = int(article.find("span", {"class": "numbers popularity"}).contents[1].string[1:])
    else:
        animePopularity = ''
    # print(animePopularity)

    if article.find("p", {"itemprop": {"description"}}) is not None:
        animeDescription = article.find("p", {"itemprop": {"description"}}).contents[0]
    else:
        animeDescription = ''
    # print(animeDescription)

    animeRelated = []

    tbl_anime = article.find("table", {"class": "anime_detail_related_anime"})
    if tbl_anime is not None:
        anime_links = tbl_anime.find_all("a")
        for e in anime_links:
            animeRelated.append(str(e.text))

        animeRelated = list(set(animeRelated))
        if '' in animeRelated:
            animeRelated.remove('')
        if ' ' in animeRelated:
            animeRelated.remove(' ')
    else:
        animeRelated = ''
    # print(animeRelated)

    animeCharacters = []

    tbl_characters = article.find_all("h3", {"class": "h3_characters_voice_actors"})
    if tbl_characters is not None:
        for e in tbl_characters:
            a_ = e.find("a")
            animeCharacters.append((a_.text))
    else:
        animeCharacters = ''
    # print(animeCharacters)

    animeVoices = []

    tbl_voices = article.find_all("td", {"class": "va-t ar pl4 pr4"})
    if tbl_voices is not None:
        for e in tbl_voices:
            a_ = e.find("a")
            animeVoices.append((a_.text))
    else:
        animeVoices = ''

    # print(animeVoices)

    animeStaff = []
    
    if len(article.find_all('div', {'class': "detail-characters-list clearfix"})) > 1:
        staff = article.find_all('div', {'class': "detail-characters-list clearfix"})[1]
        td = staff.find_all('td', {'class': "borderClass"})
    
        for td_ in td:
            if td_.get('width') == None:
                animeStaff.append([td_.find('a').string, td_.find('small').string])
    else:
        animeStaff = ''
    
#     print(animeStaff)

    with open('{}/anime_{}.tsv'.format(folder_name, i_), 'wt', e # save parsed info. into a tsv file
              ncoding="utf8") as f_:
        tsv_wt = csv.writer(f_, delimiter='\t')
        tsv_wt.writerow([animeTitle, animeType, animeNumEpisode, releaseDate, endDate, animeNumMembers,animeScore, \
                         animeUsers, animeRank, animePopularity, animeDescription, animeRelated, animeCharacters, \
                         animeVoices, animeStaff])

In [37]:
if "anime_tsvs" not in os.listdir():
    os.mkdir("anime_tsvs")
    for i in range(len(urls)):
        parse_pages(i)
        
for i in range(len(urls)):
    parse_pages(i)

## 2. Search Engine

### Pre processing steps

The steps that follow involves the merging of all the tsv, resulting in a dataframe. We then process this dataframe by working on its description (synopsis) field. We do tokenization, removing of stopwords & punctuation, and stemming. The resulting dataframe is saved in the csv format and in binary format for its use later.

In [5]:
def sort_files(t):

    """This method sorts all the tsv files in the following fashion
    anime_0.tsv, anime_1.tsv, anime_2.tsv, anime_3.tsv, ....."""

    return [a(x) for x in re.split(r'(\d+)', t)]

def a(t):
    return int(t) if t.isdigit() else t

In [6]:
def merge_tsvs(path, column_names):
    
    """Here we merge the tsv files into a single dataframe."""

    list_of_files = sorted(os.listdir(path), key=sort_files)
    df = pd.read_csv(path+list_of_files[0],
                     names=column_names,
                     sep="\t", engine='c')
    
    for f in list_of_files[1:]:
        df_ = pd.read_csv(path+f,
                          names=column_names,
                          sep="\t", engine='c')
        df = pd.concat([df, df_], ignore_index=True)
        
    return df

In [196]:
path = "./anime_tsvs/"
columns = ["animeTitle", "animeType", "animeNumEpisode", "releaseDate", "endDate", "animeNumMembers",
            "animeScore", "animeUsers", "animeRank", "animePopularity", "animeDescription", "animeRelated",
            "animeCharacters", "animeVoices", "animeStaff"]

if "df.csv" not in os.listdir(): # then create and pre-process dataset
    df = merge_tsvs(path, columns)
    df = df.drop([0], axis=0)
    df = df.reset_index(drop=True)
    df["animeNumMembers"].fillna(0)
    df["animePopularity"].fillna(0)
    df["animeNumMembers"] = df["animeNumMembers"].astype(int)
    df["animePopularity"] = df["animePopularity"].astype(int)

    df.to_csv("./df.csv")

else:
    df = pd.read_csv("df.csv")

In [7]:
def text_process(text_, type_stemmer="porter"): # we use porter stemmer by default

    """Here we process the synopsis as mentioned above. We return a list containing words which are
    stemmed, tokenized, removed fom punctuation and stopwords."""

    stopwords_english = stopwords.words("english")

    if type_stemmer == "porter":
        stemmer = nltk.stem.PorterStemmer()
    elif type_stemmer == "lancaster":
        stemmer = nltk.stem.LancasterStemmer()
        
    try:
        text_tokenized = nltk.word_tokenize(text_) # tokenization
        stemmed = [stemmer.stem(word) for word in text_tokenized if ((word.lower() not in stopwords_english) and (word not in string.punctuation))] # stemming
    except TypeError as e:
        print(text_)
        raise e
        
    return stemmed

In [199]:
# Load or create (if not already) the dataframe with an additional column of preprocessed description

if "tokenized_df.p" not in os.listdir():
    df_tokenized = df.assign(description_tokenized=df["animeDescription"].fillna('').apply(lambda m: text_process(m)))
    with open("tokenized_df.p", "wb") as f:
        pickle.dump(df_tokenized, f)
else:
    with open("tokenized_df.p", "rb") as f:
        df_tokenized = pickle.load(f)

## 2.1

### 2.1.1

In [8]:
def get_vocabulary(synopsis, vocabulary_file = "vocabulary.pkl"):
    
    """Here we generate a vocab of all words from the description. We tag each word with an integer term_id
    and then save it in a binary file."""

    vocab = set()

    for desc in synopsis:
        vocab = vocab.union(set(desc))

    vocab_dict = dict(zip(sorted(vocab), range(len(vocab))))
    with open(vocabulary_file, "wb") as f:
        pickle.dump(vocab_dict, f)
        
    return vocab_dict

In [9]:
def inverted_idx(synopsis, vocab, inverted_idx_file):
    
    """Here we create a dictionary (inverted index) in which against each term id we have a list of documents no.
    which contain that specific word."""

    inverted_idx = dict()
    for term, term_id in vocab.items():
        inverted_idx[term_id] = set() # create and initialize the dictionary with a set against each key to avoid duplicates

    descriptions = zip(synopsis, range(len(synopsis)))   # tokenized description against doc no. 
    for desc, doc_n in descriptions:
        checked_words = []
        for word in desc:
            if word not in checked_words: # check if we have already worked on this word
                checked_words.append(word)
                term_id = vocab[word]
                inverted_idx[term_id] = inverted_idx[term_id].union(set([doc_n]))

    for term_id, docs_set in inverted_idx.items():
        inverted_idx[term_id] = sorted(list(inverted_idx[term_id]))

    # create and save the inv_idx in a binary file
    with open(inverted_idx_file, "wb") as f:
        pickle.dump(inverted_idx, f)

    return inverted_idx

In [10]:
def get_synopsis(synopsis_file = "tokenized_df.p"):

    """Here we load the descriptions."""

    print('Loading synopsis... ', end ='')
    with open(synopsis_file, 'rb') as f:
        df = pickle.load(f)

    synopsis = list(df['description_tokenized'])
    print('\nSuccessfully loaded.\n')
    return synopsis

In [11]:
def get_vocab(synopsis, vocabulary_file = "vocabulary.pkl"):
    
    """Load vocabulary (in case it's present) otherwise create it."""

    print('Loading vocabulary... ', end ='')
    if vocabulary_file not in os.listdir():
        vocab = get_vocabulary(synopsis, vocabulary_file)
    else:
        with open(vocabulary_file, "rb") as f:
            vocab = pickle.load(f)
    print('\nSuccessfully loaded.\n')
    
    return vocab

In [12]:
def get_inverted_idx(synopsis, vocab, inverted_idx_file = "inverted_index.pkl"):
    
    """Load inverted index (in case it's present) otherwise create it."""

    print('Loading inverted index... ', end ='')
    if inverted_idx_file not in os.listdir():
        inverted_idx = inverted_idx(synopsis, vocab, inverted_idx_file)
    else:
        with open(inverted_idx_file, "rb") as f:
            inverted_idx = pickle.load(f)
    print('\nSuccessfully loaded.\n')
    
    return inverted_idx

In [13]:
vocabulary_file = "vocabulary.pkl"
synopsis_file = "tokenized_df.p"
inverted_idx_file = "inverted_index.pkl"

# Load synopsis, vocabulary, and inverted index
synopsis = get_synopsis(synopsis_file)
vocab = get_vocab(synopsis, vocabulary_file)
inverted_idx = get_inverted_idx(synopsis, vocab, inverted_idx_file)

Loading synopsis... 
Successfully loaded.

Loading vocabulary... 
Successfully loaded.

Loading inverted index... 
Successfully loaded.



In [14]:
def search_engine(vocab, inverted_idx, urls):
    
    """Search engine receives an input query and gives back the result of all anime documents that contain
    every word of the query inputted."""

    query = input('Please enter your query...\nquery: ') # Input query here

    q = query.lower()
    query = text_process(q) # pre-processing step

    # if first word not in our vocab, then no need to search for later words (since it's an AND query)
    if query[0] in vocab:
        term_id_1 = vocab[query[0]]
        docs_set = set(inverted_idx[term_id_1])

        for word in query[1:]:
            if word in vocab:
                term_id = vocab[word]
                docs = inverted_idx[term_id]

                # Intersection is necassary to ensure all words of the query are in the synopsis
                docs_set = docs_set.intersection(set(docs))

                # In case no intersection found
                if len(docs_set) == 0:
                    print("No result found.")
                    return

            else:
                print("No result found.")
                return

        df = pd.read_csv("./df.csv") # df containing the processed snypsis
        
        res = df.iloc[sorted(list(docs_set))][["animeTitle", "animeDescription"]]
        
        for i in sorted(list(docs_set)):
            res['URL'] = urls[i]

        return res

    else:
        print('No result found.')
        return


In [46]:
search_engine(vocab, inverted_idx, urls)

Please enter your query...
query: saiyan race


Unnamed: 0,animeTitle,animeDescription,URL
367,Dragon Ball Z,Five years after winning the World Martial Art...,https://myanimelist.net/anime/986/Dragon_Ball_...
402,Dragon Ball Super: Broly,"Forty-one years ago on Planet Vegeta, home of ...",https://myanimelist.net/anime/986/Dragon_Ball_...
1470,Dragon Ball Z Special 1: Tatta Hitori no Saish...,"Bardock, Son Goku's father, is a low-ranking S...",https://myanimelist.net/anime/986/Dragon_Ball_...


## 2.2

### 2.2.1

In [15]:
def find_tfidf(word, desc, synopsis, idf=None):
    
    """Here we calculate tfidf score corresponding the inputted word."""

    counter = 0
    if idf == None: # calculate idf if not provided
        for desc in synopsis:
            if word in desc:
                counter += 1
                
        idf = np.log(len(synopsis)/counter)
        
    tfidf = desc.count(word)/len(desc) * idf
    
    return idf, tfidf

In [16]:
def inverted_idx_2(synopsis, vocab, inverted_idx_tfidf_file="inverted_index_2.p", idfs_file="idfs.p"):
    
    """Here we generate a dictionary for our inverted index """
    
    second_inverted_idx = dict()
    for term_id in vocab.values():
        second_inverted_idx[term_id] = list()

    calculated_idfs = {}
    
    descriptions = zip(synopsis, range(len(synopsis)))
    for desc, doc_n in descriptions:
        checked_words = []
        for word in desc:
            # avoid redundancy of checking already checked words
            if word not in checked_words:
                checked_words.append(word)
                term_id = vocab[word]
                
                if word not in calculated_idfs.keys():
                    idf, tfidf = find_tfidf(word, desc, synopsis) # calculate idf and tfidf for this new word
                    calculated_idfs[word] = idf
                    
                else:
                    _, tfidf = find_tfidf(word, desc, synopsis, idf)

                second_inverted_idx[term_id].append([doc_n, tfidf]) # append document id and corresponding tfidf score

    for term_id, lists in second_inverted_idx.items():
        second_inverted_idx[term_id] = sorted(second_inverted_idx[term_id], key=lambda m: m[1]) # sort by tfidf score

    with open(inverted_idx_tfidf_file, "wb") as f:
        pickle.dump(second_inverted_idx, f)

    with open(idfs_file, "wb") as f:
        pickle.dump(calculated_idfs, f)

    return second_inverted_idx, calculated_idfs


In [17]:
def get_inverted_idx_tfidf(synopsis, vocab, inverted_idx_tfidf_file, idfs_file):

    """Load inverted index with tfidfs (in case it's present) otherwise create it."""

    print('Loading inverted index tfidf... \n', end ='')
    if (idfs_file not in os.listdir()) or (inverted_idx_tfidf_file not in os.listdir()):
        inv_idx_2, idfs = inverted_idx_2(synopsis, vocab, inverted_idx_tfidf_file, idfs_file)
        
    else:
        with open(inverted_idx_tfidf_file, "rb") as f:
            inv_idx_2 = pickle.load(f)
            
        with open(idfs_file, "rb") as f:
            idfs = pickle.load(f)
    print('Successfully loaded.')
    return inv_idx_2, idfs

In [18]:
inverted_idx_tfidf_file = "inverted_index_2.p"
idfs_file = "idfs.p"

inv_idx_2, idfs = get_inverted_idx_tfidf(synopsis, vocab, inverted_idx_tfidf_file, idfs_file)

Loading inverted index tfidf... 
Successfully loaded.


In [29]:
def find_cos_similarity(vector_1, vector_2):

    """Computes cosine similarity between two vectors"""
    
    return (np.dot(vector_1, vector_2))/(np.linalg.norm(vector_1) * np.linalg.norm(vector_2))

In [20]:
def find_top_k_docs(query, synopsis, vocab, inv_idx_2, idfs, urls, k=10):

    """Here we create max-heap of the documents containing words of the input query,
    we then arrange them wrt cosine similarity of these documents with the query and
    return top k documents only."""

    df = pd.read_csv("./df.csv")

    query = text_process(query.lower()) # query pre-processing

    res_dict = {} # result dictionary

    for word in query:
        if word in vocab.keys():
            term_id = vocab[word]
            for list_ in inv_idx_2[term_id]:
                if list_[0] not in res_dict.keys():
                    res_dict[list_[0]] = []
                res_dict[list_[0]].append(list_[1])
#         else:
#             print("No result found.")

    vector_query = [(query.count(q)/len(query)) * idfs[q] for q in query if q in idfs.keys()]
    
    dists = []
    
    for key in res_dict.keys():
        vec = res_dict[key]
        if len(vec) == len(vector_query):
            dists.append((-find_cos_similarity(vector_query, vec), key))

    heapq.heapify(dists) # using heap data structure
    dists_len = len(dists)
    res = []
    for i in range(min(k, dists_len)):
        e = heapq.heappop(dists)
        res.append([e[1], -e[0]])

    indices = [i[0] for i in res]
    dists = [i[1] for i in res]

    df_1 = df.iloc[indices][["animeTitle", "animeDescription"]]
    
    df_res = df_1.assign(URL=[urls[i] for i in indices],
                                 Similarity=dists)
    return df_res

In [34]:
query = "first anime"
output = find_top_k_docs(query, synopsis, vocab, inv_idx_2, idfs, urls)
output

Unnamed: 0,animeTitle,animeDescription,URL,Similarity
3430,Oshiete! Galko-chan,"At first glance, Galko, Otako, and Ojou are th...",https://myanimelist.net/anime/32013/Oshiete_Ga...,0.999756
5559,Astarotte no Omocha! EX,The OVA Astaroette no Omocha is a three part s...,https://myanimelist.net/anime/10582/Astarotte_...,0.999431
16381,Ishiyama-dera Digital Engi Emaki,The Ishiyama-dera temple in Outsu announced th...,https://myanimelist.net/anime/33982/Ishiyama-d...,0.99924
4844,Panda Kopanda,Panda Kopanda (Panda! Go Panda!) is a 30 minut...,https://myanimelist.net/anime/2611/Panda_Kopanda,0.999154
1119,Mousou Dairinin,The infamous Shounen Bat (Lil' Slugger) is ter...,https://myanimelist.net/anime/323/Mousou_Dairinin,0.998555
9509,Saa Ikou! Tamagotchi,Let's Go! Tamagotchi is an anime series focusi...,https://myanimelist.net/anime/6798/Saa_Ikou_Ta...,0.998269
10786,Touyama Sakura Uchuu Chou: Yatsu no Na wa Gold,This science-fiction anime was inspired somewh...,https://myanimelist.net/anime/12399/Touyama_Sa...,0.996559
1907,Sakurako-san no Ashimoto ni wa Shitai ga Umatt...,When Shoutarou Tatewaki first meets Sakurako K...,https://myanimelist.net/anime/30187/Sakurako-s...,0.995053
9358,Metropolis (2009),Mirai Mizue's first time experimenting with ge...,https://myanimelist.net/anime/29765/Metropolis...,0.9944
325,One Piece: Episode of Merry - Mou Hitori no Na...,The story arcs aboard the Straw Hat Crew's fir...,https://myanimelist.net/anime/19123/One_Piece_...,0.992909


In [35]:
query = "famous story"
output = find_top_k_docs(query, synopsis, vocab, inv_idx_2, idfs, urls)
output

Unnamed: 0,animeTitle,animeDescription,URL,Similarity
3913,Manga Sekai Mukashibanashi,Each episode of this series tells the story of...,https://myanimelist.net/anime/6262/Manga_Sekai...,0.99896
8277,Kuusou no Sora Tobu Kikaitachi,Animated 2002 short film produced by Studio Gh...,https://myanimelist.net/anime/19401/Kuusou_no_...,0.971399
2276,Glass no Kamen,"Kitajima Maya, a 13-year old girl with a talen...",https://myanimelist.net/anime/506/Glass_no_Kamen,0.969861
5665,Chou Hatsumei Boy Kanipan,Cho Hatsumei Boy Kanipan is a continued story ...,https://myanimelist.net/anime/3690/Chou_Hatsum...,0.962115
9253,Marginal Prince: Gekkeiju no Ouji-tachi,"""Marginal Prince"" based off of the hit love si...",https://myanimelist.net/anime/1912/Marginal_Pr...,0.962115
14808,Abe George Kattobi Seishun Ki: Shibuya Honky Tonk,This story is based on one of the famous Japan...,https://myanimelist.net/anime/17501/Abe_George...,0.962115
16923,Kono Shihai kara no Sotsugyou: Ozaki Yutaka,A compilation OVA of 5 stories portraying the ...,https://myanimelist.net/anime/12981/Kono_Shiha...,0.962115
3411,Madou King Granzort,"In the future, the moon is a habitable place w...",https://myanimelist.net/anime/2818/Madou_King_...,0.962115
7689,Jin Sheng Yuan,A famous music video tells a sentimental story...,https://myanimelist.net/anime/10132/Jin_Sheng_...,0.962115
8237,Sekai Meisaku Douwa,Another of Toei's World Famous Fairy Tale seri...,https://myanimelist.net/anime/7398/Sekai_Meisa...,0.962115


## 5. Algorithmic question

Steps to follow:
1. Given an input list of appointments, find all combinations of the possible solutions
2. Check each combination if it is valid or not (no consecutive appointments)
3. For all valid combinations, find their durations
4. Find the combination with the maximum duration
5. Return list of the last step and its duration

Input: appointments_list of length n and distinct values\

routine max_len_appointments(appointments_list):\
    validCombinations = [all combinations in which ther are no consecutive appointments]\
    appointmentDurations = [durations of every instance of validCombinations]\
    maxDuration = max(appointmentDurations)\
    maxLenappointments = [instances of appointmentDurations where duration is maxDuration]
    
    return maxLenappointments, maxDuration
        