# HW3

### Importing Libraries 

In [2]:
import requests
from bs4 import BeautifulSoup as bs
import os
import pickle
import numpy as np
import time
import datetime as dt
import csv
import pandas as pd
import nltk
import re
from nltk.corpus import stopwords
import nltk
import string
import heapq

In [121]:
# nltk.download('stopwords')
# nltk.download('punkt')

[nltk_data] Downloading package punkt to /Users/hassan/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

## 1. Data collection

### 1.1.

In [3]:
URL = "https://myanimelist.net/topanime.php"
urls = [] # list for storing urls of all the anime

def get_urls():
    
    """get_urls() returns the list of the urls for each anime"""
    
    for lim in range(0, 20000, 50):
        r = requests.get(URL, params={"limit": lim})

        if r.status_code == 404: # in case page is inaccessable
            print("Unfortunately, page {} is inaccessable. We're interrupting the operation and returning the pages found.".format(lim))

        soup = bs(r.content, 'html5lib')

        for res in soup.find_all('a', class_='hoverinfo_trigger fl-l ml12 mr8'):
            url = res['href']
            if url not in urls:
                urls.append(url)

    return urls

In [3]:
filename = 'urls.txt'

if filename not in os.listdir(): # create file if not already created
    with open(filename, 'w') as f:
        f.write('\n'.join(list(map(str, urls))))

else: # load file
    with open(filename, 'r', encoding="utf8") as f:
        urls = f.read().split("\n")
        print("urls.txt loaded.")

urls.txt loaded.


In [None]:
get_urls()

In [4]:
print(len(urls)) # number of urls loaded

19218


### 1.2

In [1]:
def crawl_animes(urls_):
    
    """crawl_animes function fetches html of every anime found by the get_url() method. It then
    saves them in an 'htmls' directory. Inside 'htmls' directory, it saves htmls wrt to the page folder
    it belongs to with the fashion 'htmls/page_rank_i/article_j.html'. In order to avoid repeatedly
    downloading the htmls file, a binary file named as 'counter' is created to start from where
    we left off in case of any interruption."""
    
    if 'counter' not in os.listdir(): # initialize counter in case not already created
        start = 0
    else:
        with open('counter', 'rb') as c: # load counter
            start = pickle.load(c) + 1
    print("Starting from anime no. {}".format(start))

    for i in range(start, len(urls_)):
        page_rank = str(int(np.floor(i/50)))
        
        if i%50 == 0 or f"page_rank_{page_rank}" not in os.listdir('./htmls'):
            os.mkdir('htmls/page_rank_{}'.format(page_rank))

        html = requests.get(urls_[i])
        sleep = 20

        while html.status_code != 200:
            print("Waiting {} seconds as we reach request limit while retrieving page no. {}.\n".format(sleep, i))
            html.close()
            time.sleep(sleep)
            html = requests.get(urls_[i])
            sleep += 5

        with open("htmls/page_rank_{}/article_{}.html".format(page_rank, i), "w", encoding="utf-8") as f:
            f.write(html.text)

        with open("counter", "wb") as c:
            pickle.dump(i, c)

In [15]:
if 'htmls' not in os.listdir():
  os.mkdir('htmls')

In [15]:
crawl_animes(urls)

Starting from anime no. 19218


### 1.3

In [1]:
def parse_pages(i_, folder_name="anime_tsvs"):
    
    """This routine parses the htmls we downloaded and fetches the information we are required in the homework
    and saves them in an article_i.tsv file inside anime_tsvs directory."""
    
    print("Working on page {}".format(i_))
    page_rank = str(int(np.floor(i_/50)))
    article_path = "htmls/page_rank_{}/article_{}.html".format(page_rank, i_)

    with open(article_path, 'r', encoding='utf-8') as f:
        article = bs(f.read(), 'html.parser')

    animeTitle = article.find("h1", {"class":"title-name h1_bold_none"}).string
    # print(animeTitle)

    animeType = article.find("span", {"class":"information type"}).string
    # print(animeType)

    contents = article.find_all('div', {'class': "spaceit_pad"})        
    for c in contents:
        span_ = c.find('span', {'class': "dark_text"})
        if span_ is not None:
            if span_.string == "Episodes:":
                if c.contents[2] != '\n  Unknown\n  ':
                    animeNumEpisode = int(c.contents[2])
                else:
                    animeNumEpisode = ''       
    #             print(animeNumEpisode)

            if span_.string == "Aired:":
                dates_ = c.contents[2].string.replace('\n', '').strip().split(' to ')
    #             print(dates_)
                if dates_[0] == 'Not available':
                    releaseDate = ''
                    endDate = ''
                else:
                    if len(dates_) == 2 and '?' not in dates_:                
                        releaseDate = dates_[0]
                        endDate = dates_[1]

                        if len(releaseDate.split(' ')) == 3:
                            releaseDate = dt.datetime.strptime(releaseDate, "%b %d, %Y") # Datetime conversion

                        elif len(releaseDate.split(' ')) == 2:
                            releaseDate = dt.datetime.strptime(releaseDate, "%b %Y")

                        else:
                            releaseDate = print(dt.datetime.strptime(releaseDate, "%Y"))

                        if len(endDate.split(' ')) == 3:
                            endDate = dt.datetime.strptime(endDate, "%b %d, %Y")

                        elif len(endDate.split(' ')) == 2:
                            endDate = dt.datetime.strptime(endDate, "%b %Y")

                        else:
                            endDate = dt.datetime.strptime(endDate, "%Y")
                    else:
                        endDate = ''
                        releaseDate = dates_[0]

                        if len(releaseDate.split(' ')) == 3:
                            releaseDate = dt.datetime.strptime(releaseDate, "%b %d, %Y")

                        elif len(releaseDate.split(' ')) == 2:
                            releaseDate = dt.datetime.strptime(releaseDate, "%b %Y")

                        else:
                            releaseDate = dt.datetime.strptime(releaseDate, "%Y")

    animeNumMembers = int(article.find("span", {"class": "numbers members"}).contents[1].string.replace(',', ''))
    # print(animeNumMembers)

    if article.find("div", {"class": "score-label score-9"}) is not None:
        animeScore = float(article.find("div", {"class": "score-label score-9"}).contents[0])
    else:
        animeScore = ''
    # print(animeScore)

    if article.find("span", {"itemprop": {"ratingCount"}}) is not None:
        animeUsers = int(article.find("span", {"itemprop": {"ratingCount"}}).contents[0])
    else:
        animeUsers = ''
    # print(animeUsers)

    if (article.find("span", {"class": "numbers ranked"}) is not None):
        try:
            animeRank = int(article.find("span", {"class": "numbers ranked"}).contents[1].string[1:])
        except:
            animeRank = ''
    else:
        animeRank = ''
    # print(animeRank)

    if article.find("span", {"class": "numbers popularity"}) is not None:
        animePopularity = int(article.find("span", {"class": "numbers popularity"}).contents[1].string[1:])
    else:
        animePopularity = ''
    # print(animePopularity)

    if article.find("p", {"itemprop": {"description"}}) is not None:
        animeDescription = article.find("p", {"itemprop": {"description"}}).contents[0]
    else:
        animeDescription = ''
    # print(animeDescription)

    animeRelated = []

    tbl_anime = article.find("table", {"class": "anime_detail_related_anime"})
    if tbl_anime is not None:
        anime_links = tbl_anime.find_all("a")
        for e in anime_links:
            animeRelated.append(str(e.text))

        animeRelated = list(set(animeRelated))
        if '' in animeRelated:
            animeRelated.remove('')
        if ' ' in animeRelated:
            animeRelated.remove(' ')
    else:
        animeRelated = ''
    # print(animeRelated)

    animeCharacters = []

    tbl_characters = article.find_all("h3", {"class": "h3_characters_voice_actors"})
    if tbl_characters is not None:
        for e in tbl_characters:
            a_ = e.find("a")
            animeCharacters.append((a_.text))
    else:
        animeCharacters = ''
    # print(animeCharacters)

    animeVoices = []

    tbl_voices = article.find_all("td", {"class": "va-t ar pl4 pr4"})
    if tbl_voices is not None:
        for e in tbl_voices:
            a_ = e.find("a")
            animeVoices.append((a_.text))
    else:
        animeVoices = ''

    # print(animeVoices)

    animeStaff = []
    
    if len(article.find_all('div', {'class': "detail-characters-list clearfix"})) > 1:
        staff = article.find_all('div', {'class': "detail-characters-list clearfix"})[1]
        td = staff.find_all('td', {'class': "borderClass"})
    
        for td_ in td:
            if td_.get('width') == None:
                animeStaff.append([td_.find('a').string, td_.find('small').string])
    else:
        animeStaff = ''
    
#     print(animeStaff)

    with open('{}/anime_{}.tsv'.format(folder_name, i_), 'wt', e # save parsed info. into a tsv file
              ncoding="utf8") as f_:
        tsv_wt = csv.writer(f_, delimiter='\t')
        tsv_wt.writerow([animeTitle, animeType, animeNumEpisode, releaseDate, endDate, animeNumMembers,animeScore, \
                         animeUsers, animeRank, animePopularity, animeDescription, animeRelated, animeCharacters, \
                         animeVoices, animeStaff])

In [7]:
if "anime_tsvs" not in os.listdir():
    os.mkdir("anime_tsvs")
    for i in range(len(urls)):
        parse_pages(i)
        
for i in range(len(urls)):
    parse_pages(i)

Working on page 0
Working on page 1
Working on page 2
Working on page 3
Working on page 4
Working on page 5
Working on page 6
Working on page 7
Working on page 8
Working on page 9
Working on page 10
Working on page 11
Working on page 12
Working on page 13
Working on page 14
Working on page 15
Working on page 16
Working on page 17
Working on page 18
Working on page 19
Working on page 20
Working on page 21
Working on page 22
Working on page 23
Working on page 24
Working on page 25
Working on page 26
Working on page 27
Working on page 28
Working on page 29
Working on page 30
Working on page 31
Working on page 32
Working on page 33
Working on page 34
Working on page 35
Working on page 36
Working on page 37
Working on page 38
Working on page 39
Working on page 40
Working on page 41
Working on page 42
Working on page 43
Working on page 44
Working on page 45
Working on page 46
Working on page 47
Working on page 48
Working on page 49
Working on page 50
Working on page 51
Working on page 52
Wor

Working on page 416
Working on page 417
Working on page 418
Working on page 419
Working on page 420
Working on page 421
Working on page 422
Working on page 423
Working on page 424
Working on page 425
Working on page 426
Working on page 427
Working on page 428
Working on page 429
Working on page 430
Working on page 431
Working on page 432
Working on page 433
Working on page 434
Working on page 435
Working on page 436
Working on page 437
Working on page 438
Working on page 439
Working on page 440
Working on page 441
Working on page 442
Working on page 443
Working on page 444
Working on page 445
Working on page 446
Working on page 447
Working on page 448
Working on page 449
Working on page 450
Working on page 451
Working on page 452
Working on page 453
Working on page 454
Working on page 455
Working on page 456
Working on page 457
Working on page 458
Working on page 459
Working on page 460
Working on page 461
Working on page 462
Working on page 463
Working on page 464
Working on page 465


Working on page 827
Working on page 828
Working on page 829
Working on page 830
Working on page 831
Working on page 832
Working on page 833
Working on page 834
Working on page 835
Working on page 836
Working on page 837
Working on page 838
Working on page 839
Working on page 840
Working on page 841
Working on page 842
Working on page 843
Working on page 844
Working on page 845
Working on page 846
Working on page 847
Working on page 848
Working on page 849
Working on page 850
Working on page 851
Working on page 852
Working on page 853
Working on page 854
Working on page 855
Working on page 856
Working on page 857
Working on page 858
Working on page 859
Working on page 860
Working on page 861
Working on page 862
Working on page 863
Working on page 864
Working on page 865
Working on page 866
Working on page 867
Working on page 868
Working on page 869
Working on page 870
Working on page 871
Working on page 872
Working on page 873
Working on page 874
Working on page 875
Working on page 876


Working on page 1228
Working on page 1229
Working on page 1230
Working on page 1231
Working on page 1232
Working on page 1233
Working on page 1234
Working on page 1235
Working on page 1236
Working on page 1237
Working on page 1238
Working on page 1239
Working on page 1240
Working on page 1241
Working on page 1242
Working on page 1243
Working on page 1244
Working on page 1245
Working on page 1246
Working on page 1247
Working on page 1248
Working on page 1249
Working on page 1250
Working on page 1251
Working on page 1252
Working on page 1253
Working on page 1254
Working on page 1255
Working on page 1256
Working on page 1257
Working on page 1258
Working on page 1259
Working on page 1260
Working on page 1261
Working on page 1262
Working on page 1263
Working on page 1264
Working on page 1265
Working on page 1266
Working on page 1267
Working on page 1268
Working on page 1269
Working on page 1270
Working on page 1271
Working on page 1272
Working on page 1273
Working on page 1274
Working on pa

Working on page 1621
Working on page 1622
Working on page 1623
Working on page 1624
Working on page 1625
Working on page 1626
Working on page 1627
Working on page 1628
Working on page 1629
Working on page 1630
Working on page 1631
Working on page 1632
Working on page 1633
Working on page 1634
Working on page 1635
Working on page 1636
Working on page 1637
Working on page 1638
Working on page 1639
Working on page 1640
Working on page 1641
Working on page 1642
Working on page 1643
Working on page 1644
Working on page 1645
Working on page 1646
Working on page 1647
Working on page 1648
Working on page 1649
Working on page 1650
Working on page 1651
Working on page 1652
Working on page 1653
Working on page 1654
Working on page 1655
Working on page 1656
Working on page 1657
Working on page 1658
Working on page 1659
Working on page 1660
Working on page 1661
Working on page 1662
Working on page 1663
Working on page 1664
Working on page 1665
Working on page 1666
Working on page 1667
Working on pa

Working on page 2015
Working on page 2016
Working on page 2017
Working on page 2018
Working on page 2019
Working on page 2020
Working on page 2021
Working on page 2022
Working on page 2023
Working on page 2024
Working on page 2025
Working on page 2026
Working on page 2027
Working on page 2028
Working on page 2029
Working on page 2030
Working on page 2031
Working on page 2032
Working on page 2033
Working on page 2034
Working on page 2035
Working on page 2036
Working on page 2037
Working on page 2038
Working on page 2039
Working on page 2040
Working on page 2041
Working on page 2042
Working on page 2043
Working on page 2044
Working on page 2045
Working on page 2046
Working on page 2047
Working on page 2048
Working on page 2049
Working on page 2050
Working on page 2051
Working on page 2052
Working on page 2053
Working on page 2054
Working on page 2055
Working on page 2056
Working on page 2057
Working on page 2058
Working on page 2059
Working on page 2060
Working on page 2061
Working on pa

Working on page 2407
Working on page 2408
Working on page 2409
Working on page 2410
Working on page 2411
Working on page 2412
Working on page 2413
Working on page 2414
Working on page 2415
Working on page 2416
Working on page 2417
Working on page 2418
Working on page 2419
Working on page 2420
Working on page 2421
Working on page 2422
Working on page 2423
Working on page 2424
Working on page 2425
Working on page 2426
Working on page 2427
Working on page 2428
Working on page 2429
Working on page 2430
Working on page 2431
Working on page 2432
Working on page 2433
Working on page 2434
Working on page 2435
Working on page 2436
Working on page 2437
Working on page 2438
Working on page 2439
Working on page 2440
Working on page 2441
Working on page 2442
Working on page 2443
Working on page 2444
Working on page 2445
Working on page 2446
Working on page 2447
Working on page 2448
Working on page 2449
Working on page 2450
Working on page 2451
Working on page 2452
Working on page 2453
Working on pa

Working on page 2798
Working on page 2799
Working on page 2800
Working on page 2801
Working on page 2802
Working on page 2803
Working on page 2804
Working on page 2805
Working on page 2806
Working on page 2807
Working on page 2808
Working on page 2809
Working on page 2810
Working on page 2811
Working on page 2812
Working on page 2813
Working on page 2814
Working on page 2815
Working on page 2816
Working on page 2817
Working on page 2818
Working on page 2819
Working on page 2820
Working on page 2821
Working on page 2822
Working on page 2823
Working on page 2824
Working on page 2825
Working on page 2826
Working on page 2827
Working on page 2828
Working on page 2829
Working on page 2830
Working on page 2831
Working on page 2832
Working on page 2833
Working on page 2834
Working on page 2835
Working on page 2836
Working on page 2837
Working on page 2838
Working on page 2839
Working on page 2840
Working on page 2841
Working on page 2842
Working on page 2843
Working on page 2844
Working on pa

Working on page 3191
Working on page 3192
Working on page 3193
Working on page 3194
Working on page 3195
Working on page 3196
Working on page 3197
Working on page 3198
Working on page 3199
Working on page 3200
Working on page 3201
Working on page 3202
Working on page 3203
Working on page 3204
Working on page 3205
Working on page 3206
Working on page 3207
Working on page 3208
Working on page 3209
Working on page 3210
Working on page 3211
Working on page 3212
Working on page 3213
Working on page 3214
Working on page 3215
Working on page 3216
Working on page 3217
Working on page 3218
Working on page 3219
Working on page 3220
Working on page 3221
Working on page 3222
Working on page 3223
Working on page 3224
Working on page 3225
Working on page 3226
Working on page 3227
Working on page 3228
Working on page 3229
Working on page 3230
Working on page 3231
Working on page 3232
Working on page 3233
Working on page 3234
Working on page 3235
Working on page 3236
Working on page 3237
Working on pa

Working on page 3586
Working on page 3587
Working on page 3588
Working on page 3589
Working on page 3590
Working on page 3591
Working on page 3592
Working on page 3593
Working on page 3594
Working on page 3595
Working on page 3596
Working on page 3597
Working on page 3598
Working on page 3599
Working on page 3600
Working on page 3601
Working on page 3602
Working on page 3603
Working on page 3604
Working on page 3605
Working on page 3606
Working on page 3607
Working on page 3608
Working on page 3609
Working on page 3610
Working on page 3611
Working on page 3612
Working on page 3613
Working on page 3614
Working on page 3615
Working on page 3616
Working on page 3617
Working on page 3618
Working on page 3619
Working on page 3620
Working on page 3621
Working on page 3622
Working on page 3623
Working on page 3624
Working on page 3625
Working on page 3626
Working on page 3627
Working on page 3628
Working on page 3629
Working on page 3630
Working on page 3631
Working on page 3632
Working on pa

Working on page 3980
Working on page 3981
Working on page 3982
Working on page 3983
Working on page 3984
Working on page 3985
Working on page 3986
Working on page 3987
Working on page 3988
Working on page 3989
Working on page 3990
Working on page 3991
Working on page 3992
Working on page 3993
Working on page 3994
Working on page 3995
Working on page 3996
Working on page 3997
Working on page 3998
Working on page 3999
Working on page 4000
Working on page 4001
Working on page 4002
Working on page 4003
Working on page 4004
Working on page 4005
Working on page 4006
Working on page 4007
Working on page 4008
Working on page 4009
Working on page 4010
Working on page 4011
Working on page 4012
Working on page 4013
Working on page 4014
Working on page 4015
Working on page 4016
Working on page 4017
Working on page 4018
Working on page 4019
Working on page 4020
Working on page 4021
Working on page 4022
Working on page 4023
Working on page 4024
Working on page 4025
Working on page 4026
Working on pa

Working on page 4372
Working on page 4373
Working on page 4374
Working on page 4375
Working on page 4376
Working on page 4377
Working on page 4378
Working on page 4379
Working on page 4380
Working on page 4381
Working on page 4382
Working on page 4383
Working on page 4384
Working on page 4385
Working on page 4386
Working on page 4387
Working on page 4388
Working on page 4389
Working on page 4390
Working on page 4391
Working on page 4392
Working on page 4393
Working on page 4394
Working on page 4395
Working on page 4396
Working on page 4397
Working on page 4398
Working on page 4399
Working on page 4400
Working on page 4401
Working on page 4402
Working on page 4403
Working on page 4404
Working on page 4405
Working on page 4406
Working on page 4407
Working on page 4408
Working on page 4409
Working on page 4410
Working on page 4411
Working on page 4412
Working on page 4413
Working on page 4414
Working on page 4415
Working on page 4416
Working on page 4417
Working on page 4418
Working on pa

Working on page 4765
Working on page 4766
Working on page 4767
Working on page 4768
Working on page 4769
Working on page 4770
Working on page 4771
Working on page 4772
Working on page 4773
Working on page 4774
Working on page 4775
Working on page 4776
Working on page 4777
Working on page 4778
Working on page 4779
Working on page 4780
Working on page 4781
Working on page 4782
Working on page 4783
Working on page 4784
Working on page 4785
Working on page 4786
Working on page 4787
Working on page 4788
Working on page 4789
Working on page 4790
Working on page 4791
Working on page 4792
Working on page 4793
Working on page 4794
Working on page 4795
Working on page 4796
Working on page 4797
Working on page 4798
Working on page 4799
Working on page 4800
Working on page 4801
Working on page 4802
Working on page 4803
Working on page 4804
Working on page 4805
Working on page 4806
Working on page 4807
Working on page 4808
Working on page 4809
Working on page 4810
Working on page 4811
Working on pa

Working on page 5155
Working on page 5156
Working on page 5157
Working on page 5158
Working on page 5159
Working on page 5160
Working on page 5161
Working on page 5162
Working on page 5163
Working on page 5164
Working on page 5165
Working on page 5166
Working on page 5167
Working on page 5168
Working on page 5169
Working on page 5170
Working on page 5171
Working on page 5172
Working on page 5173
Working on page 5174
Working on page 5175
Working on page 5176
Working on page 5177
Working on page 5178
Working on page 5179
Working on page 5180
Working on page 5181
Working on page 5182
Working on page 5183
Working on page 5184
Working on page 5185
Working on page 5186
Working on page 5187
Working on page 5188
Working on page 5189
Working on page 5190
Working on page 5191
Working on page 5192
Working on page 5193
Working on page 5194
Working on page 5195
Working on page 5196
Working on page 5197
Working on page 5198
Working on page 5199
Working on page 5200
Working on page 5201
Working on pa

Working on page 5549
Working on page 5550
Working on page 5551
Working on page 5552
Working on page 5553
Working on page 5554
Working on page 5555
Working on page 5556
Working on page 5557
Working on page 5558
Working on page 5559
Working on page 5560
Working on page 5561
Working on page 5562
Working on page 5563
Working on page 5564
Working on page 5565
Working on page 5566
Working on page 5567
Working on page 5568
Working on page 5569
Working on page 5570
Working on page 5571
Working on page 5572
Working on page 5573
Working on page 5574
Working on page 5575
Working on page 5576
Working on page 5577
Working on page 5578
Working on page 5579
Working on page 5580
Working on page 5581
Working on page 5582
Working on page 5583
Working on page 5584
Working on page 5585
Working on page 5586
Working on page 5587
Working on page 5588
Working on page 5589
Working on page 5590
Working on page 5591
Working on page 5592
Working on page 5593
Working on page 5594
Working on page 5595
Working on pa

Working on page 5937
Working on page 5938
Working on page 5939
Working on page 5940
Working on page 5941
Working on page 5942
Working on page 5943
Working on page 5944
Working on page 5945
Working on page 5946
Working on page 5947
Working on page 5948
Working on page 5949
Working on page 5950
Working on page 5951
Working on page 5952
Working on page 5953
Working on page 5954
Working on page 5955
Working on page 5956
Working on page 5957
Working on page 5958
Working on page 5959
Working on page 5960
Working on page 5961
Working on page 5962
Working on page 5963
Working on page 5964
Working on page 5965
Working on page 5966
Working on page 5967
Working on page 5968
Working on page 5969
Working on page 5970
Working on page 5971
Working on page 5972
Working on page 5973
Working on page 5974
Working on page 5975
Working on page 5976
Working on page 5977
Working on page 5978
Working on page 5979
Working on page 5980
Working on page 5981
Working on page 5982
Working on page 5983
Working on pa

Working on page 6328
Working on page 6329
Working on page 6330
Working on page 6331
Working on page 6332
Working on page 6333
Working on page 6334
Working on page 6335
Working on page 6336
Working on page 6337
Working on page 6338
Working on page 6339
Working on page 6340
Working on page 6341
Working on page 6342
Working on page 6343
Working on page 6344
Working on page 6345
Working on page 6346
Working on page 6347
Working on page 6348
Working on page 6349
Working on page 6350
Working on page 6351
Working on page 6352
Working on page 6353
Working on page 6354
Working on page 6355
Working on page 6356
Working on page 6357
Working on page 6358
Working on page 6359
Working on page 6360
Working on page 6361
Working on page 6362
Working on page 6363
Working on page 6364
Working on page 6365
Working on page 6366
Working on page 6367
Working on page 6368
Working on page 6369
Working on page 6370
Working on page 6371
Working on page 6372
Working on page 6373
Working on page 6374
Working on pa

Working on page 6720
Working on page 6721
Working on page 6722
Working on page 6723
Working on page 6724
Working on page 6725
Working on page 6726
Working on page 6727
Working on page 6728
Working on page 6729
Working on page 6730
Working on page 6731
Working on page 6732
Working on page 6733
Working on page 6734
Working on page 6735
Working on page 6736
Working on page 6737
Working on page 6738
Working on page 6739
Working on page 6740
Working on page 6741
Working on page 6742
Working on page 6743
Working on page 6744
Working on page 6745
Working on page 6746
Working on page 6747
Working on page 6748
Working on page 6749
Working on page 6750
Working on page 6751
Working on page 6752
Working on page 6753
Working on page 6754
Working on page 6755
Working on page 6756
Working on page 6757
Working on page 6758
Working on page 6759
Working on page 6760
Working on page 6761
Working on page 6762
Working on page 6763
Working on page 6764
Working on page 6765
Working on page 6766
Working on pa

Working on page 7112
Working on page 7113
Working on page 7114
Working on page 7115
Working on page 7116
Working on page 7117
Working on page 7118
Working on page 7119
Working on page 7120
Working on page 7121
Working on page 7122
Working on page 7123
Working on page 7124
Working on page 7125
Working on page 7126
Working on page 7127
Working on page 7128
Working on page 7129
Working on page 7130
Working on page 7131
Working on page 7132
Working on page 7133
Working on page 7134
Working on page 7135
Working on page 7136
Working on page 7137
Working on page 7138
Working on page 7139
Working on page 7140
Working on page 7141
Working on page 7142
Working on page 7143
Working on page 7144
Working on page 7145
Working on page 7146
Working on page 7147
Working on page 7148
Working on page 7149
Working on page 7150
Working on page 7151
Working on page 7152
Working on page 7153
Working on page 7154
Working on page 7155
Working on page 7156
Working on page 7157
Working on page 7158
Working on pa

Working on page 7506
Working on page 7507
Working on page 7508
Working on page 7509
Working on page 7510
Working on page 7511
Working on page 7512
Working on page 7513
Working on page 7514
Working on page 7515
Working on page 7516
Working on page 7517
Working on page 7518
Working on page 7519
Working on page 7520
Working on page 7521
Working on page 7522
Working on page 7523
Working on page 7524
Working on page 7525
Working on page 7526
Working on page 7527
Working on page 7528
Working on page 7529
Working on page 7530
Working on page 7531
Working on page 7532
Working on page 7533
Working on page 7534
Working on page 7535
Working on page 7536
Working on page 7537
Working on page 7538
Working on page 7539
Working on page 7540
Working on page 7541
Working on page 7542
Working on page 7543
Working on page 7544
Working on page 7545
Working on page 7546
Working on page 7547
Working on page 7548
Working on page 7549
Working on page 7550
Working on page 7551
Working on page 7552
Working on pa

Working on page 7898
Working on page 7899
Working on page 7900
Working on page 7901
Working on page 7902
Working on page 7903
Working on page 7904
Working on page 7905
Working on page 7906
Working on page 7907
Working on page 7908
Working on page 7909
Working on page 7910
Working on page 7911
Working on page 7912
Working on page 7913
Working on page 7914
Working on page 7915
Working on page 7916
Working on page 7917
Working on page 7918
Working on page 7919
Working on page 7920
Working on page 7921
Working on page 7922
Working on page 7923
Working on page 7924
Working on page 7925
Working on page 7926
Working on page 7927
Working on page 7928
Working on page 7929
Working on page 7930
Working on page 7931
Working on page 7932
Working on page 7933
Working on page 7934
Working on page 7935
Working on page 7936
Working on page 7937
Working on page 7938
Working on page 7939
Working on page 7940
Working on page 7941
Working on page 7942
Working on page 7943
Working on page 7944
Working on pa

Working on page 8289
Working on page 8290
Working on page 8291
Working on page 8292
Working on page 8293
Working on page 8294
Working on page 8295
Working on page 8296
Working on page 8297
Working on page 8298
Working on page 8299
Working on page 8300
Working on page 8301
Working on page 8302
Working on page 8303
Working on page 8304
Working on page 8305
Working on page 8306
Working on page 8307
Working on page 8308
Working on page 8309
Working on page 8310
Working on page 8311
Working on page 8312
Working on page 8313
Working on page 8314
Working on page 8315
Working on page 8316
Working on page 8317
Working on page 8318
Working on page 8319
Working on page 8320
Working on page 8321
Working on page 8322
Working on page 8323
Working on page 8324
Working on page 8325
Working on page 8326
Working on page 8327
Working on page 8328
Working on page 8329
Working on page 8330
Working on page 8331
Working on page 8332
Working on page 8333
Working on page 8334
Working on page 8335
Working on pa

Working on page 8683
Working on page 8684
Working on page 8685
Working on page 8686
Working on page 8687
Working on page 8688
Working on page 8689
Working on page 8690
Working on page 8691
Working on page 8692
Working on page 8693
Working on page 8694
Working on page 8695
Working on page 8696
Working on page 8697
Working on page 8698
Working on page 8699
Working on page 8700
Working on page 8701
Working on page 8702
Working on page 8703
Working on page 8704
Working on page 8705
Working on page 8706
Working on page 8707
Working on page 8708
Working on page 8709
Working on page 8710
Working on page 8711
Working on page 8712
Working on page 8713
Working on page 8714
Working on page 8715
Working on page 8716
Working on page 8717
Working on page 8718
Working on page 8719
Working on page 8720
Working on page 8721
Working on page 8722
Working on page 8723
Working on page 8724
Working on page 8725
Working on page 8726
Working on page 8727
Working on page 8728
Working on page 8729
Working on pa

Working on page 9074
Working on page 9075
Working on page 9076
Working on page 9077
Working on page 9078
Working on page 9079
Working on page 9080
Working on page 9081
Working on page 9082
Working on page 9083
Working on page 9084
Working on page 9085
Working on page 9086
Working on page 9087
Working on page 9088
Working on page 9089
Working on page 9090
Working on page 9091
Working on page 9092
Working on page 9093
Working on page 9094
Working on page 9095
Working on page 9096
Working on page 9097
Working on page 9098
Working on page 9099
Working on page 9100
Working on page 9101
Working on page 9102
Working on page 9103
Working on page 9104
Working on page 9105
Working on page 9106
Working on page 9107
Working on page 9108
Working on page 9109
Working on page 9110
Working on page 9111
Working on page 9112
Working on page 9113
Working on page 9114
Working on page 9115
Working on page 9116
Working on page 9117
Working on page 9118
Working on page 9119
Working on page 9120
Working on pa

Working on page 9464
Working on page 9465
Working on page 9466
Working on page 9467
Working on page 9468
Working on page 9469
Working on page 9470
Working on page 9471
Working on page 9472
Working on page 9473
Working on page 9474
Working on page 9475
Working on page 9476
Working on page 9477
Working on page 9478
Working on page 9479
Working on page 9480
Working on page 9481
Working on page 9482
Working on page 9483
Working on page 9484
Working on page 9485
Working on page 9486
Working on page 9487
Working on page 9488
Working on page 9489
Working on page 9490
Working on page 9491
Working on page 9492
Working on page 9493
Working on page 9494
Working on page 9495
Working on page 9496
Working on page 9497
Working on page 9498
Working on page 9499
Working on page 9500
Working on page 9501
Working on page 9502
Working on page 9503
Working on page 9504
Working on page 9505
Working on page 9506
Working on page 9507
Working on page 9508
Working on page 9509
Working on page 9510
Working on pa

Working on page 9857
Working on page 9858
Working on page 9859
Working on page 9860
Working on page 9861
Working on page 9862
Working on page 9863
Working on page 9864
Working on page 9865
Working on page 9866
Working on page 9867
Working on page 9868
Working on page 9869
Working on page 9870
Working on page 9871
Working on page 9872
Working on page 9873
Working on page 9874
Working on page 9875
Working on page 9876
Working on page 9877
Working on page 9878
Working on page 9879
Working on page 9880
Working on page 9881
Working on page 9882
Working on page 9883
Working on page 9884
Working on page 9885
Working on page 9886
Working on page 9887
Working on page 9888
Working on page 9889
Working on page 9890
Working on page 9891
Working on page 9892
Working on page 9893
Working on page 9894
Working on page 9895
Working on page 9896
Working on page 9897
Working on page 9898
Working on page 9899
Working on page 9900
Working on page 9901
Working on page 9902
Working on page 9903
Working on pa

Working on page 10235
Working on page 10236
Working on page 10237
Working on page 10238
Working on page 10239
Working on page 10240
Working on page 10241
Working on page 10242
Working on page 10243
Working on page 10244
Working on page 10245
Working on page 10246
Working on page 10247
Working on page 10248
Working on page 10249
Working on page 10250
Working on page 10251
Working on page 10252
Working on page 10253
Working on page 10254
Working on page 10255
Working on page 10256
Working on page 10257
Working on page 10258
Working on page 10259
Working on page 10260
Working on page 10261
Working on page 10262
Working on page 10263
Working on page 10264
Working on page 10265
Working on page 10266
Working on page 10267
Working on page 10268
Working on page 10269
Working on page 10270
Working on page 10271
Working on page 10272
Working on page 10273
Working on page 10274
Working on page 10275
Working on page 10276
Working on page 10277
Working on page 10278
Working on page 10279
Working on

Working on page 10608
Working on page 10609
Working on page 10610
Working on page 10611
Working on page 10612
Working on page 10613
Working on page 10614
Working on page 10615
Working on page 10616
Working on page 10617
Working on page 10618
Working on page 10619
Working on page 10620
Working on page 10621
Working on page 10622
Working on page 10623
Working on page 10624
Working on page 10625
Working on page 10626
Working on page 10627
Working on page 10628
Working on page 10629
Working on page 10630
Working on page 10631
Working on page 10632
Working on page 10633
Working on page 10634
Working on page 10635
Working on page 10636
Working on page 10637
Working on page 10638
Working on page 10639
Working on page 10640
Working on page 10641
Working on page 10642
Working on page 10643
Working on page 10644
Working on page 10645
Working on page 10646
Working on page 10647
Working on page 10648
Working on page 10649
Working on page 10650
Working on page 10651
Working on page 10652
Working on

Working on page 10982
Working on page 10983
Working on page 10984
Working on page 10985
Working on page 10986
Working on page 10987
Working on page 10988
Working on page 10989
Working on page 10990
Working on page 10991
Working on page 10992
Working on page 10993
Working on page 10994
Working on page 10995
Working on page 10996
Working on page 10997
Working on page 10998
Working on page 10999
Working on page 11000
Working on page 11001
Working on page 11002
Working on page 11003
Working on page 11004
Working on page 11005
Working on page 11006
Working on page 11007
Working on page 11008
Working on page 11009
Working on page 11010
Working on page 11011
Working on page 11012
Working on page 11013
Working on page 11014
Working on page 11015
Working on page 11016
Working on page 11017
Working on page 11018
Working on page 11019
Working on page 11020
Working on page 11021
Working on page 11022
Working on page 11023
Working on page 11024
Working on page 11025
Working on page 11026
Working on

Working on page 11356
Working on page 11357
Working on page 11358
Working on page 11359
Working on page 11360
Working on page 11361
Working on page 11362
Working on page 11363
Working on page 11364
Working on page 11365
Working on page 11366
Working on page 11367
Working on page 11368
Working on page 11369
Working on page 11370
Working on page 11371
Working on page 11372
Working on page 11373
Working on page 11374
Working on page 11375
Working on page 11376
Working on page 11377
Working on page 11378
Working on page 11379
Working on page 11380
Working on page 11381
Working on page 11382
Working on page 11383
Working on page 11384
Working on page 11385
Working on page 11386
Working on page 11387
Working on page 11388
Working on page 11389
Working on page 11390
Working on page 11391
Working on page 11392
Working on page 11393
Working on page 11394
Working on page 11395
Working on page 11396
Working on page 11397
Working on page 11398
Working on page 11399
Working on page 11400
Working on

Working on page 11731
Working on page 11732
Working on page 11733
Working on page 11734
Working on page 11735
Working on page 11736
Working on page 11737
Working on page 11738
Working on page 11739
Working on page 11740
Working on page 11741
Working on page 11742
Working on page 11743
Working on page 11744
Working on page 11745
Working on page 11746
Working on page 11747
Working on page 11748
Working on page 11749
Working on page 11750
Working on page 11751
Working on page 11752
Working on page 11753
Working on page 11754
Working on page 11755
Working on page 11756
Working on page 11757
Working on page 11758
Working on page 11759
Working on page 11760
Working on page 11761
Working on page 11762
Working on page 11763
Working on page 11764
Working on page 11765
Working on page 11766
Working on page 11767
Working on page 11768
Working on page 11769
Working on page 11770
Working on page 11771
Working on page 11772
Working on page 11773
Working on page 11774
Working on page 11775
Working on

Working on page 12106
Working on page 12107
Working on page 12108
Working on page 12109
Working on page 12110
Working on page 12111
Working on page 12112
Working on page 12113
Working on page 12114
Working on page 12115
Working on page 12116
Working on page 12117
Working on page 12118
Working on page 12119
Working on page 12120
Working on page 12121
Working on page 12122
Working on page 12123
Working on page 12124
Working on page 12125
Working on page 12126
Working on page 12127
Working on page 12128
Working on page 12129
Working on page 12130
Working on page 12131
Working on page 12132
Working on page 12133
Working on page 12134
Working on page 12135
Working on page 12136
Working on page 12137
Working on page 12138
Working on page 12139
Working on page 12140
Working on page 12141
Working on page 12142
Working on page 12143
Working on page 12144
Working on page 12145
Working on page 12146
Working on page 12147
Working on page 12148
Working on page 12149
Working on page 12150
Working on

Working on page 12478
Working on page 12479
Working on page 12480
Working on page 12481
2020-01-01 00:00:00
Working on page 12482
Working on page 12483
Working on page 12484
Working on page 12485
Working on page 12486
Working on page 12487
Working on page 12488
Working on page 12489
Working on page 12490
Working on page 12491
Working on page 12492
Working on page 12493
Working on page 12494
Working on page 12495
Working on page 12496
Working on page 12497
Working on page 12498
Working on page 12499
Working on page 12500
Working on page 12501
Working on page 12502
Working on page 12503
1995-01-01 00:00:00
Working on page 12504
Working on page 12505
Working on page 12506
Working on page 12507
Working on page 12508
Working on page 12509
Working on page 12510
Working on page 12511
Working on page 12512
Working on page 12513
Working on page 12514
Working on page 12515
Working on page 12516
Working on page 12517
Working on page 12518
Working on page 12519
Working on page 12520
Working on pag

Working on page 12849
Working on page 12850
Working on page 12851
Working on page 12852
Working on page 12853
Working on page 12854
Working on page 12855
Working on page 12856
Working on page 12857
Working on page 12858
Working on page 12859
Working on page 12860
Working on page 12861
Working on page 12862
Working on page 12863
Working on page 12864
Working on page 12865
Working on page 12866
Working on page 12867
Working on page 12868
Working on page 12869
Working on page 12870
Working on page 12871
Working on page 12872
Working on page 12873
Working on page 12874
Working on page 12875
Working on page 12876
Working on page 12877
Working on page 12878
Working on page 12879
Working on page 12880
Working on page 12881
Working on page 12882
Working on page 12883
Working on page 12884
Working on page 12885
Working on page 12886
Working on page 12887
Working on page 12888
Working on page 12889
Working on page 12890
Working on page 12891
Working on page 12892
Working on page 12893
Working on

Working on page 13223
Working on page 13224
Working on page 13225
Working on page 13226
Working on page 13227
Working on page 13228
Working on page 13229
Working on page 13230
Working on page 13231
Working on page 13232
Working on page 13233
Working on page 13234
Working on page 13235
Working on page 13236
Working on page 13237
Working on page 13238
Working on page 13239
Working on page 13240
1992-01-01 00:00:00
Working on page 13241
Working on page 13242
Working on page 13243
Working on page 13244
Working on page 13245
Working on page 13246
Working on page 13247
Working on page 13248
Working on page 13249
Working on page 13250
Working on page 13251
Working on page 13252
Working on page 13253
Working on page 13254
Working on page 13255
Working on page 13256
Working on page 13257
Working on page 13258
Working on page 13259
Working on page 13260
Working on page 13261
Working on page 13262
Working on page 13263
Working on page 13264
Working on page 13265
Working on page 13266
Working on p

Working on page 13592
Working on page 13593
Working on page 13594
Working on page 13595
Working on page 13596
Working on page 13597
Working on page 13598
Working on page 13599
Working on page 13600
Working on page 13601
Working on page 13602
Working on page 13603
Working on page 13604
Working on page 13605
Working on page 13606
Working on page 13607
Working on page 13608
Working on page 13609
Working on page 13610
Working on page 13611
Working on page 13612
Working on page 13613
Working on page 13614
Working on page 13615
Working on page 13616
Working on page 13617
Working on page 13618
Working on page 13619
Working on page 13620
Working on page 13621
Working on page 13622
Working on page 13623
Working on page 13624
Working on page 13625
Working on page 13626
Working on page 13627
Working on page 13628
Working on page 13629
Working on page 13630
Working on page 13631
Working on page 13632
Working on page 13633
Working on page 13634
Working on page 13635
Working on page 13636
Working on

Working on page 13963
Working on page 13964
Working on page 13965
Working on page 13966
Working on page 13967
Working on page 13968
Working on page 13969
Working on page 13970
Working on page 13971
Working on page 13972
Working on page 13973
Working on page 13974
Working on page 13975
Working on page 13976
Working on page 13977
Working on page 13978
Working on page 13979
Working on page 13980
Working on page 13981
Working on page 13982
Working on page 13983
Working on page 13984
Working on page 13985
Working on page 13986
Working on page 13987
Working on page 13988
Working on page 13989
Working on page 13990
Working on page 13991
1992-01-01 00:00:00
Working on page 13992
Working on page 13993
Working on page 13994
Working on page 13995
Working on page 13996
Working on page 13997
Working on page 13998
Working on page 13999
Working on page 14000
Working on page 14001
Working on page 14002
Working on page 14003
Working on page 14004
Working on page 14005
Working on page 14006
Working on p

Working on page 14337
Working on page 14338
Working on page 14339
Working on page 14340
Working on page 14341
Working on page 14342
Working on page 14343
Working on page 14344
Working on page 14345
Working on page 14346
Working on page 14347
Working on page 14348
Working on page 14349
Working on page 14350
Working on page 14351
Working on page 14352
Working on page 14353
Working on page 14354
Working on page 14355
Working on page 14356
Working on page 14357
Working on page 14358
Working on page 14359
Working on page 14360
Working on page 14361
Working on page 14362
Working on page 14363
Working on page 14364
Working on page 14365
Working on page 14366
Working on page 14367
Working on page 14368
Working on page 14369
Working on page 14370
Working on page 14371
Working on page 14372
Working on page 14373
Working on page 14374
Working on page 14375
Working on page 14376
Working on page 14377
Working on page 14378
Working on page 14379
Working on page 14380
Working on page 14381
Working on

Working on page 14710
Working on page 14711
Working on page 14712
Working on page 14713
Working on page 14714
Working on page 14715
Working on page 14716
Working on page 14717
Working on page 14718
Working on page 14719
Working on page 14720
Working on page 14721
Working on page 14722
Working on page 14723
Working on page 14724
Working on page 14725
Working on page 14726
Working on page 14727
Working on page 14728
Working on page 14729
Working on page 14730
Working on page 14731
Working on page 14732
Working on page 14733
Working on page 14734
Working on page 14735
Working on page 14736
Working on page 14737
Working on page 14738
Working on page 14739
Working on page 14740
Working on page 14741
Working on page 14742
Working on page 14743
Working on page 14744
Working on page 14745
Working on page 14746
Working on page 14747
Working on page 14748
Working on page 14749
Working on page 14750
Working on page 14751
Working on page 14752
Working on page 14753
Working on page 14754
1995-01-01

Working on page 15083
Working on page 15084
Working on page 15085
Working on page 15086
Working on page 15087
Working on page 15088
Working on page 15089
Working on page 15090
Working on page 15091
Working on page 15092
Working on page 15093
Working on page 15094
Working on page 15095
Working on page 15096
Working on page 15097
Working on page 15098
Working on page 15099
Working on page 15100
Working on page 15101
Working on page 15102
Working on page 15103
Working on page 15104
Working on page 15105
Working on page 15106
Working on page 15107
Working on page 15108
Working on page 15109
Working on page 15110
Working on page 15111
Working on page 15112
Working on page 15113
Working on page 15114
Working on page 15115
Working on page 15116
Working on page 15117
Working on page 15118
Working on page 15119
Working on page 15120
Working on page 15121
Working on page 15122
Working on page 15123
Working on page 15124
Working on page 15125
Working on page 15126
Working on page 15127
Working on

Working on page 15450
Working on page 15451
Working on page 15452
Working on page 15453
Working on page 15454
Working on page 15455
Working on page 15456
Working on page 15457
Working on page 15458
Working on page 15459
Working on page 15460
Working on page 15461
Working on page 15462
Working on page 15463
Working on page 15464
Working on page 15465
Working on page 15466
Working on page 15467
Working on page 15468
Working on page 15469
Working on page 15470
Working on page 15471
Working on page 15472
Working on page 15473
Working on page 15474
Working on page 15475
Working on page 15476
Working on page 15477
Working on page 15478
Working on page 15479
Working on page 15480
Working on page 15481
Working on page 15482
Working on page 15483
Working on page 15484
Working on page 15485
Working on page 15486
Working on page 15487
Working on page 15488
Working on page 15489
Working on page 15490
Working on page 15491
Working on page 15492
Working on page 15493
Working on page 15494
Working on

Working on page 15819
Working on page 15820
Working on page 15821
Working on page 15822
Working on page 15823
Working on page 15824
Working on page 15825
Working on page 15826
Working on page 15827
Working on page 15828
Working on page 15829
Working on page 15830
Working on page 15831
Working on page 15832
Working on page 15833
Working on page 15834
Working on page 15835
Working on page 15836
Working on page 15837
Working on page 15838
Working on page 15839
Working on page 15840
Working on page 15841
Working on page 15842
Working on page 15843
Working on page 15844
Working on page 15845
Working on page 15846
Working on page 15847
Working on page 15848
2012-01-01 00:00:00
Working on page 15849
Working on page 15850
Working on page 15851
Working on page 15852
Working on page 15853
Working on page 15854
Working on page 15855
Working on page 15856
Working on page 15857
Working on page 15858
1962-01-01 00:00:00
Working on page 15859
Working on page 15860
Working on page 15861
Working on pag

Working on page 16187
Working on page 16188
Working on page 16189
Working on page 16190
Working on page 16191
Working on page 16192
Working on page 16193
Working on page 16194
Working on page 16195
Working on page 16196
Working on page 16197
Working on page 16198
Working on page 16199
Working on page 16200
Working on page 16201
Working on page 16202
Working on page 16203
Working on page 16204
Working on page 16205
Working on page 16206
Working on page 16207
Working on page 16208
Working on page 16209
Working on page 16210
Working on page 16211
Working on page 16212
Working on page 16213
Working on page 16214
Working on page 16215
Working on page 16216
Working on page 16217
Working on page 16218
Working on page 16219
Working on page 16220
Working on page 16221
Working on page 16222
Working on page 16223
Working on page 16224
Working on page 16225
Working on page 16226
Working on page 16227
Working on page 16228
Working on page 16229
Working on page 16230
Working on page 16231
Working on

Working on page 16555
Working on page 16556
Working on page 16557
Working on page 16558
Working on page 16559
Working on page 16560
Working on page 16561
Working on page 16562
Working on page 16563
Working on page 16564
Working on page 16565
Working on page 16566
Working on page 16567
Working on page 16568
Working on page 16569
Working on page 16570
Working on page 16571
Working on page 16572
Working on page 16573
Working on page 16574
Working on page 16575
Working on page 16576
Working on page 16577
Working on page 16578
Working on page 16579
Working on page 16580
Working on page 16581
Working on page 16582
Working on page 16583
Working on page 16584
Working on page 16585
Working on page 16586
Working on page 16587
Working on page 16588
Working on page 16589
Working on page 16590
Working on page 16591
Working on page 16592
Working on page 16593
Working on page 16594
Working on page 16595
Working on page 16596
Working on page 16597
Working on page 16598
Working on page 16599
Working on

Working on page 16930
Working on page 16931
Working on page 16932
Working on page 16933
Working on page 16934
Working on page 16935
Working on page 16936
Working on page 16937
Working on page 16938
Working on page 16939
Working on page 16940
Working on page 16941
Working on page 16942
Working on page 16943
Working on page 16944
Working on page 16945
Working on page 16946
Working on page 16947
Working on page 16948
Working on page 16949
Working on page 16950
Working on page 16951
Working on page 16952
Working on page 16953
Working on page 16954
Working on page 16955
Working on page 16956
Working on page 16957
Working on page 16958
1983-01-01 00:00:00
Working on page 16959
Working on page 16960
Working on page 16961
Working on page 16962
Working on page 16963
Working on page 16964
Working on page 16965
Working on page 16966
Working on page 16967
Working on page 16968
Working on page 16969
Working on page 16970
Working on page 16971
Working on page 16972
2012-01-01 00:00:00
Working on pag

Working on page 17299
Working on page 17300
Working on page 17301
Working on page 17302
Working on page 17303
Working on page 17304
Working on page 17305
Working on page 17306
Working on page 17307
Working on page 17308
Working on page 17309
Working on page 17310
Working on page 17311
Working on page 17312
Working on page 17313
Working on page 17314
Working on page 17315
Working on page 17316
Working on page 17317
Working on page 17318
Working on page 17319
Working on page 17320
Working on page 17321
Working on page 17322
Working on page 17323
Working on page 17324
Working on page 17325
Working on page 17326
Working on page 17327
Working on page 17328
Working on page 17329
Working on page 17330
Working on page 17331
Working on page 17332
Working on page 17333
Working on page 17334
Working on page 17335
Working on page 17336
Working on page 17337
Working on page 17338
Working on page 17339
Working on page 17340
Working on page 17341
Working on page 17342
Working on page 17343
Working on

Working on page 17676
Working on page 17677
Working on page 17678
Working on page 17679
Working on page 17680
Working on page 17681
Working on page 17682
Working on page 17683
Working on page 17684
Working on page 17685
Working on page 17686
Working on page 17687
Working on page 17688
Working on page 17689
Working on page 17690
Working on page 17691
Working on page 17692
Working on page 17693
Working on page 17694
Working on page 17695
Working on page 17696
Working on page 17697
Working on page 17698
Working on page 17699
Working on page 17700
Working on page 17701
Working on page 17702
Working on page 17703
Working on page 17704
Working on page 17705
Working on page 17706
Working on page 17707
Working on page 17708
Working on page 17709
Working on page 17710
Working on page 17711
Working on page 17712
Working on page 17713
Working on page 17714
Working on page 17715
Working on page 17716
Working on page 17717
Working on page 17718
Working on page 17719
Working on page 17720
Working on

Working on page 18049
Working on page 18050
Working on page 18051
Working on page 18052
Working on page 18053
Working on page 18054
Working on page 18055
Working on page 18056
Working on page 18057
Working on page 18058
Working on page 18059
Working on page 18060
Working on page 18061
Working on page 18062
Working on page 18063
Working on page 18064
Working on page 18065
Working on page 18066
Working on page 18067
Working on page 18068
Working on page 18069
Working on page 18070
Working on page 18071
Working on page 18072
Working on page 18073
Working on page 18074
Working on page 18075
Working on page 18076
Working on page 18077
Working on page 18078
Working on page 18079
Working on page 18080
Working on page 18081
Working on page 18082
Working on page 18083
Working on page 18084
Working on page 18085
Working on page 18086
Working on page 18087
Working on page 18088
Working on page 18089
Working on page 18090
Working on page 18091
Working on page 18092
Working on page 18093
Working on

Working on page 18424
Working on page 18425
Working on page 18426
Working on page 18427
Working on page 18428
Working on page 18429
Working on page 18430
Working on page 18431
Working on page 18432
Working on page 18433
Working on page 18434
Working on page 18435
Working on page 18436
Working on page 18437
Working on page 18438
Working on page 18439
Working on page 18440
Working on page 18441
Working on page 18442
Working on page 18443
Working on page 18444
Working on page 18445
Working on page 18446
Working on page 18447
Working on page 18448
Working on page 18449
Working on page 18450
Working on page 18451
Working on page 18452
Working on page 18453
Working on page 18454
Working on page 18455
Working on page 18456
Working on page 18457
Working on page 18458
Working on page 18459
Working on page 18460
Working on page 18461
Working on page 18462
Working on page 18463
Working on page 18464
Working on page 18465
Working on page 18466
Working on page 18467
Working on page 18468
Working on

Working on page 18800
Working on page 18801
Working on page 18802
Working on page 18803
Working on page 18804
Working on page 18805
Working on page 18806
Working on page 18807
Working on page 18808
Working on page 18809
Working on page 18810
Working on page 18811
Working on page 18812
Working on page 18813
Working on page 18814
Working on page 18815
Working on page 18816
Working on page 18817
Working on page 18818
Working on page 18819
Working on page 18820
Working on page 18821
Working on page 18822
Working on page 18823
Working on page 18824
Working on page 18825
Working on page 18826
Working on page 18827
Working on page 18828
Working on page 18829
Working on page 18830
Working on page 18831
Working on page 18832
Working on page 18833
Working on page 18834
Working on page 18835
Working on page 18836
Working on page 18837
Working on page 18838
Working on page 18839
Working on page 18840
Working on page 18841
Working on page 18842
Working on page 18843
Working on page 18844
Working on

Working on page 19173
Working on page 19174
Working on page 19175
Working on page 19176
Working on page 19177
Working on page 19178
Working on page 19179
Working on page 19180
Working on page 19181
Working on page 19182
Working on page 19183
Working on page 19184
Working on page 19185
Working on page 19186
Working on page 19187
Working on page 19188
Working on page 19189
Working on page 19190
Working on page 19191
Working on page 19192
Working on page 19193
Working on page 19194
Working on page 19195
Working on page 19196
Working on page 19197
Working on page 19198
Working on page 19199
Working on page 19200
Working on page 19201
Working on page 19202
Working on page 19203
Working on page 19204
Working on page 19205
Working on page 19206
Working on page 19207
Working on page 19208
Working on page 19209
Working on page 19210
Working on page 19211
Working on page 19212
Working on page 19213
Working on page 19214
Working on page 19215
Working on page 19216
Working on page 19217


## 2. Search Engine

### Pre processing steps

The steps that follow involves the merging of all the tsv, resulting in a dataframe. We then process this dataframe by working on its description (synopsis) field. We do tokenization, removing of stopwords & punctuation, and stemming. The resulting dataframe is saved in the csv format and in binary format for its use later.

In [5]:
def sort_files(t):

    """This method sorts all the tsv files in the following fashion
    anime_0.tsv, anime_1.tsv, anime_2.tsv, anime_3.tsv, ....."""

    return [a(x) for x in re.split(r'(\d+)', t)]

def a(t):
    return int(t) if t.isdigit() else t

In [6]:
def merge_tsvs(path, column_names):
    
    """Here we merge the tsv files into a single dataframe."""

    list_of_files = sorted(os.listdir(path), key=sort_files)
    df = pd.read_csv(path+list_of_files[0],
                     names=column_names,
                     sep="\t", engine='c')
    
    for f in list_of_files[1:]:
        df_ = pd.read_csv(path+f,
                          names=column_names,
                          sep="\t", engine='c')
        df = pd.concat([df, df_], ignore_index=True)
        
    return df

In [196]:
path = "./anime_tsvs/"
columns = ["animeTitle", "animeType", "animeNumEpisode", "releaseDate", "endDate", "animeNumMembers",
            "animeScore", "animeUsers", "animeRank", "animePopularity", "animeDescription", "animeRelated",
            "animeCharacters", "animeVoices", "animeStaff"]

if "df.csv" not in os.listdir(): # then create and pre-process dataset
    df = merge_tsvs(path, columns)
    df = df.drop([0], axis=0)
    df = df.reset_index(drop=True)
    df["animeNumMembers"].fillna(0)
    df["animePopularity"].fillna(0)
    df["animeNumMembers"] = df["animeNumMembers"].astype(int)
    df["animePopularity"] = df["animePopularity"].astype(int)

    df.to_csv("./df.csv")

else:
    df = pd.read_csv("df.csv")

In [7]:
def text_process(text_, type_stemmer="porter"): # we use porter stemmer by default

    """Here we process the synopsis as mentioned above. We return a list containing words which are
    stemmed, tokenized, removed fom punctuation and stopwords."""

    stopwords_english = stopwords.words("english")

    if type_stemmer == "porter":
        stemmer = nltk.stem.PorterStemmer()
    elif type_stemmer == "lancaster":
        stemmer = nltk.stem.LancasterStemmer()
        
    try:
        text_tokenized = nltk.word_tokenize(text_) # tokenization
        stemmed = [stemmer.stem(word) for word in text_tokenized if ((word.lower() not in stopwords_english) and (word not in string.punctuation))] # stemming
    except TypeError as e:
        print(text_)
        raise e
        
    return stemmed

In [199]:
# Load or create (if not already) the dataframe with an additional column of preprocessed description

if "tokenized_df.p" not in os.listdir():
    df_tokenized = df.assign(description_tokenized=df["animeDescription"].fillna('').apply(lambda m: text_process(m)))
    with open("tokenized_df.p", "wb") as f:
        pickle.dump(df_tokenized, f)
else:
    with open("tokenized_df.p", "rb") as f:
        df_tokenized = pickle.load(f)

## 2.1

### 2.1.1

In [8]:
def get_vocabulary(synopsis, vocabulary_file = "vocabulary.pkl"):
    
    """Here we generate a vocab of all words from the description. We tag each word with an integer term_id
    and then save it in a binary file."""

    vocab = set()

    for desc in synopsis:
        vocab = vocab.union(set(desc))

    vocab_dict = dict(zip(sorted(vocab), range(len(vocab))))
    with open(vocabulary_file, "wb") as f:
        pickle.dump(vocab_dict, f)
        
    return vocab_dict

In [9]:
def inverted_idx(synopsis, vocab, inverted_idx_file):
    
    """Here we create a dictionary (inverted index) in which against each term id we have a list of documents no.
    which contain that specific word."""

    inverted_idx = dict()
    for term, term_id in vocab.items():
        inverted_idx[term_id] = set() # create and initialize the dictionary with a set against each key to avoid duplicates

    descriptions = zip(synopsis, range(len(synopsis)))   # tokenized description against doc no. 
    for desc, doc_n in descriptions:
        checked_words = []
        for word in desc:
            if word not in checked_words: # check if we have already worked on this word
                checked_words.append(word)
                term_id = vocab[word]
                inverted_idx[term_id] = inverted_idx[term_id].union(set([doc_n]))

    for term_id, docs_set in inverted_idx.items():
        inverted_idx[term_id] = sorted(list(inverted_idx[term_id]))

    # create and save the inv_idx in a binary file
    with open(inverted_idx_file, "wb") as f:
        pickle.dump(inverted_idx, f)

    return inverted_idx

In [10]:
def get_synopsis(synopsis_file = "tokenized_df.p"):

    """Here we load the descriptions."""

    print('Loading synopsis... ', end ='')
    with open(synopsis_file, 'rb') as f:
        df = pickle.load(f)

    synopsis = list(df['description_tokenized'])
    print('\nSuccessfully loaded.\n')
    return synopsis

In [11]:
def get_vocab(synopsis, vocabulary_file = "vocabulary.pkl"):
    
    """Load vocabulary (in case it's present) otherwise create it."""

    print('Loading vocabulary... ', end ='')
    if vocabulary_file not in os.listdir():
        vocab = get_vocabulary(synopsis, vocabulary_file)
    else:
        with open(vocabulary_file, "rb") as f:
            vocab = pickle.load(f)
    print('\nSuccessfully loaded.\n')
    
    return vocab

In [12]:
def get_inverted_idx(synopsis, vocab, inverted_idx_file = "inverted_index.pkl"):
    
    """Load inverted index (in case it's present) otherwise create it."""

    print('Loading inverted index... ', end ='')
    if inverted_idx_file not in os.listdir():
        inverted_idx = inverted_idx(synopsis, vocab, inverted_idx_file)
    else:
        with open(inverted_idx_file, "rb") as f:
            inverted_idx = pickle.load(f)
    print('\nSuccessfully loaded.\n')
    
    return inverted_idx

In [13]:
vocabulary_file = "vocabulary.pkl"
synopsis_file = "tokenized_df.p"
inverted_idx_file = "inverted_index.pkl"

# Load synopsis, vocabulary, and inverted index
synopsis = get_synopsis(synopsis_file)
vocab = get_vocab(synopsis, vocabulary_file)
inverted_idx = get_inverted_idx(synopsis, vocab, inverted_idx_file)

Loading synopsis... 
Successfully loaded.

Loading vocabulary... 
Successfully loaded.

Loading inverted index... 
Successfully loaded.



In [14]:
def search_engine(vocab, inverted_idx, urls):
    
    """Search engine receives an input query and gives back the result of all anime documents that contain
    every word of the query inputted."""

    query = input('Please enter your query...\nquery: ') # Input query here

    q = query.lower()
    query = text_process(q) # pre-processing step

    # if first word not in our vocab, then no need to search for later words (since it's an AND query)
    if query[0] in vocab:
        term_id_1 = vocab[query[0]]
        docs_set = set(inverted_idx[term_id_1])

        for word in query[1:]:
            if word in vocab:
                term_id = vocab[word]
                docs = inverted_idx[term_id]

                # Intersection is necassary to ensure all words of the query are in the synopsis
                docs_set = docs_set.intersection(set(docs))

                # In case no intersection found
                if len(docs_set) == 0:
                    print("No result found.")
                    return

            else:
                print("No result found.")
                return

        df = pd.read_csv("./df.csv") # df containing the processed snypsis
        
        res = df.iloc[sorted(list(docs_set))][["animeTitle", "animeDescription"]]
        
        for i in sorted(list(docs_set)):
            res['URL'] = urls[i]

        return res

    else:
        print('No result found.')
        return


In [46]:
search_engine(vocab, inverted_idx, urls)

Please enter your query...
query: saiyan race


Unnamed: 0,animeTitle,animeDescription,URL
367,Dragon Ball Z,Five years after winning the World Martial Art...,https://myanimelist.net/anime/986/Dragon_Ball_...
402,Dragon Ball Super: Broly,"Forty-one years ago on Planet Vegeta, home of ...",https://myanimelist.net/anime/986/Dragon_Ball_...
1470,Dragon Ball Z Special 1: Tatta Hitori no Saish...,"Bardock, Son Goku's father, is a low-ranking S...",https://myanimelist.net/anime/986/Dragon_Ball_...


## 2.2

### 2.2.1

In [15]:
def find_tfidf(word, desc, synopsis, idf=None):
    
    """Here we calculate tfidf score corresponding the inputted word."""

    counter = 0
    if idf == None: # calculate idf if not provided
        for desc in synopsis:
            if word in desc:
                counter += 1
                
        idf = np.log(len(synopsis)/counter)
        
    tfidf = desc.count(word)/len(desc) * idf
    
    return idf, tfidf

In [16]:
def inverted_idx_2(synopsis, vocab, inverted_idx_tfidf_file="inverted_index_2.p", idfs_file="idfs.p"):
    
    """Here we generate a dictionary for our inverted index """
    
    second_inverted_idx = dict()
    for term_id in vocab.values():
        second_inverted_idx[term_id] = list()

    calculated_idfs = {}
    
    descriptions = zip(synopsis, range(len(synopsis)))
    for desc, doc_n in descriptions:
        checked_words = []
        for word in desc:
            # avoid redundancy of checking already checked words
            if word not in checked_words:
                checked_words.append(word)
                term_id = vocab[word]
                
                if word not in calculated_idfs.keys():
                    idf, tfidf = find_tfidf(word, desc, synopsis) # calculate idf and tfidf for this new word
                    calculated_idfs[word] = idf
                    
                else:
                    _, tfidf = find_tfidf(word, desc, synopsis, idf)

                second_inverted_idx[term_id].append([doc_n, tfidf]) # append document id and corresponding tfidf score

    for term_id, lists in second_inverted_idx.items():
        second_inverted_idx[term_id] = sorted(second_inverted_idx[term_id], key=lambda m: m[1]) # sort by tfidf score

    with open(inverted_idx_tfidf_file, "wb") as f:
        pickle.dump(second_inverted_idx, f)

    with open(idfs_file, "wb") as f:
        pickle.dump(calculated_idfs, f)

    return second_inverted_idx, calculated_idfs


In [17]:
def get_inverted_idx_tfidf(synopsis, vocab, inverted_idx_tfidf_file, idfs_file):

    """Load inverted index with tfidfs (in case it's present) otherwise create it."""

    print('Loading inverted index tfidf... \n', end ='')
    if (idfs_file not in os.listdir()) or (inverted_idx_tfidf_file not in os.listdir()):
        inv_idx_2, idfs = inverted_idx_2(synopsis, vocab, inverted_idx_tfidf_file, idfs_file)
        
    else:
        with open(inverted_idx_tfidf_file, "rb") as f:
            inv_idx_2 = pickle.load(f)
            
        with open(idfs_file, "rb") as f:
            idfs = pickle.load(f)
    print('Successfully loaded.')
    return inv_idx_2, idfs

In [18]:
inverted_idx_tfidf_file = "inverted_index_2.p"
idfs_file = "idfs.p"

inv_idx_2, idfs = get_inverted_idx_tfidf(synopsis, vocab, inverted_idx_tfidf_file, idfs_file)

Loading inverted index tfidf... 
Successfully loaded.


In [29]:
def find_cos_similarity(vector_1, vector_2):

    """Computes cosine similarity between two vectors"""
    
    return (np.dot(vector_1, vector_2))/(np.linalg.norm(vector_1) * np.linalg.norm(vector_2))

In [20]:
def find_top_k_docs(query, synopsis, vocab, inv_idx_2, idfs, urls, k=10):

    """Here we create max-heap of the documents containing words of the input query,
    we then arrange them wrt cosine similarity of these documents with the query and
    return top k documents only."""

    df = pd.read_csv("./df.csv")

    query = text_process(query.lower()) # query pre-processing

    res_dict = {} # result dictionary

    for word in query:
        if word in vocab.keys():
            term_id = vocab[word]
            for list_ in inv_idx_2[term_id]:
                if list_[0] not in res_dict.keys():
                    res_dict[list_[0]] = []
                res_dict[list_[0]].append(list_[1])
#         else:
#             print("No result found.")

    vector_query = [(query.count(q)/len(query)) * idfs[q] for q in query if q in idfs.keys()]
    
    dists = []
    
    for key in res_dict.keys():
        vec = res_dict[key]
        if len(vec) == len(vector_query):
            dists.append((-find_cos_similarity(vector_query, vec), key))

    heapq.heapify(dists) # using heap data structure
    dists_len = len(dists)
    res = []
    for i in range(min(k, dists_len)):
        e = heapq.heappop(dists)
        res.append([e[1], -e[0]])

    indices = [i[0] for i in res]
    dists = [i[1] for i in res]

    df_1 = df.iloc[indices][["animeTitle", "animeDescription"]]
    
    df_res = df_1.assign(URL=[urls[i] for i in indices],
                                 Similarity=dists)
    return df_res

In [34]:
query = "first anime"
output = find_top_k_docs(query, synopsis, vocab, inv_idx_2, idfs, urls)
output

Unnamed: 0,animeTitle,animeDescription,URL,Similarity
3430,Oshiete! Galko-chan,"At first glance, Galko, Otako, and Ojou are th...",https://myanimelist.net/anime/32013/Oshiete_Ga...,0.999756
5559,Astarotte no Omocha! EX,The OVA Astaroette no Omocha is a three part s...,https://myanimelist.net/anime/10582/Astarotte_...,0.999431
16381,Ishiyama-dera Digital Engi Emaki,The Ishiyama-dera temple in Outsu announced th...,https://myanimelist.net/anime/33982/Ishiyama-d...,0.99924
4844,Panda Kopanda,Panda Kopanda (Panda! Go Panda!) is a 30 minut...,https://myanimelist.net/anime/2611/Panda_Kopanda,0.999154
1119,Mousou Dairinin,The infamous Shounen Bat (Lil' Slugger) is ter...,https://myanimelist.net/anime/323/Mousou_Dairinin,0.998555
9509,Saa Ikou! Tamagotchi,Let's Go! Tamagotchi is an anime series focusi...,https://myanimelist.net/anime/6798/Saa_Ikou_Ta...,0.998269
10786,Touyama Sakura Uchuu Chou: Yatsu no Na wa Gold,This science-fiction anime was inspired somewh...,https://myanimelist.net/anime/12399/Touyama_Sa...,0.996559
1907,Sakurako-san no Ashimoto ni wa Shitai ga Umatt...,When Shoutarou Tatewaki first meets Sakurako K...,https://myanimelist.net/anime/30187/Sakurako-s...,0.995053
9358,Metropolis (2009),Mirai Mizue's first time experimenting with ge...,https://myanimelist.net/anime/29765/Metropolis...,0.9944
325,One Piece: Episode of Merry - Mou Hitori no Na...,The story arcs aboard the Straw Hat Crew's fir...,https://myanimelist.net/anime/19123/One_Piece_...,0.992909


In [35]:
query = "famous story"
output = find_top_k_docs(query, synopsis, vocab, inv_idx_2, idfs, urls)
output

Unnamed: 0,animeTitle,animeDescription,URL,Similarity
3913,Manga Sekai Mukashibanashi,Each episode of this series tells the story of...,https://myanimelist.net/anime/6262/Manga_Sekai...,0.99896
8277,Kuusou no Sora Tobu Kikaitachi,Animated 2002 short film produced by Studio Gh...,https://myanimelist.net/anime/19401/Kuusou_no_...,0.971399
2276,Glass no Kamen,"Kitajima Maya, a 13-year old girl with a talen...",https://myanimelist.net/anime/506/Glass_no_Kamen,0.969861
5665,Chou Hatsumei Boy Kanipan,Cho Hatsumei Boy Kanipan is a continued story ...,https://myanimelist.net/anime/3690/Chou_Hatsum...,0.962115
9253,Marginal Prince: Gekkeiju no Ouji-tachi,"""Marginal Prince"" based off of the hit love si...",https://myanimelist.net/anime/1912/Marginal_Pr...,0.962115
14808,Abe George Kattobi Seishun Ki: Shibuya Honky Tonk,This story is based on one of the famous Japan...,https://myanimelist.net/anime/17501/Abe_George...,0.962115
16923,Kono Shihai kara no Sotsugyou: Ozaki Yutaka,A compilation OVA of 5 stories portraying the ...,https://myanimelist.net/anime/12981/Kono_Shiha...,0.962115
3411,Madou King Granzort,"In the future, the moon is a habitable place w...",https://myanimelist.net/anime/2818/Madou_King_...,0.962115
7689,Jin Sheng Yuan,A famous music video tells a sentimental story...,https://myanimelist.net/anime/10132/Jin_Sheng_...,0.962115
8237,Sekai Meisaku Douwa,Another of Toei's World Famous Fairy Tale seri...,https://myanimelist.net/anime/7398/Sekai_Meisa...,0.962115


## 5. Algorithmic question

Steps to follow:
1. Given an input list of appointments, find all combinations of the possible solutions
2. Check each combination if it is valid or not (no consecutive appointments)
3. For all valid combinations, find their durations
4. Find the combination with the maximum duration
5. Return list of the last step and its duration

Input: appointments_list of length n and distinct values\

routine max_len_appointments(appointments_list):\
    validCombinations = [all combinations in which ther are no consecutive appointments]\
    appointmentDurations = [durations of every instance of validCombinations]\
    maxDuration = max(appointmentDurations)\
    maxLenappointments = [instances of appointmentDurations where duration is maxDuration]
    
    return maxLenappointments, maxDuration
        