# Clean AFT Dataset
**Author:** Jim Maddock

**Last Updated:** 5-3-20

**Description:** Import the initial English AFT dataset (downloaded from https://old.datahub.io/dataset/wikipedia-article-feedback-corpus) and create features for exploratory analysis and for training a classifier. The processed dataset should include the following **feature fields**:
* unique ID
* original ID (these are non-unique but could be useful for debugging)
* article and revision ID
* comment (might have to remove stop tokens)
* article topic from https://github.com/wikimedia/drafttopic
* article features - TBD
* vandalism score from https://github.com/wikimedia/draftquality

The dataset should also include the following **class fields**:
* helpful/un-helpful
* non-actionable

In [47]:
import csv
import pandas as pd
import numpy as np
import uuid
import random
import requests
import time
from oresapi import Session
import concurrent.futures
import logging
import sys

import matplotlib.pyplot as plt
import matplotlib

matplotlib.style.use('ggplot')

In [48]:
# Send this notebook's log records to a dated file on disk.
LOGFILE = './logs/clean_dataset_5-7-20.log'

logger = logging.getLogger(__name__)

# Each record is written as "[LEVEL mm/dd/YYYY hh:mm:ss AM/PM] message".
formatter = logging.Formatter(
    fmt='[%(levelname)s %(asctime)s] %(message)s',
    datefmt='%m/%d/%Y %I:%M:%S %p',
)

handler = logging.FileHandler(LOGFILE)
handler.setFormatter(formatter)
logger.addHandler(handler)

In [49]:
# Load the raw AFT dump from CSV.
# `aft_id` is read as a generic object column instead of an inferred dtype.
FILEPATH = '/srv/aft/raw/dump_03-24-20.csv'

dtypes = {'aft_id': object}

df = pd.read_csv(
    FILEPATH,
    dtype=dtypes,
    encoding='latin-1',
    escapechar='\\',
)

In [50]:
# Keep only the comments that carry a label, i.e. at least one helpful or
# one unhelpful vote.
has_helpful_votes = df['aft_helpful'] > 0
has_unhelpful_votes = df['aft_unhelpful'] > 0
df = df.loc[has_helpful_votes | has_unhelpful_votes]

In [51]:
# Generate a unique ID for every remaining comment.
# A dedicated, seeded random.Random instance keeps the IDs reproducible
# across runs without touching the global random state.
rd = random.Random()
rd.seed(0)
# Draw exactly one 128-bit value per row, in row order. A comprehension is
# used instead of a row-wise DataFrame.apply because apply's number of calls
# is pandas-version dependent (releases before 1.1 evaluated the first row
# twice, shifting the seeded sequence) and because apply does not return a
# column-shaped result for an empty frame.
df['UUID'] = [uuid.UUID(int=rd.getrandbits(128)) for _ in range(len(df))]
# reset_index() without drop=True keeps the pre-filter row labels in an
# 'index' column and renumbers the rows from 0.
df = df.reset_index()

In [52]:
# get vandalism score for each comment using draft quality
# PRE: comment text, not including stop words
# POST: return the draftquality prediction and the per-class probabilities
#       (OK / attack / spam / vandalism) for the comment

# Ignore and log errors

# ORES v3 scoring endpoint for the enwiki draftquality model. The revision ID
# in the path (57185234) is fixed; the text to score is supplied per request
# through the 'datasource.revision.text' query parameter.
# NOTE(review): presumably the revision only serves as a placeholder whose
# text is overridden by that parameter -- confirm against the ORES docs.
BASE_URL = "https://ores.wikimedia.org/v3/scores/enwiki/57185234/draftquality"

def threadedVandalismScoreHandler(df, max_workers=4, handle_errors=False):
    """Score every comment in ``df`` concurrently and collect the results.

    One ``getVandalismScore`` task is submitted per row to a thread pool and
    the results are gathered in completion order, not row order.

    Args:
        df: DataFrame with at least the ``aft_comment`` and ``UUID`` columns.
        max_workers: Size of the thread pool.
        handle_errors: If True, rows whose scoring raised an ``Exception``
            are skipped and are simply absent from the result. If False,
            the first failure encountered is re-raised.

    Returns:
        DataFrame with one row per successfully scored comment, built from
        the dicts returned by ``getVandalismScore``. A ``UUID`` column is
        always present, even when no row was scored, so the result can
        always be merged on ``UUID``.
    """
    score_list = []

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(getVandalismScore, row)
            for _, row in df[['aft_comment', 'UUID']].iterrows()
        ]
        for future in concurrent.futures.as_completed(futures):
            try:
                score_list.append(future.result())
            except Exception:
                # Only ordinary errors are optional to ignore; interrupts and
                # interpreter exit always propagate.
                if not handle_errors:
                    raise

    score_df = pd.DataFrame(score_list)
    if 'UUID' not in score_df.columns:
        # Nothing was scored: keep the merge key so callers can still join.
        score_df['UUID'] = pd.Series(dtype=object)
    return score_df

def getVandalismScore(row):
    """Fetch and flatten the draftquality score for a single comment row.

    ``row`` must support lookup of ``'aft_comment'`` and ``'UUID'``. The
    returned dict is the output of ``vandalismJsonToDict`` with a ``'UUID'``
    entry copied from the row. On any failure the row's UUID is logged at
    WARNING level and the original exception is re-raised.
    """
    try:
        result = vandalismJsonToDict(vandalismScoreRequest(row['aft_comment']))
        result['UUID'] = row['UUID']
    except BaseException:
        # Record which comment failed, then let the caller decide what to do.
        logger.warning(row['UUID'])
        raise
    return result

def formatCommentText(comment_text):
    """Return ``comment_text`` as a single-line, double-quoted string.

    The value is converted with ``str()``, every newline, tab and straight
    double-quote character is removed, and the result is wrapped in one pair
    of straight double quotes. Apostrophes and curly quotes are left as-is.
    """
    # One pass that deletes all three unwanted characters.
    unwanted = str.maketrans('', '', '\n\t"')
    cleaned = str(comment_text).translate(unwanted)
    return '"{0}"'.format(cleaned)
    

def vandalismScoreRequest(comment_text, timeout=60):
    """Request a draftquality score for ``comment_text`` from ORES.

    The comment is normalised with ``formatCommentText`` and sent as the
    ``datasource.revision.text`` query parameter of a GET to ``BASE_URL``.

    Args:
        comment_text: Raw comment text.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without a timeout a stalled
            connection would block its worker thread indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            4xx/5xx response (raised as ``requests.HTTPError``, which
            carries the status code and URL).
    """
    params = {
        'datasource.revision.text': formatCommentText(comment_text)
    }
    r = requests.get(BASE_URL, params=params, timeout=timeout)
    # Same failure condition as the falsy-response check (status >= 400),
    # but the raised error says what went wrong.
    r.raise_for_status()
    return r.json()

def vandalismJsonToDict(json):
    """Flatten an ORES draftquality response into a single-level dict.

    Reads the score stored under
    ``json['enwiki']['scores']['57185234']['draftquality']['score']`` and
    returns its prediction as ``dq_pred`` together with the four class
    probabilities as ``dq_prob_OK``, ``dq_prob_attack``, ``dq_prob_spam``
    and ``dq_prob_vandalism``. Raises ``KeyError`` if any level is missing.
    """
    score = json['enwiki']['scores']['57185234']['draftquality']['score']
    flat = {'dq_pred': score['prediction']}
    probabilities = score['probability']
    # (output column, ORES class label) pairs, in output order.
    for column, label in (
        ('dq_prob_OK', 'OK'),
        ('dq_prob_attack', 'attack'),
        ('dq_prob_spam', 'spam'),
        ('dq_prob_vandalism', 'vandalism'),
    ):
        flat[column] = probabilities[label]
    return flat

#threadedVandalismScoreHandler(test)

In [53]:
def getTopics(df):
    """Look up the top drafttopic prediction for every distinct revision.

    Args:
        df: DataFrame with an ``aft_page_revision`` column.

    Returns:
        DataFrame with one row per distinct revision ID and two columns,
        ``aft_page_revision`` and ``dt_pred``. ``dt_pred`` is the first entry
        of the model's prediction list, or None when ORES reported an error
        for the revision or predicted no topic at all. Both columns are
        present even when there are no revisions, so the result can always
        be merged on ``aft_page_revision``.
    """
    rev_id_list = df['aft_page_revision'].unique().tolist()

    # NOTE(review): user_agent is empty; consider a descriptive value so the
    # service operators can identify this client.
    session = Session("https://ores.wikimedia.org", user_agent="")
    results = session.score("enwiki", ["drafttopic"], rev_id_list)

    topic_list = []
    # Results are paired with revision IDs by position.
    # NOTE(review): assumes session.score yields results in input order.
    for rev_id, result in zip(rev_id_list, results):
        topic_score = result['drafttopic']
        if 'error' in topic_score:
            top_topic = None
        else:
            predictions = topic_score['score']['prediction']
            # Store the bare topic value (no trailing comma, which would
            # turn it into a 1-tuple).
            top_topic = predictions[0] if len(predictions) >= 1 else None
        topic_list.append({
            'aft_page_revision': rev_id,
            'dt_pred': top_topic,
        })

    return pd.DataFrame(topic_list, columns=['aft_page_revision', 'dt_pred'])

#getTopics(test)

In [54]:
def addPredColumns(df):
    """Return ``df`` joined with its vandalism and topic predictions.

    Vandalism scores are computed first (8 worker threads, failed rows
    skipped) and joined on ``UUID``; topic predictions are then joined on
    ``aft_page_revision``. Both joins are outer joins.
    """
    vandalism_scores = threadedVandalismScoreHandler(df, max_workers=8, handle_errors=True)
    topics = getTopics(df)

    with_vandalism = df.merge(vandalism_scores, on='UUID', how='outer')
    return with_vandalism.merge(topics, on='aft_page_revision', how='outer')

In [55]:
# Output file template; {0} is replaced by the chunk number.
FILE_OUTPUT_PATH = '/srv/aft/processed/chunks/chunk_{0}_5-7-20.csv'
CHUNK_SIZE = 100

def chunkAndProcess(df, fileout=None, chunk_size=1000):
    """Add prediction columns to ``df`` one chunk at a time.

    The frame is split into consecutive slices of ``chunk_size`` rows. Each
    slice is passed to ``addPredColumns`` and, when ``fileout`` is given,
    written to ``fileout.format(i)`` where ``i`` is the chunk number. A chunk
    that raises an ``Exception`` is logged with its traceback and skipped so
    the remaining chunks are still processed; interrupts such as Ctrl-C are
    not swallowed and stop the run.

    Args:
        df: DataFrame to process.
        fileout: Optional ``str.format`` template for per-chunk CSV paths.
        chunk_size: Number of rows per chunk.

    Returns:
        None. Results are only written to disk.
    """
    for i, start in enumerate(range(0, df.shape[0], chunk_size)):
        df_chunk = df[start:start + chunk_size]
        try:
            result_chunk = addPredColumns(df_chunk)
            if fileout:
                result_chunk.to_csv(fileout.format(i), index=False)
            print('processed chunk {0}'.format(i))
        except Exception:
            # exc_info=True appends the traceback to the log record.
            logger.warning('error in chunk {0}'.format(i), exc_info=True)
    
#test = df[1000:2000]
#chunkAndProcess(test, fileout = FILE_OUTPUT_PATH, chunk_size=CHUNK_SIZE)

In [56]:
# Run the full pipeline over the filtered dataset, writing one CSV per
# CHUNK_SIZE-row chunk to FILE_OUTPUT_PATH.
chunkAndProcess(df, fileout = FILE_OUTPUT_PATH, chunk_size=CHUNK_SIZE)

processed chunk 0
processed chunk 1
processed chunk 2
processed chunk 3
processed chunk 4
processed chunk 5
processed chunk 6
processed chunk 7
processed chunk 8
processed chunk 9
processed chunk 10




processed chunk 11




processed chunk 12
processed chunk 13
processed chunk 14
processed chunk 15
processed chunk 16
processed chunk 17




processed chunk 18
processed chunk 19




processed chunk 20




processed chunk 21




processed chunk 22
processed chunk 23




processed chunk 24
processed chunk 25




processed chunk 26




processed chunk 27




processed chunk 28




processed chunk 29




processed chunk 30




processed chunk 31




processed chunk 32




processed chunk 33




processed chunk 34




processed chunk 35




processed chunk 36




processed chunk 37




processed chunk 38




processed chunk 39




processed chunk 40
processed chunk 41




processed chunk 42




processed chunk 43
processed chunk 44
processed chunk 45




processed chunk 46




processed chunk 47




processed chunk 48




processed chunk 49
processed chunk 50




processed chunk 51
processed chunk 52




processed chunk 53




processed chunk 54
processed chunk 55
processed chunk 56
processed chunk 57




processed chunk 58
processed chunk 59
processed chunk 60




processed chunk 61
processed chunk 62
processed chunk 63




processed chunk 64
processed chunk 65
processed chunk 66
processed chunk 67




processed chunk 68
processed chunk 69




processed chunk 70
processed chunk 71
processed chunk 72
processed chunk 73




processed chunk 74
processed chunk 75




processed chunk 76
processed chunk 77




processed chunk 78




processed chunk 79




processed chunk 80




processed chunk 81




processed chunk 82
processed chunk 83
processed chunk 84




processed chunk 85
processed chunk 86




processed chunk 87
processed chunk 88
processed chunk 89




processed chunk 90
processed chunk 91




processed chunk 92




processed chunk 93




processed chunk 94




processed chunk 95
processed chunk 96
processed chunk 97
processed chunk 98
processed chunk 99




processed chunk 100
processed chunk 101
processed chunk 102




processed chunk 103
processed chunk 104




processed chunk 105




processed chunk 106




processed chunk 107
processed chunk 108




processed chunk 109
processed chunk 110




processed chunk 111




processed chunk 112




processed chunk 113
processed chunk 114
processed chunk 115




processed chunk 116




processed chunk 117




processed chunk 118




processed chunk 119




processed chunk 120




processed chunk 121
processed chunk 122




processed chunk 123
processed chunk 124
processed chunk 125




processed chunk 126
processed chunk 127




processed chunk 128
processed chunk 129
processed chunk 130
processed chunk 131
processed chunk 132
processed chunk 133
processed chunk 134




processed chunk 135
processed chunk 136
processed chunk 137
processed chunk 138
processed chunk 139
processed chunk 140
processed chunk 141
processed chunk 142




processed chunk 143
processed chunk 144




processed chunk 145




processed chunk 146




processed chunk 147
processed chunk 148




processed chunk 149
processed chunk 150
processed chunk 151




processed chunk 152




processed chunk 153
processed chunk 154
processed chunk 155
processed chunk 156




processed chunk 157
processed chunk 158




processed chunk 159




processed chunk 160
processed chunk 161




processed chunk 162
processed chunk 163
processed chunk 164
processed chunk 165




processed chunk 166
processed chunk 167




processed chunk 168




processed chunk 169




processed chunk 170
processed chunk 171




processed chunk 172
processed chunk 173
processed chunk 174




processed chunk 175
processed chunk 176
processed chunk 177




processed chunk 178




processed chunk 179




processed chunk 180




processed chunk 181




processed chunk 182




processed chunk 183
processed chunk 184
processed chunk 185




processed chunk 186
processed chunk 187




processed chunk 188




processed chunk 189
processed chunk 190




processed chunk 191




processed chunk 192




processed chunk 193




processed chunk 194
processed chunk 195
processed chunk 196




processed chunk 197
processed chunk 198




processed chunk 199
processed chunk 200




processed chunk 201




processed chunk 202
processed chunk 203
processed chunk 204




processed chunk 205
processed chunk 206




processed chunk 207




processed chunk 208




processed chunk 209
processed chunk 210




processed chunk 211
processed chunk 212




processed chunk 213
processed chunk 214




processed chunk 215
processed chunk 216
processed chunk 217
processed chunk 218




processed chunk 219
processed chunk 220




processed chunk 221




processed chunk 222
processed chunk 223




processed chunk 224
processed chunk 225
processed chunk 226




processed chunk 227




processed chunk 228




processed chunk 229
processed chunk 230




processed chunk 231




processed chunk 232
processed chunk 233




processed chunk 234




processed chunk 235
processed chunk 236
processed chunk 237




processed chunk 238




processed chunk 239




processed chunk 240




processed chunk 241




processed chunk 242
processed chunk 243




processed chunk 244




processed chunk 245




processed chunk 246




processed chunk 247




processed chunk 248




processed chunk 249




processed chunk 250




processed chunk 251
processed chunk 252
processed chunk 253




processed chunk 254
processed chunk 255




processed chunk 256




processed chunk 257




processed chunk 258




processed chunk 259




processed chunk 260




processed chunk 261




processed chunk 262




processed chunk 263




processed chunk 264




processed chunk 265




processed chunk 266




processed chunk 267




processed chunk 268




processed chunk 269
processed chunk 270




processed chunk 271




processed chunk 272
processed chunk 273
processed chunk 274




processed chunk 275
processed chunk 276




processed chunk 277




processed chunk 278
processed chunk 279




processed chunk 280




processed chunk 281




processed chunk 282




processed chunk 283
processed chunk 284
processed chunk 285
processed chunk 286




processed chunk 287




processed chunk 288




processed chunk 289




processed chunk 290
processed chunk 291




processed chunk 292




processed chunk 293




processed chunk 294




processed chunk 295




processed chunk 296




processed chunk 297




processed chunk 298




processed chunk 299
processed chunk 300




processed chunk 301




processed chunk 302




processed chunk 303




processed chunk 304




processed chunk 305




processed chunk 306




processed chunk 307




processed chunk 308




processed chunk 309




processed chunk 310




processed chunk 311




processed chunk 312




processed chunk 313




processed chunk 314




processed chunk 315




processed chunk 316
processed chunk 317




processed chunk 318
processed chunk 319




processed chunk 320




processed chunk 321




processed chunk 322




processed chunk 323




processed chunk 324




processed chunk 325
processed chunk 326




processed chunk 327




processed chunk 328




processed chunk 329




processed chunk 330




processed chunk 331




processed chunk 332




processed chunk 333
processed chunk 334
processed chunk 335




processed chunk 336




processed chunk 337




processed chunk 338
processed chunk 339




processed chunk 340




processed chunk 341




processed chunk 342
processed chunk 343
processed chunk 344




processed chunk 345




processed chunk 346
processed chunk 347




processed chunk 348
processed chunk 349




processed chunk 350




processed chunk 351




processed chunk 352
processed chunk 353




processed chunk 354




processed chunk 355




processed chunk 356




processed chunk 357
processed chunk 358




processed chunk 359




processed chunk 360




processed chunk 361




processed chunk 362
processed chunk 363
processed chunk 364




processed chunk 365




processed chunk 366
processed chunk 367




processed chunk 368




processed chunk 369
processed chunk 370




processed chunk 371




processed chunk 372
processed chunk 373
processed chunk 374




processed chunk 375




processed chunk 376
processed chunk 377




processed chunk 378
processed chunk 379
processed chunk 380
processed chunk 381
processed chunk 382
processed chunk 383
processed chunk 384




processed chunk 385




processed chunk 386
processed chunk 387




processed chunk 388




processed chunk 389
processed chunk 390




processed chunk 391




processed chunk 392
processed chunk 393




processed chunk 394
processed chunk 395
processed chunk 396




processed chunk 397
processed chunk 398




processed chunk 399




processed chunk 400




processed chunk 401
processed chunk 402
processed chunk 403
processed chunk 404




processed chunk 405




processed chunk 406




processed chunk 407
processed chunk 408
processed chunk 409




processed chunk 410
processed chunk 411




processed chunk 412




processed chunk 413




processed chunk 414
processed chunk 415
processed chunk 416
processed chunk 417
processed chunk 418
processed chunk 419




processed chunk 420




processed chunk 421
processed chunk 422




processed chunk 423




processed chunk 424




processed chunk 425




processed chunk 426




processed chunk 427




processed chunk 428




processed chunk 429




processed chunk 430
processed chunk 431
processed chunk 432
processed chunk 433




processed chunk 434




processed chunk 435
processed chunk 436
processed chunk 437
processed chunk 438




processed chunk 439
processed chunk 440




processed chunk 441
processed chunk 442
processed chunk 443




processed chunk 444




processed chunk 445
processed chunk 446




processed chunk 447




processed chunk 448




processed chunk 449




processed chunk 450
processed chunk 451




processed chunk 452




processed chunk 453




processed chunk 454




processed chunk 455




processed chunk 456
processed chunk 457




processed chunk 458
processed chunk 459
processed chunk 460




processed chunk 461
processed chunk 462




processed chunk 463
processed chunk 464




processed chunk 465




processed chunk 466
processed chunk 467




processed chunk 468




processed chunk 469
processed chunk 470




processed chunk 471
processed chunk 472




processed chunk 473
processed chunk 474
processed chunk 475
processed chunk 476
processed chunk 477
processed chunk 478
processed chunk 479
processed chunk 480
processed chunk 481
processed chunk 482
processed chunk 483




processed chunk 484
processed chunk 485
processed chunk 486
processed chunk 487




processed chunk 488
processed chunk 489
processed chunk 490
processed chunk 491




processed chunk 492




processed chunk 493




processed chunk 494




processed chunk 495




processed chunk 496




processed chunk 497
processed chunk 498




processed chunk 499
processed chunk 500
processed chunk 501




processed chunk 502
processed chunk 503
processed chunk 504
processed chunk 505
processed chunk 506




processed chunk 507
processed chunk 508




processed chunk 509




processed chunk 510




processed chunk 511
processed chunk 512
processed chunk 513




processed chunk 514
processed chunk 515




processed chunk 516




processed chunk 517




processed chunk 518




processed chunk 519




processed chunk 520




processed chunk 521




processed chunk 522




processed chunk 523
processed chunk 524
processed chunk 525




processed chunk 526




processed chunk 527




processed chunk 528




processed chunk 529
processed chunk 530




processed chunk 531




processed chunk 532




processed chunk 533
processed chunk 534
processed chunk 535




processed chunk 536




processed chunk 537
processed chunk 538




processed chunk 539
processed chunk 540




processed chunk 541




processed chunk 542
processed chunk 543




processed chunk 544
processed chunk 545




processed chunk 546
processed chunk 547
processed chunk 548
processed chunk 549




processed chunk 550
processed chunk 551




processed chunk 552
processed chunk 553
processed chunk 554
processed chunk 555
processed chunk 556
processed chunk 557
processed chunk 558




processed chunk 559
processed chunk 560




processed chunk 561




processed chunk 562




processed chunk 563




processed chunk 564




processed chunk 565




processed chunk 566
processed chunk 567




processed chunk 568




processed chunk 569




processed chunk 570




processed chunk 571




processed chunk 572




processed chunk 573




processed chunk 574




processed chunk 575




processed chunk 576




processed chunk 577




processed chunk 578




processed chunk 579




processed chunk 580




processed chunk 581
processed chunk 582




processed chunk 583




processed chunk 584




processed chunk 585
processed chunk 586
processed chunk 587




processed chunk 588




processed chunk 589




processed chunk 590




processed chunk 591
processed chunk 592




processed chunk 593




processed chunk 594
processed chunk 595




processed chunk 596




processed chunk 597




processed chunk 598




processed chunk 599
processed chunk 600




processed chunk 601




processed chunk 602
processed chunk 603
processed chunk 604
processed chunk 605
processed chunk 606




processed chunk 607




processed chunk 608




processed chunk 609
processed chunk 610
processed chunk 611




processed chunk 612




processed chunk 613
processed chunk 614
processed chunk 615
processed chunk 616




processed chunk 617
processed chunk 618
processed chunk 619




processed chunk 620




processed chunk 621




processed chunk 622
processed chunk 623




processed chunk 624




processed chunk 625
processed chunk 626
processed chunk 627




processed chunk 628




processed chunk 629




processed chunk 630




processed chunk 631




processed chunk 632




processed chunk 633




processed chunk 634




processed chunk 635




processed chunk 636
processed chunk 637
processed chunk 638




processed chunk 639




processed chunk 640
processed chunk 641




processed chunk 642




processed chunk 643




processed chunk 644




processed chunk 645




processed chunk 646




processed chunk 647
processed chunk 648
processed chunk 649




processed chunk 650




processed chunk 651




processed chunk 652




processed chunk 653




processed chunk 654




processed chunk 655




processed chunk 656




processed chunk 657




processed chunk 658




processed chunk 659




processed chunk 660




processed chunk 661




processed chunk 662




processed chunk 663




processed chunk 664




processed chunk 665




processed chunk 666
processed chunk 667




processed chunk 668




processed chunk 669




processed chunk 670




processed chunk 671




processed chunk 672




processed chunk 673
processed chunk 674




processed chunk 675
processed chunk 676




processed chunk 677




processed chunk 678




processed chunk 679




processed chunk 680
processed chunk 681




processed chunk 682




processed chunk 683




processed chunk 684




processed chunk 685
processed chunk 686




processed chunk 687




processed chunk 688




processed chunk 689




processed chunk 690




processed chunk 691




processed chunk 692




processed chunk 693




processed chunk 694




processed chunk 695
processed chunk 696




processed chunk 697




processed chunk 698
processed chunk 699




processed chunk 700




processed chunk 701
processed chunk 702
processed chunk 703




processed chunk 704
processed chunk 705
processed chunk 706




processed chunk 707
processed chunk 708




processed chunk 709
processed chunk 710
processed chunk 711




processed chunk 712




processed chunk 713




processed chunk 714
processed chunk 715




processed chunk 716
processed chunk 717




processed chunk 718
processed chunk 719
processed chunk 720




processed chunk 721




processed chunk 722




processed chunk 723




processed chunk 724
processed chunk 725
processed chunk 726




processed chunk 727
processed chunk 728




processed chunk 729
processed chunk 730




processed chunk 731




processed chunk 732
processed chunk 733
processed chunk 734




processed chunk 735
processed chunk 736
processed chunk 737




processed chunk 738
processed chunk 739
processed chunk 740
processed chunk 741




processed chunk 742
processed chunk 743
processed chunk 744




processed chunk 745
processed chunk 746




processed chunk 747




processed chunk 748
processed chunk 749




processed chunk 750




processed chunk 751




processed chunk 752
processed chunk 753




processed chunk 754




processed chunk 755




processed chunk 756




processed chunk 757




processed chunk 758
processed chunk 759




processed chunk 760




processed chunk 761




processed chunk 762
processed chunk 763




processed chunk 764
processed chunk 765




processed chunk 766




processed chunk 767
processed chunk 768
processed chunk 769
processed chunk 770




processed chunk 771




processed chunk 772




processed chunk 773




processed chunk 774




processed chunk 775




processed chunk 776




processed chunk 777




processed chunk 778




processed chunk 779




processed chunk 780




processed chunk 781
processed chunk 782




processed chunk 783




processed chunk 784




processed chunk 785




processed chunk 786




processed chunk 787




processed chunk 788
processed chunk 789




processed chunk 790




processed chunk 791




processed chunk 792




processed chunk 793




processed chunk 794




processed chunk 795
processed chunk 796
processed chunk 797




processed chunk 798




processed chunk 799




processed chunk 800




processed chunk 801




processed chunk 802




processed chunk 803




processed chunk 804




processed chunk 805




processed chunk 806




processed chunk 807




processed chunk 808
processed chunk 809




processed chunk 810
processed chunk 811




processed chunk 812




processed chunk 813
processed chunk 814




processed chunk 815




processed chunk 816
processed chunk 817
processed chunk 818




processed chunk 819




processed chunk 820




processed chunk 821
processed chunk 822




processed chunk 823




processed chunk 824
processed chunk 825
processed chunk 826




processed chunk 827
processed chunk 828
processed chunk 829




processed chunk 830




processed chunk 831




processed chunk 832
processed chunk 833




processed chunk 834




processed chunk 835




processed chunk 836




processed chunk 837




processed chunk 838
processed chunk 839




processed chunk 840




processed chunk 841




processed chunk 842




processed chunk 843




processed chunk 844
processed chunk 845




processed chunk 846




processed chunk 847
processed chunk 848




processed chunk 849
processed chunk 850




processed chunk 851




processed chunk 852
processed chunk 853
processed chunk 854
processed chunk 855




processed chunk 856




processed chunk 857




processed chunk 858




processed chunk 859




processed chunk 860




processed chunk 861




processed chunk 862




processed chunk 863




processed chunk 864
processed chunk 865
processed chunk 866
processed chunk 867




processed chunk 868




processed chunk 869




processed chunk 870
processed chunk 871




processed chunk 872




processed chunk 873
processed chunk 874




processed chunk 875
processed chunk 876
processed chunk 877




processed chunk 878




processed chunk 879
processed chunk 880




processed chunk 881




processed chunk 882




processed chunk 883
processed chunk 884
processed chunk 885




processed chunk 886
processed chunk 887




processed chunk 888




processed chunk 889




processed chunk 890




processed chunk 891
processed chunk 892




processed chunk 893




processed chunk 894




processed chunk 895
processed chunk 896




processed chunk 897




processed chunk 898
processed chunk 899




processed chunk 900
processed chunk 901
processed chunk 902




processed chunk 903
processed chunk 904




processed chunk 905




processed chunk 906




processed chunk 907
processed chunk 908
processed chunk 909




processed chunk 910




processed chunk 911




processed chunk 912
processed chunk 913




processed chunk 914
processed chunk 915




processed chunk 916
processed chunk 917




processed chunk 918




processed chunk 919




processed chunk 920




processed chunk 921




processed chunk 922




processed chunk 923




processed chunk 924




processed chunk 925




processed chunk 926




processed chunk 927




processed chunk 928




processed chunk 929




processed chunk 930




processed chunk 931




processed chunk 932




processed chunk 933
processed chunk 934




processed chunk 935




processed chunk 936
processed chunk 937




processed chunk 938




processed chunk 939




processed chunk 940




processed chunk 941




processed chunk 942




processed chunk 943




processed chunk 944




processed chunk 945
processed chunk 946




processed chunk 947




processed chunk 948




processed chunk 949




processed chunk 950
processed chunk 951




processed chunk 952
processed chunk 953




processed chunk 954




processed chunk 955




processed chunk 956




processed chunk 957
processed chunk 958




processed chunk 959




processed chunk 960




processed chunk 961
processed chunk 962




processed chunk 963




processed chunk 964




processed chunk 965




processed chunk 966




processed chunk 967




processed chunk 968




processed chunk 969




processed chunk 970
processed chunk 971




processed chunk 972




processed chunk 973




processed chunk 974
processed chunk 975




processed chunk 976




processed chunk 977




processed chunk 978
processed chunk 979




processed chunk 980




processed chunk 981




processed chunk 982




processed chunk 983




processed chunk 984




processed chunk 985




processed chunk 986




processed chunk 987




processed chunk 988




processed chunk 989
processed chunk 990
processed chunk 991
processed chunk 992




processed chunk 993




processed chunk 994




processed chunk 995




processed chunk 996




processed chunk 997




processed chunk 998




processed chunk 999




processed chunk 1000
processed chunk 1001
processed chunk 1002




processed chunk 1003




processed chunk 1004




processed chunk 1005




processed chunk 1006




processed chunk 1007
processed chunk 1008
processed chunk 1009




processed chunk 1010
processed chunk 1011




processed chunk 1012




processed chunk 1013




processed chunk 1014




processed chunk 1015




processed chunk 1016
processed chunk 1017




processed chunk 1018




processed chunk 1019




processed chunk 1020
processed chunk 1021




processed chunk 1022




processed chunk 1023




processed chunk 1024




processed chunk 1025




processed chunk 1026




processed chunk 1027




processed chunk 1028




processed chunk 1029
processed chunk 1030




processed chunk 1031




processed chunk 1032
processed chunk 1033




processed chunk 1034




processed chunk 1035




processed chunk 1036




processed chunk 1037




processed chunk 1038




processed chunk 1039




processed chunk 1040




processed chunk 1041
processed chunk 1042




processed chunk 1043




processed chunk 1044
processed chunk 1045




processed chunk 1046
processed chunk 1047
processed chunk 1048




processed chunk 1049




processed chunk 1050




processed chunk 1051




processed chunk 1052




processed chunk 1053
processed chunk 1054




processed chunk 1055




processed chunk 1056




processed chunk 1057




processed chunk 1058




processed chunk 1059




processed chunk 1060




processed chunk 1061




processed chunk 1062




processed chunk 1063




processed chunk 1064




processed chunk 1065




processed chunk 1066




processed chunk 1067




processed chunk 1068




processed chunk 1069




processed chunk 1070




processed chunk 1071




processed chunk 1072




processed chunk 1073




processed chunk 1074




processed chunk 1075




processed chunk 1076
processed chunk 1077




processed chunk 1078




processed chunk 1079




processed chunk 1080




processed chunk 1081
processed chunk 1082
processed chunk 1083




processed chunk 1084




processed chunk 1085




processed chunk 1086




processed chunk 1087




processed chunk 1088




processed chunk 1089
processed chunk 1090
processed chunk 1091




processed chunk 1092




processed chunk 1093




processed chunk 1094




processed chunk 1095




processed chunk 1096




processed chunk 1097
processed chunk 1098




processed chunk 1099




processed chunk 1100




processed chunk 1101
processed chunk 1102




processed chunk 1103




processed chunk 1104
processed chunk 1105
processed chunk 1106
processed chunk 1107




processed chunk 1108




processed chunk 1109




processed chunk 1110




processed chunk 1111




processed chunk 1112




processed chunk 1113




processed chunk 1114




processed chunk 1115




processed chunk 1116




processed chunk 1117




processed chunk 1118




processed chunk 1119




processed chunk 1120
processed chunk 1121




processed chunk 1122




processed chunk 1123




processed chunk 1124




processed chunk 1125




processed chunk 1126




processed chunk 1127




processed chunk 1128
processed chunk 1129




processed chunk 1130




processed chunk 1131




processed chunk 1132




processed chunk 1133




processed chunk 1134




processed chunk 1135
processed chunk 1136




processed chunk 1137




processed chunk 1138




processed chunk 1139
processed chunk 1140
processed chunk 1141




processed chunk 1142




processed chunk 1143




processed chunk 1144




processed chunk 1145




processed chunk 1146




processed chunk 1147
processed chunk 1148
processed chunk 1149


In [57]:
# Number of rows in the filtered dataset (comments with at least one vote).
len(df)

114984