In [18]:
#general
from pprint import pprint
from itertools import repeat
import time
import re

#natural language
from readability_score.calculators.fleschkincaid import *
from readability_score.calculators.flesch import *
from readability_score.calculators.dalechall import *
from readability_score.calculators.colemanliau import *
from readability_score.calculators.linsearwrite import *
from readability_score.calculators.smog import *
from readability_score.calculators.ari import *
import nltk
import string


#reading/writing files
import os
import xml.etree.ElementTree as ET
import csv

#aws
import boto3
import json
import decimal

#multiprocessing
import multiprocessing as mp

In [21]:
def get_text_from_file(path):
    """Return the full contents of the text file at ``path``.

    The file is opened in text mode with the default encoding and is closed
    before the function returns (the handle used to be left open).
    """
    with open(path) as handle:
        return handle.read()

def get_xml_tree(path):
    """Parse the XML file at ``path`` and return its ``ElementTree``.

    Parsing is best-effort: if the file cannot be read or parsed, a message is
    printed and an empty ``ElementTree`` (whose ``getroot()`` is ``None``) is
    returned instead of raising.
    """
    try:
        return ET.parse(path)
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit still propagate.
        print ("error parsing - %s" % (path) )
        return ET.ElementTree()

def parse_xml(path):
    """Extract metadata and body text from the XML document at ``path``.

    Returns a dict that always contains ``path`` and ``body``. When the
    document could be parsed it also contains:

    * one entry per ``<idno>`` element, keyed by its ``type`` attribute
      (for example ``symbol`` and ``jobno``),
    * ``root``: the part of the symbol before the first ``/`` (empty string
      when there is no symbol),
    * ``organ``: the result of ``get_organ`` for the symbol (only when a
      symbol is present),
    * ``keywords``: the texts of all ``<keywords>`` elements,
    * ``date``: the text of the last non-empty ``publicationStmt/date``
      element, if there is one.

    ``body`` is the concatenation of all ``body/p/s`` sentence texts, with
    sentence separators normalised as described inline below.
    """
    print ("parsing - %s" % (path) )
    rtn = dict()

    tree = get_xml_tree(path)
    root = tree.getroot()

    rtn['path'] = path
    body = ''

    if root is not None:
        # find symbol and jobno (every idno is stored under its type)
        for idno in root.findall(".//idno"):
            if idno.text is not None:
                rtn[idno.get('type')] = idno.text

        rtn['root'] = ''
        # .get(): a document without a symbol idno must not raise KeyError
        symbol = rtn.get('symbol')
        if symbol is not None:
            rtn['root'] = symbol.split("/")[0]
            rtn['organ'] = get_organ(ORGAN_SYMBOL, symbol, 2)

        rtn['keywords'] = [
            keyword.text
            for keyword in root.findall(".//keywords")
            if keyword.text is not None
        ]

        for date in tree.findall(".//publicationStmt/date"):
            if date.text is not None:
                rtn['date'] = date.text

        for paragraph in tree.findall(".//body/p"):
            # look the sentences up once per paragraph instead of once per
            # sentence
            sentences = paragraph.findall("s")
            for sentence in sentences:
                text = sentence.text
                if text is None:
                    continue
                body += text
                if text[-1:] not in string.punctuation:
                    # no closing punctuation: terminate the sentence
                    body += '. '
                elif sentence is sentences[-1]:
                    # last sentence of the paragraph ends in punctuation:
                    # replace that final character with a period
                    body = body[:-1]
                    body += '. '
                else:
                    body += ' '

    rtn['body'] = body

    return rtn

def get_file_list(root_dir, extension, batch_size):
    """Collect the paths of all files below ``root_dir`` ending in ``extension``.

    Files whose name starts with a dot are ignored. The tree is walked
    bottom-up, and a progress line is printed every ``batch_size`` matches.
    """
    matches = []
    for folder, _subdirs, names in os.walk(root_dir, topdown=False):
        for name in names:
            # skip hidden files and anything with the wrong extension
            if name.startswith(".") or not name.endswith(extension):
                continue
            full_path = os.path.join(folder, name)
            matches.append(full_path)
            found = len(matches)
            if found % batch_size == 0:
                print ("%s - %s" % (found, full_path) )
    return matches

def get_grades(text, locale):
    """Score ``text`` with every readability calculator used by this script.

    Returns a dict keyed by calculator name; each value is a dict with a
    ``score`` and a ``min_age`` entry, both converted to ``decimal.Decimal``.
    The Flesch reading-ease entry has no minimum age and uses 0 instead.
    """
    def grade_and_age(calculator):
        # most calculators expose a US grade plus a minimum age
        return {"score": calculator.us_grade, "min_age": calculator.min_age}

    raw = dict()

    raw['FleschKincaid'] = grade_and_age(FleschKincaid(text, locale=locale))

    flesch = Flesch(text, locale=locale)
    raw['FleschReading'] = {"score": flesch.reading_ease, "min_age": 0}

    #TODO: check usage of simplewordlist
    dale_chall = DaleChall(text, simplewordlist=SIMPLE_WORDS_LIST, locale=locale)
    raw['DaleChall'] = grade_and_age(dale_chall)

    raw['ColemanLiau'] = grade_and_age(ColemanLiau(text, locale=locale))
    raw['LinsearWrite'] = grade_and_age(LinsearWrite(text, locale=locale))
    raw['SMOG'] = grade_and_age(SMOG(text, locale=locale))
    raw['ARI'] = grade_and_age(ARI(text, locale=locale))

    # convert to decimal (via str) once every calculator has run
    return {
        name: {
            field: decimal.Decimal(str(value))
            for field, value in fields.items()
        }
        for name, fields in raw.items()
    }
    
def process_file(file_path, locale):
    """Parse one XML file and attach its readability grades under ``grades``."""
    doc_meta = parse_xml(file_path)
    body_text = doc_meta["body"]
    doc_meta["grades"] = get_grades(body_text, locale)
    return doc_meta

def get_meta_of_file_index(file_list, locale, index):
    """Process the file found at position ``index`` of ``file_list``."""
    return process_file(file_list[index], locale)

def get_meta_of_file(file_path, locale):
    """Process a single file given by path; thin wrapper over ``process_file``."""
    return process_file(file_path, locale)

def process_files(file_list, locale, batch_size):
    """Process every file in ``file_list`` sequentially.

    Returns the list of per-document metadata dicts in input order and prints
    a progress line every ``batch_size`` documents.
    """
    rtn = []
    for count, file_path in enumerate(file_list, start=1):
        doc_meta = process_file(file_path, locale)
        rtn.append(doc_meta)
        if count % batch_size == 0:
            # The progress line used to reference an undefined name ``item``;
            # report the document just processed. .get() because a document
            # may have no symbol.
            print("adding item %s - %s" % (count, doc_meta.get('symbol')) )

    return rtn

def process_files_mp(file_list, locale, processes, batch_size):
    """Process every file in ``file_list`` using a pool of worker processes.

    ``processes`` is the pool size. ``batch_size`` is accepted for signature
    compatibility but is not used; work is handed to workers in chunks of 100.
    Returns the per-document metadata dicts in input order.
    """
    # The context manager shuts the pool down once the (blocking) starmap has
    # returned, so worker processes are no longer leaked.
    with mp.Pool(processes=processes) as pool:
        # starmap: map with more than one parameter per call
        return pool.starmap(process_file, zip(file_list, repeat(locale)), chunksize=100)
        
def process_files_write_to_db(file_list, locale, table):
    """Process each file in turn and store its metadata in ``table``.

    Always returns True once every file has been handled.
    """
    for path in file_list:
        write_to_db(table, process_file(path, locale))
    return True

def load_organ_symbol_file(path):
    """Load the symbol-series to organ lookup table from a ';'-delimited CSV.

    The file needs ``symbol_series`` and ``body_organ`` columns; the result
    maps the former to the latter (later rows win on duplicates).
    """
    with open(path, 'r') as handle:
        reader = csv.DictReader(handle, delimiter=';')
        return {record['symbol_series']: record['body_organ'] for record in reader}
    
def get_organ(organ_symbol, symbol, start_by=2):
    """Return the organ registered for ``symbol`` in ``organ_symbol``.

    The symbol is split on ``/`` and the lookup is attempted with its first
    ``start_by`` segments, then with progressively shorter prefixes down to
    the first segment alone. Returns ``'Other'`` when no prefix is found.

    Symbols with fewer than ``start_by`` segments used to skip the lookup
    entirely; they now start from their own segment count.
    """
    parts = symbol.split('/')
    # never ask for more segments than the symbol actually has
    for depth in range(min(start_by, len(parts)), 0, -1):
        organ = organ_symbol.get('/'.join(parts[:depth]))
        if organ is not None:
            return organ
    return 'Other'
        
def load_simple_words_file(path):
    """Read a word list with one entry per line.

    Every line is stripped of surrounding whitespace; blank lines are kept as
    empty strings.
    """
    with open(path, 'r') as handle:
        return [raw_line.strip() for raw_line in handle]

def write_to_db(table, item):
    """Announce the item's symbol, then store ``item`` via ``table.put_item``."""
    symbol = item['symbol']
    print("adding item:", symbol)
    table.put_item(Item=item)

def write_to_csv(docs_meta, path):
    """Write the documents' metadata and grades to a ';'-delimited CSV file.

    The header consists of the fixed columns ``path``, ``symbol``, ``root``,
    ``organ`` and ``date`` followed by one ``<calculator>_<field>`` column per
    grade value, taken from the first document that carries grades. Documents
    without a ``path`` or a ``symbol`` are skipped; a missing ``root``,
    ``organ`` or ``date`` is written as an empty cell.
    """
    with open(path, 'w', newline='') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=';')

        #header row
        row = ["path","symbol","root","organ","date"]
        # The header used to come from docs_meta[1], which raised IndexError
        # for fewer than two documents.
        header_grades = next(
            (doc_meta['grades'] for doc_meta in docs_meta if 'grades' in doc_meta),
            {},
        )
        for key, scores in header_grades.items():
            for score in scores:
                row.append("%s_%s" % (key, score))
        csvwriter.writerow(row)

        #rest of the rows
        for doc_meta in docs_meta:
            if 'path' in doc_meta and 'symbol' in doc_meta:
                row = [
                    doc_meta['path'],
                    doc_meta['symbol'],
                    doc_meta.get('root', ''),
                    doc_meta.get('organ', ''),
                    doc_meta.get('date', ''),
                ]
                for scores in doc_meta.get('grades', {}).values():
                    row.extend(scores.values())
                csvwriter.writerow(row)

def main():
    """Score one sample document with Dale-Chall and print a debugging dump.

    Loads the organ lookup table and the easy-word list into the module
    globals ``ORGAN_SYMBOL`` and ``SIMPLE_WORDS_LIST``, processes a single
    hard-coded XML file, then prints its metadata, the Dale-Chall results,
    every sentence and the words found in each sentence.
    """
    global ORGAN_SYMBOL, SIMPLE_WORDS_LIST

    # Only referenced by the disabled batch/timing code further down.
    start_time = time.time()
    root_dir = './files/UNv1.0-TEI/en'
    batch_size = 5000

    locale = 'en_US'

    ORGAN_SYMBOL = load_organ_symbol_file('./organ_symbol.csv')
    SIMPLE_WORDS_LIST = load_simple_words_file("./DaleChallEasyWordList.txt")

    ## disabled: read a plain text file
    #text = get_text_from_file('./test2.txt')
    #print(text)

    ## disabled: build the full file list
    #file_list = get_file_list(root_dir, '.xml', batch_size)

    ## disabled: process with dynamodb
    #dynamodb = boto3.resource('dynamodb', region_name='us-east-1')
    #table = dynamodb.Table('readabilityScores')
    #write_to_db(table, doc_meta)
    #process_files_write_to_db(file_list, locale, table)

    ## score one file
    #doc_meta = get_meta_of_file_index(file_list, locale, 5)
    sample_path = './files/UNv1.0-TEI/en/2012/gc_14/sr_6.xml'
    doc_meta = get_meta_of_file(sample_path, locale)
    print(doc_meta)

    readability = DaleChall(doc_meta['body'], simplewordlist=SIMPLE_WORDS_LIST, locale=locale)
    #readability = FleschKincaid(doc_meta['body'], locale=locale)
    #pprint (vars(readability) )
    print (readability.min_age, readability.readingindex, readability.scores, readability.us_grade )

    # one list of words per sentence, in sentence order
    words_per_sentence = []
    print ("=========SENTENCES=========")
    for sentence in readability.sentences:
        print("sentence: "+sentence)
        words_per_sentence.append(readability.re_words.findall(sentence))

    print ("======WORDS========")
    print(len(words_per_sentence))
    pprint (words_per_sentence)

    ## disabled: process batch of files
    #cores = mp.cpu_count()
    #docs_meta = process_files_mp(file_list, locale, (cores-2), 100)
    #write_to_csv(docs_meta, './results.csv')
    #print(docs_meta[1000])

    #print("--- %s seconds ---" % (time.time() - start_time))

    ## disabled: organ lookup check
    #organ_symbol = load_organ_symbol_file('./organ_symbol.csv')
    #print(lookup_organ(organ_symbol,'ECE/ITC/Ass.dasasdas'),2)
    
    
# On the first run the NLTK 'punkt' data has to be downloaded:
#nltk.download('punkt')

# Guarded so that importing this module (for example by multiprocessing
# workers started with the spawn method) does not run main() again.
if __name__ == "__main__":
    main()


parsing - ./files/UNv1.0-TEI/en/2012/gc_14/sr_6.xml
{'path': './files/UNv1.0-TEI/en/2012/gc_14/sr_6.xml', 'symbol': 'GC.14/SR.6', 'jobno': 'V1187674', 'root': 'GC.14', 'organ': 'Other', 'keywords': [], 'date': '20120106', 'body': 'General Conference. Fourteenth session. Summary record of the 6th meeting. Held at the Vienna International Centre, Vienna, on Wednesday, 30 November 2011, at 3.30 p.m. President: Ms. Sión (Ecuador. Contents. Agenda item. Paragraphs. Annual reports of the Director-General on the activities of the Organization for. 2009 and 2010 (continued. Reports of the Industrial Development Board on the work of its thirty-seventh. thirty-eighth and thirty-ninth regular sessions (continued. Forum on industrial development issues (continued. Financial matters (continued. (a) Scale of assessments of Member States (continued. (b) Financial situation of UNIDO (continued. (c) Working Capital Fund (continued. (d) Financial regulations (continued. (e) Appointment of an External Au

23 12.153755202115528 {'sent_count': 321, 'word_count': 6947, 'letter_count': 38378, 'syll_count': 12470, 'polysyllword_count': 1668, 'simpleword_count': 3672, 'sentlen_average': 21.641744548286603, 'wordlen_average': 1.7950194328487117, 'wordletter_average': 5.524399021160213, 'wordsent_average': 0.0462069958255362} 18.153755202115526
321
[['General', 'Conference'],
 ['Fourteenth', 'session'],
 ['Summary', 'record', 'of', 'the', '6th', 'meeting'],
 ['Held',
  'at',
  'the',
  'Vienna',
  'International',
  'Centre',
  'Vienna',
  'on',
  'Wednesday',
  '30',
  'November',
  '2011',
  'at',
  '3',
  '30',
  'p',
  'm',
  'President',
  'Ms',
  'Sión',
  'Ecuador'],
 ['Contents'],
 ['Agenda', 'item'],
 ['Paragraphs'],
 ['Annual',
  'reports',
  'of',
  'the',
  'Director',
  'General',
  'on',
  'the',
  'activities',
  'of',
  'the',
  'Organization',
  'for'],
 ['2009', 'and', '2010', 'continued'],
 ['Reports',
  'of',
  'the',
  'Industrial',
  'Development',
  'Board',
  'on',
  'th

  'the',
  'high',
  'level',
  'meetings',
  'held',
  'in',
  'Vienna',
  'in',
  '2010',
  'on',
  'energy',
  'and',
  'green',
  'industry',
  'and',
  'the',
  'Sustainable',
  'Energy',
  'for',
  'All',
  'initiative',
  'could',
  'be',
  'seen',
  'as',
  'building',
  'blocks',
  'for',
  'the',
  'success',
  'of',
  'the',
  'United',
  'Nations',
  'Conference',
  'on',
  'Sustainable',
  'Development',
  'Rio',
  '20'],
 ['Sustainable',
  'development',
  'and',
  'greener',
  'economies',
  'could',
  'be',
  'achieved',
  'only',
  'by',
  'restructuring',
  'the',
  'twentieth',
  'century',
  'economic',
  'model',
  'based',
  'as',
  'it',
  'was',
  'on',
  'the',
  'extensive',
  'use',
  'of',
  'oil',
  'and',
  'other',
  'fossil',
  'fuels'],
 ['Such', 'restructuring', 'would', 'however', 'be', 'painful'],
 ['His',
  'country',
  's',
  'economy',
  'was',
  'based',
  'on',
  'agriculture',
  'and',
  'mining',
  'the',
  'latter',
  'accounting',
  'for',
 

  '2010'],
 ['Some',
  'progress',
  'had',
  'been',
  'made',
  'but',
  'much',
  'remained',
  'to',
  'be',
  'done'],
 ['Countries',
  'like',
  'Mozambique',
  'had',
  'failed',
  'to',
  'reap',
  'the',
  'full',
  'benefits',
  'of',
  'globalization',
  'since',
  'to',
  'reduce',
  'poverty',
  'countries',
  'had',
  'to',
  'grow',
  'both',
  'rapidly',
  'and',
  'sustainably'],
 ['Economic',
  'growth',
  'depended',
  'on',
  'improved',
  'supply',
  'side',
  'capacity',
  'competitiveness',
  'effective',
  'trade',
  'and',
  'investment',
  'policies',
  'and',
  'a',
  'business',
  'environment',
  'that',
  'could',
  'attract',
  'domestic',
  'and',
  'foreign',
  'investment'],
 ['UNIDO',
  'could',
  'assist',
  'her',
  'country',
  'as',
  'it',
  'assisted',
  'many',
  'developing',
  'countries',
  'in',
  'providing',
  'the',
  'appropriate',
  'skills',
  'technology',
  'transfer',
  'and',
  'institutional',
  'capacity',
  'building'],
 ['Moza

 ['Mr',
  'Annan',
  'Ghana',
  'said',
  'that',
  'the',
  'Director',
  'General',
  's',
  'report',
  'demonstrated',
  'the',
  'commitment',
  'of',
  'UNIDO',
  'to',
  'eradicating',
  'poverty',
  'through',
  'technical',
  'cooperation',
  'and',
  'other',
  'forms',
  'of',
  'support',
  'for',
  'industrial',
  'activities',
  'in',
  'Member',
  'States',
  'including',
  'Ghana'],
 ['In',
  'collaboration',
  'with',
  'UNIDO',
  'the',
  'Ministry',
  'of',
  'Trade',
  'and',
  'Industry',
  'of',
  'Ghana',
  'had',
  'organized',
  'a',
  'two',
  'day',
  'policy',
  'conference',
  'for',
  'ministers',
  'of',
  'industry',
  'energy',
  'and',
  'environment',
  'in',
  'West',
  'African',
  'countries',
  'on',
  'the',
  'theme',
  'Competitiveness',
  'and',
  'diversification',
  'strategy',
  'challenges',
  'in',
  'a',
  'petroleum',
  'rich',
  'economy'],
 ['The',
  'success',
  'of',
  'the',
  'conference',
  'had',
  'been',
  'such',
  'that',
  

  'aimed',
  'at',
  'improving',
  'the',
  'functional',
  'literacy',
  'of',
  'the',
  'young',
  'and',
  'providing',
  'productive',
  'work',
  'for',
  'young',
  'people',
  'changing',
  'them',
  'from',
  'job',
  'seekers',
  'to',
  'job',
  'creators'],
 ['Such',
  'programmes',
  'were',
  'also',
  'useful',
  'in',
  'developing',
  'the',
  'value',
  'chain',
  'and',
  'the',
  'supply',
  'chain',
  'matching',
  'the',
  'supplier',
  'with',
  'the',
  'buyer',
  'accelerating',
  'the',
  'pace',
  'of',
  'exports',
  'through',
  'export',
  'consortia',
  'and',
  'promoting',
  'industrial',
  'diversification'],
 ['Zambia',
  'wished',
  'to',
  'be',
  'part',
  'of',
  'the',
  'third',
  'industrial',
  'development',
  'wave',
  'by',
  'implementing',
  'policy',
  'reforms',
  'as',
  'promoted',
  'by',
  'UNIDO',
  'using',
  'its',
  'own',
  'raw',
  'products',
  'with',
  'the',
  'participation',
  'of',
  'its',
  'own',
  'citizens'],
 ['T

  'attain',
  'the',
  'Millennium',
  'Development',
  'Goals'],
 ['International',
  'organizations',
  'and',
  'institutions',
  'dealing',
  'with',
  'economic',
  'and',
  'social',
  'development',
  'should',
  'therefore',
  'be',
  'reformed',
  'in',
  'order',
  'to',
  'fit',
  'in',
  'with',
  'the',
  'international',
  'reality'],
 ['They',
  'should',
  'reflect',
  'the',
  'interests',
  'of',
  'all',
  'countries',
  'including',
  'developing',
  'and',
  'least',
  'developed',
  'countries'],
 ['El',
  'Salvador',
  'thus',
  'commended',
  'the',
  'new',
  'direction',
  'in',
  'which',
  'the',
  'Director',
  'General',
  'was',
  'taking',
  'UNIDO',
  'with',
  'the',
  'emphasis',
  'on',
  'the',
  'sustainability',
  'of',
  'the',
  'new',
  'industrial',
  'revolution'],
 ['The',
  'allocation',
  'of',
  'programmes',
  'and',
  'technical',
  'cooperation',
  'projects',
  'must',
  'however',
  'be',
  'based',
  'on',
  'a',
  'fair',
  'geogra

  'and',
  'implementing',
  'cooperation',
  'projects'],
 ['The',
  'Director',
  'General',
  's',
  'support',
  'for',
  'Africa',
  'Industrialization',
  'Day',
  'and',
  'the',
  'assistance',
  'and',
  'cooperation',
  'provided',
  'by',
  'UNIDO',
  'to',
  'African',
  'countries',
  'was',
  'most',
  'welcome'],
 ['Ms',
  'Mutandiro',
  'Zimbabwe',
  'said',
  'that',
  'the',
  'growing',
  'demand',
  'for',
  'technical',
  'assistance',
  'from',
  'UNIDO',
  'was',
  'testimony',
  'to',
  'the',
  'Organization',
  's',
  'critical',
  'relevance',
  'to',
  'the',
  'economic',
  'development',
  'efforts',
  'of',
  'its',
  'Member',
  'States'],
 ['Her',
  'Government',
  'appreciated',
  'the',
  'priority',
  'given',
  'by',
  'the',
  'Director',
  'General',
  'to',
  'increasing',
  'technical',
  'cooperation',
  'delivery'],
 ['It',
  'therefore',
  'endorsed',
  'the',
  'implementation',
  'of',
  'the',
  'Programme',
  'for',
  'Change',
  'and',
 

  'framework',
  '2010',
  '2013',
  'continued',
  'IDB',
  '39',
  '8'],
 ['UNIDO',
  'activities',
  'related',
  'to',
  'energy',
  'and',
  'environment',
  'continued',
  'GC',
  '14',
  '8',
  'and',
  'GC',
  '14',
  '9'],
 ['UNIDO',
  'activities',
  'related',
  'to',
  'agribusiness',
  'trade',
  'and',
  'job',
  'creation',
  'continued',
  'GC',
  '14',
  '15'],
 ['Programme',
  'and',
  'budgets',
  '2012',
  '2013',
  'continued',
  'IDB',
  '39',
  '13',
  'Rev',
  '1',
  'IDB',
  '39',
  'Dec',
  '7'],
 ['Review',
  'of',
  'the',
  'progress',
  'of',
  'the',
  'deliberations',
  'of',
  'the',
  'informal',
  'working',
  'group',
  'continued',
  'GC',
  '14',
  '14'],
 ['UNIDO',
  'Staff',
  'Pension',
  'Committee',
  'continued',
  'IDB',
  '39',
  'Dec',
  '8'],
 ['UNIDO',
  'activities',
  'related',
  'to',
  'the',
  '2009',
  'Vienna',
  'Ministerial',
  'Declaration',
  'and',
  'Plan',
  'of',
  'Action',
  'of',
  'the',
  'Least',
  'Developed',
  'C

  'the',
  'entire',
  'livestock',
  'population',
  'in',
  'an',
  'extremely',
  'cold',
  'winter'],
 ['Prolonged',
  'drought',
  'over',
  'the',
  'past',
  'few',
  'years',
  'throughout',
  'the',
  'country',
  'especially',
  'in',
  'the',
  'Gobi',
  'Desert',
  'was',
  'also',
  'causing',
  'water',
  'shortages',
  'in',
  'many',
  'places',
  'forcing',
  'nomads',
  'to',
  'migrate',
  'to',
  'other',
  'areas',
  'where',
  'the',
  'concentration',
  'of',
  'livestock',
  'further',
  'burdened',
  'the',
  'pasture',
  'land',
  'and',
  'prevented',
  'it',
  'from',
  'regenerating',
  'properly'],
 ['Almost',
  '90',
  'per',
  'cent',
  'of',
  'the',
  'territory',
  'had',
  'become',
  'vulnerable',
  'to',
  'desertification',
  'and',
  'land',
  'degradation'],
 ['Aware',
  'of',
  'the',
  'challenges',
  'of',
  'sustainable',
  'development',
  'Mongolia',
  'had',
  'turned',
  'to',
  'UNIDO',
  'for',
  'advice',
  'and',
  'assistance'],
 ['

 ['As',
  'a',
  'country',
  'that',
  'had',
  'greatly',
  'benefited',
  'from',
  'the',
  'support',
  'of',
  'UNIDO',
  'Lesotho',
  'welcomed',
  'the',
  'Director',
  'General',
  's',
  'report',
  'on',
  'the',
  'Organization',
  's',
  'activities'],
 ['Lesotho',
  'supported',
  'the',
  'initiatives',
  'of',
  'UNIDO',
  'to',
  'promote',
  'the',
  'structural',
  'transformation',
  'of',
  'the',
  'least',
  'developed',
  'countries',
  'LDCs',
  'establish',
  'energy',
  'as',
  'a',
  'key',
  'pillar',
  'of',
  'industrial',
  'growth',
  'poverty',
  'alleviation',
  'and',
  'wealth',
  'creation',
  'and',
  'promote',
  'sustainable',
  'development',
  'through',
  'a',
  'green',
  'economy'],
 ['Lesotho',
  'had',
  'set',
  'itself',
  'the',
  'target',
  'of',
  'progressing',
  'from',
  'LDC',
  'status',
  'by',
  '2020'],
 ['It',
  'had',
  'also',
  'set',
  'the',
  'target',
  'of',
  '60',
  'per',
  'cent',
  'rural',
  'electrification'

 ['The',
  'Government',
  'had',
  'also',
  'embarked',
  'on',
  'the',
  'strengthening',
  'of',
  'agribusiness',
  'and',
  'the',
  'agro',
  'processing',
  'industry',
  'with',
  'the',
  'emphasis',
  'on',
  'marine',
  'products',
  'fruit',
  'and',
  'vegetables',
  'oilseed',
  'cotton',
  'and',
  'textiles'],
 ['A',
  'top',
  'priority',
  'for',
  'the',
  'country',
  's',
  'national',
  'youth',
  'development',
  'policy',
  'was',
  'to',
  'develop',
  'the',
  'capacity',
  'of',
  'young',
  'people',
  'for',
  'industrial',
  'activities'],
 ['Ghana',
  'would',
  'welcome',
  'any',
  'assistance',
  'with',
  'the',
  'training',
  'of',
  'young',
  'people',
  'in',
  'entrepreneurship',
  'evolving',
  'trends',
  'in',
  'global',
  'business',
  'and',
  'global',
  'financing'],
 ['Technical',
  'cooperation',
  'between',
  'Ghana',
  'and',
  'UNIDO',
  'under',
  'the',
  'medium',
  'term',
  'programme'],
 ['framework', '2008', '2011', 'had',

  'UNIDO',
  'and',
  'countries',
  'and',
  'organizations',
  'such',
  'as',
  'the',
  'European',
  'Union',
  'that',
  'supported',
  'specific',
  'UNIDO',
  'programmes'],
 ['He', 'called', 'on', 'more', 'developed'],
 ['middle',
  'income',
  'countries',
  'such',
  'as',
  'Brazil',
  'China',
  'India',
  'South',
  'Africa',
  'Turkey',
  'and',
  'the',
  'Arab',
  'countries',
  'to',
  'do',
  'more',
  'to',
  'support',
  'the',
  'South',
  'South',
  'initiatives',
  'of',
  'UNIDO'],
 ['His',
  'country',
  's',
  'industrial',
  'advance',
  'would',
  'be',
  'no',
  'threat',
  'to',
  'developed',
  'countries',
  'and',
  'would',
  'indeed',
  'be',
  'beneficial',
  'to',
  'them'],
 ['Ms',
  'Mapunjo',
  'United',
  'Republic',
  'of',
  'Tanzania',
  'expressed',
  'her',
  'delegation',
  's',
  'gratitude',
  'for',
  'the',
  'leadership',
  'of',
  'the',
  'Director',
  'General',
  'and',
  'the',
  'activities',
  'of',
  'UNIDO',
  'in',
  'helpi

  'Latin',
  'America',
  'and',
  'the',
  'Caribbean',
  'consisted',
  'not',
  'only',
  'of',
  'emerging',
  'countries',
  'but',
  'also',
  'of',
  'many',
  'with',
  'lower',
  'levels',
  'of',
  'development',
  'and',
  'growth',
  'which',
  'should',
  'also',
  'be',
  'provided',
  'with',
  'more',
  'technical',
  'scientific',
  'and',
  'financial',
  'cooperation'],
 ['He',
  'therefore',
  'urged',
  'the',
  'developed',
  'countries',
  'that',
  'contributed',
  'to',
  'the',
  'Industrial',
  'Development',
  'Fund',
  'and',
  'the',
  'trust',
  'funds',
  'to',
  'bear',
  'in',
  'mind',
  'the',
  'needs',
  'of',
  'the',
  'relatively',
  'less',
  'developed',
  'countries',
  'of',
  'Latin',
  'America',
  'and',
  'the',
  'Caribbean',
  'especially',
  'in',
  'Central',
  'America'],
 ['El',
  'Salvador',
  'was',
  'going',
  'through',
  'a',
  'process',
  'of',
  'change',
  'which',
  'included',
  'a',
  'new',
  'vision',
  'of',
  'deve

  's',
  'cooperation',
  'with',
  'FAO',
  'the',
  'International',
  'Fund',
  'for',
  'Agricultural',
  'Development',
  'IFAD',
  'and',
  'other',
  'development',
  'partners',
  'to',
  'promote',
  'value',
  'chain',
  'development',
  'in',
  'Africa',
  'build',
  'competitive',
  'food',
  'supply',
  'systems',
  'and',
  'reduce',
  'reliance',
  'on',
  'food',
  'imports'],
 ['Successful',
  'pilot',
  'projects',
  'had',
  'been',
  'held',
  'to',
  'that',
  'end',
  'in',
  '12',
  'countries',
  'and',
  'her',
  'delegation',
  'urged',
  'UNIDO',
  'to',
  'replicate',
  'them',
  'in',
  'other',
  'parts',
  'of',
  'Africa'],
 ['The',
  'activities',
  'of',
  'UNIDO',
  'in',
  'building',
  'capacity',
  'and',
  'providing',
  'technical',
  'support',
  'in',
  'the',
  'area',
  'of',
  'supply',
  'chain',
  'management',
  'quality',
  'and',
  'standards',
  'would',
  'assist',
  'developing',
  'countries',
  'in',
  'increasing',
  'their',
  's