In [21]:
# import xml.dom.minidom as minidom
import os
import pandas as pd
from natsort import os_sorted
from markdownify import markdownify as md
import xml.etree.ElementTree as ET
import copy

import warnings
warnings.filterwarnings('ignore')

In [24]:

def get_coverPage_data(akn, coverPage):
    """Collect the cover-page metadata of a resolution into a dict.

    Args:
        akn: Namespace prefix in ``{uri}`` form, prepended to every tag name
            in the lookups.
        coverPage: The element whose descendants are searched.

    Returns:
        dict with the keys ``publisher``, ``issuingBody``, ``symbol``,
        ``docStatus``, ``date``, ``session``, ``agendaItem``,
        ``agendaItemDesc`` and ``docTitle`` (in that order).

    Raises:
        AttributeError: if any element other than the agenda item name is
            not found (``find`` returns ``None``).
        KeyError: if ``docDate`` has no ``date`` attribute or ``session`` has
            no ``value`` attribute.
    """

    def locate(selector):
        # First descendant of the cover page matching the namespaced selector.
        return coverPage.find(".//" + akn + selector)

    # The agenda item name is the only optional field: fall back to ''.
    desc_node = locate("inline[@name='agenda item name']")
    agenda_item_desc = desc_node.text if isinstance(desc_node, ET.Element) else ''

    return {
        'publisher': locate("docAuthority[@refersTo='#publisher']").text,
        'issuingBody': locate("docAuthority[@refersTo='#issuingBody']").text,
        'symbol': locate("docNumber[@refersTo='#symbol']").text,
        'docStatus': locate("docStatus").text,
        'date': locate("docDate").attrib['date'],
        'session': locate("session").attrib['value'],
        'agendaItem': locate("inline[@name='agenda item']").text,
        'agendaItemDesc': agenda_item_desc,
        'docTitle': locate("docTitle").text,
    }

In [88]:
def _element_to_markdown(element):
    """Serialize an XML element and return its Markdown rendering."""
    xml_text = ET.tostring(element, encoding='utf8', method='xml').decode("utf8")
    # ET.tostring(encoding='utf8') prepends an XML declaration; the replace
    # drops the declaration text from the converted output.
    return md(xml_text).replace("xml version='1.0' encoding='utf8'?", "").strip()


def get_resolution_data(path, file, keywords):
    """Parse one resolution XML file into a DataFrame with one row per text block.

    Rows are produced, in order, for:
      * the first ``p`` of every ``container`` in the preamble (``p_num`` is ''),
      * every ``heading`` and then every ``content`` found under each
        ``paragraph`` of the main body (``p_num`` is the paragraph's ``num``
        text, or '' when it has none).

    Args:
        path: Directory containing the file.
        file: File name inside ``path``.
        keywords: Iterable of substrings; a row's ``select`` is True when at
            least one of them occurs (case-sensitively) in the row's ``text``.

    Returns:
        pandas.DataFrame with the cover-page columns from
        ``get_coverPage_data`` followed by ``forumula`` (sic -- the misspelled
        column name is kept so existing consumers keep working), ``eId``,
        ``p_num``, ``text`` and ``select``.

    Raises:
        AttributeError: if the document has no preamble or main body, or a
            required cover-page element is missing.
        KeyError: if a container or paragraph lacks an ``eId`` attribute.
    """
    # os.path.join also works when `path` has no trailing separator.
    tree = ET.parse(os.path.join(path, file))

    akn = "{http://docs.oasis-open.org/legaldocml/ns/akn/3.0}"
    coverPage = tree.find(".//"+akn+"coverPage")
    preamble = tree.find(".//"+akn+"preamble")
    mainBody = tree.find(".//"+akn+"mainBody")

    # Text of the formula's first span, or '' when the preamble has no formula.
    formula = preamble.find(".//"+akn+"formula")
    if formula is not None:
        formula = formula.find(".//"+akn+"span").text
    else:
        formula = ''

    # Collect (eId, p_num, element) for every text block, in output order.
    entries = []

    for c in preamble.findall(".//"+akn+"container"):
        preamble_text = c.find(".//"+akn+"p")
        # Compare against None: an Element without child elements is falsy,
        # so a plain truth test would silently skip text-only <p> elements.
        if preamble_text is not None:
            entries.append((c.attrib['eId'], '', preamble_text))

    for p in mainBody.findall(".//"+akn+"paragraph"):
        num_node = p.find(".//"+akn+"num")
        p_num = num_node.text if num_node is not None else ''

        for ph in p.findall(".//"+akn+"heading"):
            entries.append((p.attrib['eId'], p_num, ph))
        for pc in p.findall(".//"+akn+"content"):
            entries.append((p.attrib['eId'], p_num, pc))

    resolution_data = []
    if entries:
        # Cover-page metadata is identical for every row: read it once per file.
        cover_data = get_coverPage_data(akn, coverPage)
        for eId, p_num, element in entries:
            d = dict(cover_data)
            d['forumula'] = formula
            d['eId'] = eId
            d['p_num'] = p_num
            d['text'] = _element_to_markdown(element)
            resolution_data.append(d)

    x = pd.DataFrame(resolution_data)
    if resolution_data:
        # Assign the whole column at once; writing to the rows yielded by
        # iterrows() only changes a copy and never reaches the DataFrame.
        x['select'] = [any(k in text for k in keywords) for text in x['text']]
    else:
        x['select'] = False

    return x


In [89]:
def get_ga_data(path, keywords):
    """Parse every file in a directory and stack the results into one DataFrame.

    Args:
        path: Directory whose entries are each passed to
            ``get_resolution_data``; entries are processed in ``os_sorted`` order.
        keywords: Keyword list forwarded unchanged to ``get_resolution_data``.

    Returns:
        The per-file DataFrames concatenated with ``pd.concat`` (each file's
        own row index is kept, so index values repeat across files).
    """
    frames = [
        get_resolution_data(path, name, keywords)
        for name in os_sorted(os.listdir(path))
    ]
    return pd.concat(frames)


In [90]:
# Relative paths to the input directories, one per session / year.
GA_76_path = "../UNxml/GAresolutions-main/76session/English/"
GA_77_path = "../UNxml/GAresolutions-main/77session/English/"
ECOSOC_2021_path = "../UNxml/ECOSOCresolutions-main/2021/English/"
# Was a second assignment to ECOSOC_2021_path, which silently replaced the
# 2021 directory and left ECOSOC_2022_path undefined.
ECOSOC_2022_path = "../UNxml/ECOSOCresolutions-main/2022/English/"


In [91]:
# Build the combined table for the GA_76_path directory. The second argument
# is the keyword list that is checked against each row's text for the
# `select` column.
ga_76_data = get_ga_data(GA_76_path,['statistic', 'data'])
# NOTE(review): the disabled calls below omit the required `keywords`
# argument of get_ga_data and would raise TypeError if re-enabled as written.
# ga_77_data = get_ga_data(GA_77_path)
# ecosoc_2021_data = get_ga_data(ECOSOC_2021_path)
# ecosoc_2022_data = get_ga_data(ECOSOC_2022_path)

In [93]:
# Export the combined table to an Excel workbook. Writing .xlsx needs the
# optional `openpyxl` package in the kernel environment; the recorded output
# below shows the ModuleNotFoundError raised when it is missing.
ga_76_data.to_excel("../test.xlsx") 

ModuleNotFoundError: No module named 'openpyxl'

Unnamed: 0,publisher,issuingBody,symbol,docStatus,date,session,agendaItem,agendaItemDesc,docTitle,forumula,eId,p_num,text
11,United Nations,General Assembly,A/RES/76/16,Distr.: General,2021-12-08,76,Agenda item 10,,Resolution adopted by the General Assembly on ...,The General Assembly,container_13,,Recalling also the Doha Declaration on Integra...
151,United Nations,General Assembly,A/RES/76/71,Distr.: General,2021-12-17,76,Agenda item 78 (b),Oceans and the law of the sea: sustainable fis...,Resolution adopted by the General Assembly on ...,The General Assembly,para_1288,69.0,Also reaffirms its request that the Food and A...
8,United Nations,General Assembly,A/RES/76/85,Distr.: General,2021-12-15,76,Agenda item 59,Information from Non-Self-Governing Territorie...,Resolution adopted by the General Assembly on ...,The General Assembly,para_11,3.0,Also requests the administering Powers concern...
16,United Nations,General Assembly,A/RES/76/135,Distr.: General,2022-01-05,76,Agenda item 28 (b),"Social development: social development, includ...",Resolution adopted by the General Assembly on ...,The General Assembly,para_46,9.0,Also encourages Governments to intensify and e...
60,United Nations,General Assembly,A/RES/76/136,Distr.: General,2022-01-06,76,Agenda item 28 (b),"Social development: social development, includ...",Resolution adopted by the General Assembly on ...,The General Assembly,para_293,23.0,Encourages Member States to improve the collec...
43,United Nations,General Assembly,A/RES/76/137,Distr.: General,2022-01-06,76,Agenda item 28 (b),"Social development: social development, includ...",Resolution adopted by the General Assembly on ...,The General Assembly,para_73,9.0,Stresses the need to strengthen and support th...
48,United Nations,General Assembly,A/RES/76/138,Distr.: General,2022-01-06,76,Agenda item 28 (b),"Social development: social development, includ...",Resolution adopted by the General Assembly on ...,The General Assembly,para_326,25.0,Recommends that Member States enhance their ca...
52,United Nations,General Assembly,A/RES/76/140,Distr.: General,2022-01-06,76,Agenda item 29 (a),Advancement of women: advancement of women,Resolution adopted by the General Assembly on ...,The General Assembly,para_20,2.0,"Addressing the lack of quality, accessible, ti..."
53,United Nations,General Assembly,A/RES/76/140,Distr.: General,2022-01-06,76,Agenda item 29 (a),Advancement of women: advancement of women,Resolution adopted by the General Assembly on ...,The General Assembly,para_20,2.0,Strengthening the capacity of national statist...
37,United Nations,General Assembly,A/RES/76/141,Distr.: General,2022-01-06,76,Agenda item 29 (a),Advancement of women: advancement of women,Resolution adopted by the General Assembly on ...,"The General Assembly,",container_39,,"Emphasizing the need for objective, comprehens..."


Unnamed: 0,publisher,issuingBody,symbol,docStatus,date,session,agendaItem,agendaItemDesc,docTitle,forumula,eId,p_num,text
0,United Nations,Economic and Social Council,E/RES/2022/1,Distr.: General,2021-08-02,2022,Agenda item 2,Adoption of the agenda and other organizationa...,Resolution adopted by the Economic and Social ...,The Economic and Social Council,container_2,,Recalling General Assembly resolutions 61/16 o...
1,United Nations,Economic and Social Council,E/RES/2022/1,Distr.: General,2021-08-02,2022,Agenda item 2,Adoption of the agenda and other organizationa...,Resolution adopted by the Economic and Social ...,The Economic and Social Council,container_3,,"Reiterating that, as a principal organ of the ..."
2,United Nations,Economic and Social Council,E/RES/2022/1,Distr.: General,2021-08-02,2022,Agenda item 2,Adoption of the agenda and other organizationa...,Resolution adopted by the Economic and Social ...,The Economic and Social Council,container_4,,"Acknowledging that, in scheduling its sessions..."
3,United Nations,Economic and Social Council,E/RES/2022/1,Distr.: General,2021-08-02,2022,Agenda item 2,Adoption of the agenda and other organizationa...,Resolution adopted by the Economic and Social ...,The Economic and Social Council,container_5,,Recalling the adoption of the Addis Ababa Acti...
4,United Nations,Economic and Social Council,E/RES/2022/1,Distr.: General,2021-08-02,2022,Agenda item 2,Adoption of the agenda and other organizationa...,Resolution adopted by the Economic and Social ...,The Economic and Social Council,container_6,,Recalling also that it was decided in the inte...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5,United Nations,Economic and Social Council,E/RES/2022/25,Distr.: General,2022-07-29,2022,Agenda item 7,Operational activities of the United Nations f...,Resolution adopted by the Economic and Social ...,The Economic and Social Council,para_10,4.,"Re-emphasizes that adequate, predictable and s..."
6,United Nations,Economic and Social Council,E/RES/2022/25,Distr.: General,2022-07-29,2022,Agenda item 7,Operational activities of the United Nations f...,Resolution adopted by the Economic and Social ...,The Economic and Social Council,para_14,5.,Takes note of the report of the Chair of the U...
7,United Nations,Economic and Social Council,E/RES/2022/25,Distr.: General,2022-07-29,2022,Agenda item 7,Operational activities of the United Nations f...,Resolution adopted by the Economic and Social ...,The Economic and Social Council,para_19,6.,Welcomes the consultative process launched by ...
8,United Nations,Economic and Social Council,E/RES/2022/25,Distr.: General,2022-07-29,2022,Agenda item 7,Operational activities of the United Nations f...,Resolution adopted by the Economic and Social ...,The Economic and Social Council,para_25,7.,Stresses the need to continue improving the tr...
