This notebook contains code snippets showing how to run the AXA Parsr Docker container and access it programmatically.
[This is a work in progress]

## Installations

Guide: https://github.com/axa-group/Parsr/tree/master/demo/parsr-jupyter-demo
Once the Parsr client is installed, we need the default config to work with the server API, and the wrapper to handle the output.
Check the local folder ./server

In [5]:
from requests import post

# Relative path of the PDF to be sent to the server for processing.
file_path='../documents/testfile.pdf'
# Relative path of the config file sent alongside the PDF in the POST request;
# the server requires it to set its internal module configs.
config_path='../axaserver/defaultConfig.json'
def send_doc(url, file_path, config_path):
    """Upload a PDF and its configuration file to a Parsr server.

    Both files are sent in a single multipart POST to ``<url>/api/v1/document``.

    - url: Base address of the server (e.g. ``http://localhost:3001``).
    - file_path: Path of the PDF to upload.
    - config_path: Path of the JSON configuration file to upload.

    Returns a dict with the two paths, the HTTP status code and the raw
    response body (under ``server_response``).
    """
    # Context managers guarantee both handles are closed once the request has
    # completed, even if the POST raises.
    with open(file_path, 'rb') as pdf_handle, open(config_path, 'rb') as config_handle:
        packet = {
            'file': (file_path, pdf_handle, 'application/pdf'),
            'config': (config_path, config_handle, 'application/json'),
        }
        r = post(url + '/api/v1/document', files=packet)

    return {
        'file': file_path,
        'config': config_path,
        'status_code': r.status_code,
        'server_response': r.text,
    }

In [2]:
# THIS IS EXAMPLE CODE FROM AXA REPO FOR RENDERING THE OUTPUT FROM PARSR SERVER


# Copyright 2019 AXA Group Operations S.A.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import uuid
import json
from IPython.display import display, Markdown, HTML
from pygments import highlight
from pygments.lexers import JsonLexer
from pygments.formatters import TerminalFormatter


class RenderJSON:
	"""Pretty-print JSON-serialisable data with syntax highlighting."""

	def __init__(self, data):
		# Rendering happens as a side effect of construction.
		self.render_json(data)

	def render_json(self, data):
		"""Print ``data`` as indented, key-sorted, highlighted JSON."""
		# Round-trip through JSON first so the value is reduced to plain JSON types.
		normalised = json.loads(json.dumps(data))
		pretty = json.dumps(normalised, sort_keys=True, indent=4)
		coloured = highlight(pretty, JsonLexer(), TerminalFormatter())
		print(coloured)


class RenderMarkdown:
	"""Wrap a Markdown string so that it is rendered as rich output."""

	def __init__(self, markdown_data):
		# Every wrapper carries its own random identifier.
		self.uuid = str(uuid.uuid4())
		self.markdown_data = markdown_data

	def _ipython_display_(self):
		"""Render the stored Markdown through IPython's display machinery."""
		rendered = Markdown(self.markdown_data)
		display(rendered)


class RenderHTML(object):
	"""Render raw HTML markup and/or an HTML file as rich notebook output.

	- html_data: Raw HTML markup to display.
	- html_file: Path of an HTML file to display.
	"""

	def __init__(self, html_data=None, html_file=None):
		if not html_data and not html_file:
			# Kept as a printed warning (not an exception) so existing callers are unaffected.
			print("You need to provide either a filename or raw HTML data for something to be rendered")
		self.html_data = html_data
		self.html_file = html_file
		self.uuid = str(uuid.uuid4())

	def _ipython_display_(self):
		"""Display the raw HTML first (if given), then the HTML file (if given)."""
		if self.html_data is not None:
			display(HTML(self.html_data))
		if self.html_file is not None:
			# The path is stored as `html_file`, and the HTML object has to be
			# passed to display(); building it alone renders nothing.
			display(HTML(filename=self.html_file))

In [6]:
from requests import get
from json import loads
import pandas as pd
from io import StringIO
from ast import literal_eval

# this is taken/adapted from https://github.com/axa-group/Parsr/blob/master/clients/python-client/parsr_client/parsr_client.py

def get_json(request_id: str = "", server: str = ""):
    """Fetch Parsr's JSON output (result) for a particular request.

    - request_id: The ID of the request to be queried with the server
    - server: The server from which the JSON is to be fetched

    Returns the decoded JSON body. When the response body is empty, a dict
    holding the request ID and the (empty) raw response text is returned
    instead.

    Raises Exception if `server` or `request_id` is an empty string.
    """
    if server == "":
        raise Exception('No server address provided')

    if request_id == "":
        raise Exception('No request ID provided')

    r = get('{}/api/v1/json/{}'.format(server, request_id))
    if r.text != "":
        return r.json()
    # An empty body cannot be decoded as JSON, so report the raw text instead
    # (the same shape get_text and get_markdown return for an empty body).
    return {'request_id': request_id, 'server_response': r.text}
        
def get_text(request_id: str = "", server: str = ""):
    """Fetch Parsr's plain-text output for a particular request.

    - request_id: The ID of the request to be queried with the server
    - server: The server from which the text is to be fetched

    Returns the response body as a string, or, when the body is empty, a dict
    holding the request ID and the empty response text.

    Raises Exception if `server` or `request_id` is an empty string.
    """
    # The server address is validated first, then the request ID.
    for value, message in ((server, 'No server address provided'),
                           (request_id, 'No request ID provided')):
        if value == "":
            raise Exception(message)

    endpoint = '{}/api/v1/text/{}'.format(server, request_id)
    body = get(endpoint).text
    if body == "":
        return {'request_id': request_id, 'server_response': body}
    return body
        

        
def get_markdown(request_id: str = "", server: str = ""):
    """Fetch Parsr's Markdown output for a particular request.

    - request_id: The ID of the request to be queried with the server
    - server: The server from which the Markdown is to be fetched

    Returns the response body as a string, or, when the body is empty, a dict
    holding the request ID and the empty response text.

    Raises Exception if `server` or `request_id` is an empty string.
    """
    if server == "":
        raise Exception('No server address provided')
    if request_id == "":
        raise Exception('No request ID provided')

    response = get('{0}/api/v1/markdown/{1}'.format(server, request_id))
    markdown = response.text
    empty_result = {'request_id': request_id, 'server_response': markdown}
    return markdown if markdown != "" else empty_result


def get_table(
        request_id: str = "",
        page=None,
        table=None,
        seperator=";",
        server: str = "",
        column_names: list = None):
    """Get a particular table from a processed document.

    - request_id: The request to be queried to get a document.
    - page: The page number on which the queried table exists.
    - table: The table number to be fetched.
    - seperator: The seperator to be used between table cells (default ';')
    - server: The server address which is to be queried.
    - column_names: The headings of the table searched (column titles)

    When neither `page` nor `table` is given, the CSV endpoint is queried
    without a page/table suffix.

    Returns a pandas DataFrame in which columns whose name starts with
    'Unnamed' are dropped and missing values are replaced by a single space.
    If the response body is empty, or it cannot be parsed, the raw response
    text is returned instead.

    Raises Exception if `server` or `request_id` is an empty string.
    """
    if server == "":
        raise Exception('No server address provided')

    if request_id == "":
        raise Exception('No request ID provided')

    if page is None and table is None:
        r = get('{}/api/v1/csv/{}'.format(server, request_id))
    else:
        # NOTE(review): if only one of page/table is supplied, the other one is
        # formatted into the URL as the literal "None".
        r = get('{}/api/v1/csv/{}/{}/{}'.format(server,
                                                request_id,
                                                page,
                                                table))

    if r.text == "":
        return r.text

    try:
        df = pd.read_csv(StringIO(r.text), sep=seperator, names=column_names)
        # The selection returns a new frame, so it must be assigned for the
        # 'Unnamed' columns to actually be dropped. Column labels are cast
        # to str so non-string names (e.g. integer column_names) still work.
        df = df.loc[:, ~df.columns.astype(str).str.match('Unnamed')]
        return df.where(pd.notnull(df), " ")
    except Exception:
        # Best-effort fallback: hand back the raw text when it cannot be parsed.
        return r.text

## Local-Server

In [8]:
# Base address of the locally running Parsr container.
url = "http://localhost:3001"
r = send_doc(url, file_path, config_path)
r

{'file': '../documents/testfile.pdf',
 'config': '../axaserver/defaultConfig.json',
 'status_code': 202,
 'server_response': 'd6690f0638017986fe312742ffe51b'}

In [233]:
# Processing the PDF can take anywhere from a few seconds to a few minutes.
r_status = get(f"{url}/api/v1/queue/{r['server_response']}")
print(r_status)
loads(r_status.text)

urllib3.connectionpool - DEBUG - Starting new HTTP connection (1): localhost:3001
urllib3.connectionpool - DEBUG - http://localhost:3001 "GET /api/v1/queue/6a47c1790b724991974571e67539ef HTTP/1.1" 201 257


<Response [201]>


{'id': '6a47c1790b724991974571e67539ef',
 'json': '/api/v1/json/6a47c1790b724991974571e67539ef',
 'csv': '/api/v1/csv/6a47c1790b724991974571e67539ef',
 'text': '/api/v1/text/6a47c1790b724991974571e67539ef',
 'markdown': '/api/v1/markdown/6a47c1790b724991974571e67539ef'}

In [242]:
# Fetch every output format for the request reported by the queue endpoint.
r_json = get_json(request_id=loads(r_status.text)['id'], server=url)
r_text = get_text(request_id=loads(r_status.text)['id'], server=url)
r_markdown = get_markdown(request_id=loads(r_status.text)['id'], server=url)

urllib3.connectionpool - DEBUG - Starting new HTTP connection (1): localhost:3001
urllib3.connectionpool - DEBUG - http://localhost:3001 "GET /api/v1/json/6a47c1790b724991974571e67539ef HTTP/1.1" 200 174688
urllib3.connectionpool - DEBUG - Starting new HTTP connection (1): localhost:3001
urllib3.connectionpool - DEBUG - http://localhost:3001 "GET /api/v1/text/6a47c1790b724991974571e67539ef HTTP/1.1" 200 6241
urllib3.connectionpool - DEBUG - Starting new HTTP connection (1): localhost:3001
urllib3.connectionpool - DEBUG - http://localhost:3001 "GET /api/v1/markdown/6a47c1790b724991974571e67539ef HTTP/1.1" 200 6376


In [245]:
from IPython.display import display, Markdown, HTML
# Render the fetched Markdown as rich output.
display(Markdown(r_markdown))

In accordance with decision No. 1/CP.21, paragraph 24, adopted at the Conference of the Parties to the United Nations Framework Convention on Climate Change (COP21), the Republic of Azerbaijan provides an updated version of its “Nationally Determined Contributions (NDCs)” document, as well as additional information on contributions to ensure clarity and transparency on the basis of decision No. 4/CMA.1 of the Conference of the Parties (COP24).

---

**The Republic of Azerbaijan** Updated document on Nationally Determined Contributions (NDC)**

**2023**

---

**Table of Content**

**Introduction**

1. **Information on the process of preparation of the document “Nationally Determined Contributions”**
2. **National circumstances**
3. **Information on the implemented Climate Policy**
4. **Mitigation measures of environmental impacts: 4.1 Targets for reducing greenhouse gas emissions 4.2 Sectoral policies**

---

**Introduction**

As agreed at the Conference of the Parties in Paris (COP21), all party states, regardless of their level of development and political objectives, shall mobilise more intensively to take urgent actions for achieving the goals of the Paris Agreement.

According to the 6th Assessment Report of the Intergovernmental Panel on Climate Change (IPCC), global temperatures will continue to rise until at least the middle of this century under all scenarios considered for emissions. Moreover, global warming will exceed 1.5°C or 2°C in the 21st century unless drastic reductions in emissions of carbon dioxide (CO2) and other greenhouse gases (GHGs) are achieved in the coming decades.

The Republic of Azerbaijan actively participates in the implementation of the United Nations Framework Convention on Climate Change (UNFCCC), the Kyoto Protocol and the Paris Agreement.

In accordance with Article 4 of the Paris Agreement, the Republic of Azerbaijan has introduced its Nationally Determined Contributions (NDC) to the UNFCCC Secretariat in October 2015 and as a contribution to initiatives for preventing global climate change (mitigation initiatives) compared to 1990 (base year) aims to reduce greenhouse gas emissions by 35% by 2030.

Since then, the evaluation of the policies pursued by the government and the assessment of the measures taken has created an opportunity to propose a higher target to the government of Azerbaijan. Taking into account the new realities after the liberation of about 20 percent of the country's territories from a 30-year occupation and strategic socio-economic development programmes, national circumstances, especially the plans for diversification of the economy over the next decade, the proposed GHG emission reduction target by 2030 in Azerbaijan's Nationally Determined Contribution document is quite ambitious.

Azerbaijan plans to take part in reducing the environmental impact on the basis of its NDC document, primarily through its domestic capacities and has, nevertheless, taken important steps in international cooperation in accordance with Article 6 of the Paris Agreement.

The new version of **Azerbaijan's Nationally Determined Contributions by 2050** includes the following elements:

**Azerbaijan's Nationally Determined Contributions by 2050** were renewed in 2023.

The Republic of Azerbaijan, subject to its sustainable socio-economic development, will seek to reduce greenhouse gas emissions by 40% compared to 1990 (base year) level by 2050 if international support is provided through financing, technology transfer and capacity building.

| As per the instructions outlined in the decision 4/CMA. |<|  
|---|---|  
| **Quantitative data as of the reference date (including base year, if applicable):** |<|  
| Reference (base) year | 1990 |  


---

| Quantitative data as of reference date the | In the 4 th National Communication of Azerbaijan, the total volume of greenhouse gas emissions for the reporting year (taking into account the absorption volume in the land use, land-use change and forestry (LULUCF) sector) is assumed to be 79 Mt of CO2 equivalent \*1 |  
|---|---|  
| Objective regarding numerical reference | Reduction of greenhouse gas emissions by up to 40% compared to 1990 level, taking into account the maximum absorption capacity of forests and other ecosystems. |  
| Conditions for changing reference indicators | Total greenhouse gas emissions into the atmosphere can be and through methodological improvements. Updates will be included in the next Biennial Update Report or Biennial and National Communication. Transparency Report updated  recalculated |  
| **Deadlines and/or terms for execution:** |<|  
| The deadlines and/or terms for execution, as well as the start and end date, in accordance with any relevant decision made at the Conference of the Parties of the Paris Agreement | From 1 January 2023 to 31 December 2050. |  
| Consideration of paragraphs 31 (c) and (d) of decision 1/CP.21 by the country | included inventory. In its 4th National Communication, Azerbaijan recorded updated information on sources included in the greenhouse gas  Information on greenhouse gas emissions into the atmosphere from sources that have not been accounted for due to lack of data will be  the reports after restructuring of the Monitoring, Reporting and Verification (MRV) system in accordance with the requirements of the improved transparency format specified in Article 13 of the Paris Agreement. If air emissions from these sources are significant, i.e., if they are considered a key emission sector, Azerbaijan will provide relevant clarifying information on these emissions the UNFCCC Secretariat. in the next reports to in |  
| **Scale and scope:** |<|  
| General description of the target | Target covering all sectors |  


1 *It should be noted that during the GHG inventory for 1990 (base year), there was some missing data to calculate emissions/absorbances for all categories mentioned in the relevant methodological guidelines prepared by the IPCC in 2006. In this regard, when inventories are conducted in subsequent years based on more complete data for the base year, the GHG emission figure for that year may be subject to change in the assessment. Moreover, due to the lack of data for the occupied territories of Azerbaijan for about 30 years, it is foreseen to recalculate emissions/absorbances for this period.*



In [252]:
# First fetch the list of available tables, then use the same function to fetch
# the tables one by one.
tables =  get_table(server=url,request_id=loads(r_status.text)['id'])

# Each entry is a path whose last two segments are the page number and the
# table number (the same order get_table uses when building the URL), so the
# last segment identifies a table rather than counting tables.
for table in literal_eval(tables.columns[0]):
    page_number, table_number = table.rsplit('/', 2)[-2:]
    print("Table {} on page {}".format(table_number, page_number))

urllib3.connectionpool - DEBUG - Starting new HTTP connection (1): localhost:3001
urllib3.connectionpool - DEBUG - http://localhost:3001 "GET /api/v1/csv/6a47c1790b724991974571e67539ef HTTP/1.1" 200 99


There are 1 tables on page 4
There are 1 tables on page 5


In [253]:
from IPython.display import display, HTML
# Fetch a single table by passing its page number and table number.
df = get_table(request_id=loads(r_status.text)['id'], server=url, page=5, table=1)
display(HTML(df.to_html()))

urllib3.connectionpool - DEBUG - Starting new HTTP connection (1): localhost:3001
urllib3.connectionpool - DEBUG - http://localhost:3001 "GET /api/v1/csv/6a47c1790b724991974571e67539ef/5/1 HTTP/1.1" 200 1925


Unnamed: 0,Quantitative data as of reference date the,"In the 4 th National Communication of Azerbaijan, the total volume of greenhouse gas emissions for the reporting year (taking into account the absorption volume in the land use, land-use change and forestry (LULUCF) sector) is assumed to be 79 Mt of CO2 equivalent *1"
0,Objective regarding numerical reference,"Reduction of greenhouse gas emissions by up to 40% compared to 1990 level, taking into account the maximum absorption capacity of forests and other ecosystems."
1,Conditions for changing reference indicators,Total greenhouse gas emissions into the atmosphere can be and through methodological improvements. Updates will be included in the next Biennial Update Report or Biennial and National Communication. Transparency Report updated recalculated
2,Deadlines and/or terms for execution:,
3,"The deadlines and/or terms for execution, as well as the start and end date, in accordance with any relevant decision made at the Conference of the Parties of the Paris Agreement",From 1 January 2023 to 31 December 2050.
4,Consideration of paragraphs 31 (c) and (d) of decision 1/CP.21 by the country,"included inventory. In its 4th National Communication, Azerbaijan recorded updated information on sources included in the greenhouse gas Information on greenhouse gas emissions into the atmosphere from sources that have not been accounted for due to lack of data will be the reports after restructuring of the Monitoring, Reporting and Verification (MRV) system in accordance with the requirements of the improved transparency format specified in Article 13 of the Paris Agreement. If air emissions from these sources are significant, i.e., if they are considered a key emission sector, Azerbaijan will provide relevant clarifying information on these emissions the UNFCCC Secretariat. in the next reports to in"
5,Scale and scope:,
6,General description of the target,Target covering all sectors


In [250]:
with open('filetest.md', 'w') as file:
    file.write(r_markdown)

## HuggingFace (public server)

Follow the instructions: https://huggingface.co/docs/hub/en/spaces-sdks-docker

*Dockerfile*

```dockerfile
FROM axarev/parsr

EXPOSE 3001:3001
```

*Readme.md*

```yaml
sdk: docker
app_port: 3001
```

For the API endpoints: get the 'Direct URL' from 'Embed this Space', found in the options next to 'Settings' in the Space UI.

In [254]:
# Direct URL of the Hugging Face Space hosting the Parsr server.
url = "https://giz-axaparsr-server.hf.space"
r = send_doc(url, file_path, config_path)
r

urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): giz-axaparsr-server.hf.space:443
urllib3.connectionpool - DEBUG - https://giz-axaparsr-server.hf.space:443 "POST /api/v1/document HTTP/1.1" 202 30


{'file': './documents/testfile.pdf',
 'config': './server/defaultConfig.json',
 'status_code': 202,
 'server_response': '10df52d730e892b93897c683778de7'}

In [255]:
# Processing the PDF can take anywhere from a few seconds to a few minutes.
r_status = get(f"{url}/api/v1/queue/{r['server_response']}")
print(r_status)
loads(r_status.text)

urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): giz-axaparsr-server.hf.space:443
urllib3.connectionpool - DEBUG - https://giz-axaparsr-server.hf.space:443 "GET /api/v1/queue/10df52d730e892b93897c683778de7 HTTP/1.1" 201 257


<Response [201]>


{'id': '10df52d730e892b93897c683778de7',
 'json': '/api/v1/json/10df52d730e892b93897c683778de7',
 'csv': '/api/v1/csv/10df52d730e892b93897c683778de7',
 'text': '/api/v1/text/10df52d730e892b93897c683778de7',
 'markdown': '/api/v1/markdown/10df52d730e892b93897c683778de7'}

## Azure Container Instance (public server)

Add TCP port 3001 when creating the container.
Once the container is up and running, use its FQDN for the API endpoints.

The API endpoints will be at: FQDN:3001

In [256]:
# Address (FQDN plus port 3001) of the Azure Container Instance running Parsr.
url = "http://axaparsr.dggxf4fce9athrhp.westeurope.azurecontainer.io:3001"
r = send_doc(url, file_path, config_path)
r

urllib3.connectionpool - DEBUG - Starting new HTTP connection (1): axaparsr.dggxf4fce9athrhp.westeurope.azurecontainer.io:3001
urllib3.connectionpool - DEBUG - http://axaparsr.dggxf4fce9athrhp.westeurope.azurecontainer.io:3001 "POST /api/v1/document HTTP/1.1" 202 30


{'file': './documents/testfile.pdf',
 'config': './server/defaultConfig.json',
 'status_code': 202,
 'server_response': '81cdc32d7da23ef2a2488b68665b9a'}

In [20]:
# from haystack.nodes import ParsrConverter
# converter = ParsrConverter()
# import os

# def process(file):
#     # print(file)
#     docs = converter.convert(file)
#     return docs

In [21]:
# %%time
# import glob
# path_to_data = './documents/MWTS/'
# mwts_placeholder = []
# files = glob.glob(path_to_data+"*")
# for file in files:
#     try:
#         docs = process(file)
#         mwts_placeholder.append({'filename':os.path.basename(file),'haystack_doc':docs})
#     except:
#         print(f"error in processing {file}")


In [None]:
# from parsr_client import ParsrClient as client
# parsr = client('localhost:3001')

# job = parsr.send_document(
#     file_path='./documents/testfile.pdf',
#     config_path='./server/defaultConfig.json',
#     document_name='Sampletest',
#     wait_till_finished=False,
#     save_request_id=True,
# )

# RenderJSON(job)
# parsr.get_status("a135af11bbf348ef193144e756b4d7")
# r = get('http://{}/api/v1/markdown/{}'.format(parsr.server, "a135af11bbf348ef193144e756b4d7"))
# url = "https://giz-parsrtest.hf.space:3001"
# from parsr_client import ParsrClient as client
# parsr = client(url)