In [1]:
import requests
import os
import re
import csv
import time
import numpy as np

In [9]:
def batch_process(year, index, batch_size):
    """
    Retrieve API pages in batches and append every batch to the CSV file.

    ``index`` is cut into consecutive slices of at most ``batch_size`` page
    numbers. Each slice is passed to ``get_data`` and the records it returns
    are handed to ``write_data``. The CSV header is written together with the
    first batch that actually contains records.

    Parameters
    ----------
    year : Numeric
        Year forwarded unchanged to ``get_data``.
    index : np.ndarray
        Page numbers to retrieve. It must support ``len()`` and slicing.
    batch_size: Numeric
        Maximum number of pages per batch. It is used as the step of the
        slicing loop.
    """
    n = len(index)
    header_written = False

    for ii in np.arange(0, n, batch_size):
        # batch is a subset of index
        batch = index[ii:ii+batch_size]
        print(f'running batch: {batch[0]}-{batch[-1]}')

        # get data for batch
        data = get_data(year, batch)
        print('Current Data Array Length: ', len(data))

        # get_data returns an empty list when every page in the batch failed;
        # write_data needs at least one record to derive the column names.
        if not data:
            print('No records retrieved for this batch; nothing written.')
            continue

        # write to csv; the header goes out once, with the first non-empty batch
        write_data(data, write_header=not header_written)
        header_written = True
        
def tweak_record(dd):
    """
    Convert one raw API record into the flat row that is written to the CSV.

    Every output field is ``np.nan`` when the corresponding source value is
    ``None``. Otherwise:

    * ``id``, ``ocid`` and ``tipo`` (from ``internal_type``) are copied as is;
    * ``metodo``, ``loc``, ``prov``, ``proveedores`` and ``contratante`` are
      title-cased copies of ``method``, ``locality``, ``region``,
      ``suppliers`` and ``buyer``;
    * ``ejecutado`` and ``presupuesto`` are ``amount`` and ``budget``
      converted to float and rounded to two decimals;
    * ``descripcion`` is ``description`` with the boilerplate purchase-order
      prefix removed, then title-cased and stripped;
    * ``date`` is ``date`` with the ``T`` separator replaced by a space.

    Parameters
    ----------
    dd : dict
        Raw record. All twelve source keys must be present.

    Returns
    -------
    dict
        Row with the keys in the order listed above.

    Raises
    ------
    KeyError
        If a source key is missing from ``dd``.
    """
    def or_nan(key, transform=None):
        # Missing values (None) become NaN; anything else is optionally transformed.
        value = dd[key]
        if value is None:
            return np.nan
        return value if transform is None else transform(value)

    def to_amount(value):
        return round(float(value), 2)

    def clean_description(text):
        # Raw string so that \s reaches the regex engine instead of being
        # treated as an (invalid) string escape. The leading '.' stands for
        # the first letter of "Orden", whatever its case or accent.
        prefix = r'(\s)?.rden de compra para adquirir los siguientes productos:(\s)+'
        return re.sub(prefix, '', text).title().strip()

    return {'id' : or_nan('id'),
            'ocid' : or_nan('ocid'),
            'metodo' : or_nan('method', str.title),
            'tipo' : or_nan('internal_type'),
            'loc' : or_nan('locality', str.title),
            'prov' : or_nan('region', str.title),
            'proveedores' : or_nan('suppliers', str.title),
            'contratante' : or_nan('buyer', str.title),
            'ejecutado' : or_nan('amount', to_amount),
            'presupuesto' : or_nan('budget', to_amount),
            'descripcion' : or_nan('description', clean_description),
            'date' : or_nan('date', lambda text: text.replace('T', ' ')),}


def get_data(year, index):
    """
    Download pages of procurement records from SERCOP's open data API.

    One GET request is made for every page number in ``index``. Each record
    of the returned page is normalised with ``tweak_record`` and appended to
    the result. A page whose request or parsing fails is reported on stdout
    and skipped, so one bad page does not abort the whole run. This function
    only collects the records; it does not write anything to disk.

    Parameters
    ----------
    year : Numeric
        Must be an int or float or object that can be converted to int.
    index : iterable of Numeric
        Page numbers to request. Each one must be convertible to int.

    Returns
    -------
    list of dict
        The normalised records of every page that was retrieved successfully.
    """
    data = []
    url = 'https://datosabiertos.compraspublicas.gob.ec/PLATAFORMA/api/search_ocds'

    for page in index:
        # Reset per page so the error report below never shows a previous page's response.
        response = None
        try:
            # The timeout keeps a stalled connection from blocking the run forever.
            response = requests.get(url, params={'year':int(year), 'page':int(page)}, timeout=30)

            # Pause when the quota is used up; a missing header is treated as "no limit reported".
            remaining = response.headers.get('X-RateLimit-Remaining')
            if remaining is not None and int(remaining) < 1:
                time.sleep(7)

            # Decode the body once and reuse it.
            payload = response.json()
            page_ = payload['page']
            data_ = [tweak_record(dd) for dd in payload['data']]

            print('-'*100)
            print('Parsing page: ', page_)
            print('Response Status: ', response.status_code)
            print('Rate Limit Remaining: ', remaining)
            print('Page\'s size: ', len(data_))

            data += data_

        except Exception as e:
            # Only names that are guaranteed to be bound are used here, so the
            # handler itself cannot fail with a NameError.
            print('-'*100)
            print('**Call crashed**')
            print(f'An error ocurred: {e}')
            print('Parsing page: ', page)
            if response is not None:
                print('Response Status: ', response.status_code)
                print('Rate Limit Remaining: ', response.headers.get('X-RateLimit-Remaining'))
            print('**Call crashed**')

    # data is a list of dictionaries, one per record.
    return data

        
def write_data(data, write_header=False, path='../data/contratacion_db.csv'):
    """
    Append rows to a CSV file.

    Parameters
    ----------
    data : list of dict
        Rows to write. The keys of the first row define the column names and
        their order. An empty list is a no-op: the file is not opened.
    write_header : bool
        When True, a header line with the column names is written before the
        rows.
    path : str
        Destination file, opened in append mode. Defaults to the project's
        ``../data/contratacion_db.csv``.
    """
    # Without at least one row there are no column names to derive.
    if not data:
        return

    # select keys as column names
    cols = list(data[0])

    # newline='' lets the csv module control line endings itself.
    with open(path, 'a', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=cols)

        # if write_header is set to True, write headers.
        if write_header:
            writer.writeheader()

        # write csv rows with data dictionaries
        writer.writerows(data)
        
# @Write functions to run asynchronously.
# @Unveil the mysteries of OCID.
    #

In [19]:
# Scratch calculation: ratio of 500 to 516.19. What the two numbers stand for
# is not recorded in the notebook — TODO document or delete this cell.
500 / 516.19 

0.9686355799221216

In [12]:
# Scratch experiment: what list.extend() does when it is given a dict.
data = []
for i in range(10):
    # data_ is rebuilt from scratch on every pass, so after the loop it only
    # holds the entry of the last iteration: {'9': 9}.
    data_ = {}
    data_[str(i)] = i
    
# extend() runs once, after the loop, and iterating a dict yields its keys,
# so only the key '9' ends up in data.
data.extend(data_)

In [13]:
data

['9']

In [2]:
# Fetch one page (year 2023, page 1041) from the search_ocds endpoint so the
# raw response can be inspected in the cells below.
url = 'https://datosabiertos.compraspublicas.gob.ec/PLATAFORMA/api/search_ocds'
response = requests.get(url, params={'year':2023, 'page': 1041})

In [3]:
# Dump the response headers; the X-RateLimit-Limit and X-RateLimit-Remaining
# fields show the API's request quota.
print(response.headers)

{'Date': 'Mon, 10 Apr 2023 14:53:10 GMT', 'Server': 'Apache/2.4.6 (Red Hat Enterprise Linux) OpenSSL/1.0.2k-fips', 'Docker-Distribution-Api-Version': 'backend, backend', 'Host': 'datosabiertos.compraspublicas.gob.ec', 'X-Powered-By': 'PHP/8.0.13', 'Cache-Control': 'no-cache, private', 'Content-Type': 'application/json', 'X-RateLimit-Limit': '60', 'X-RateLimit-Remaining': '59', 'Access-Control-Allow-Origin': '*', 'Keep-Alive': 'timeout=5, max=100', 'Connection': 'Keep-Alive', 'Transfer-Encoding': 'chunked', 'Strict-Transport-Security': 'max-age=157680000'}


In [3]:
# X-RateLimit-Remaining is the header get_data() checks before pausing.
# The header value arrives as a string, so it has to be converted with int()
# before it can be compared with a number.
rate_limit_remaining = response.headers.get('X-RateLimit-Remaining')
print(rate_limit_remaining)

57


In [34]:
# First, verbose attempt at mapping None values to NaN, tried on two fields
# ('method' and 'budget') of every record of the sample page.
for dd in response.json()['data']:
    dd_out = {}
    # 'method' -> 'metodo', NaN when the API returned None
    if dd['method'] is not None:
        dd_out['metodo'] = dd['method']
    else:
        dd_out['metodo'] = np.nan
        
    # 'budget' -> 'presupuesto', NaN when the API returned None
    if dd['budget'] is not None:
        dd_out['presupuesto'] = dd['budget']
    else:
        dd_out['presupuesto'] = np.nan
        
    print(dd_out)

{'metodo': nan, 'presupuesto': nan}
{'metodo': 'open', 'presupuesto': nan}
{'metodo': 'selective', 'presupuesto': '106.3888'}
{'metodo': nan, 'presupuesto': nan}
{'metodo': nan, 'presupuesto': nan}
{'metodo': nan, 'presupuesto': nan}
{'metodo': nan, 'presupuesto': nan}
{'metodo': nan, 'presupuesto': nan}
{'metodo': 'selective', 'presupuesto': '6.72'}
{'metodo': nan, 'presupuesto': nan}


In [37]:
# Could I use a list comprehension to create all the necessary keys and values and merge them into one dictionary?
# 1. Is it possible to create a dictionary from a list of keys and a list of values?
# 2. How could ... (unfinished note)

def process_data(dd):
    """
    Prototype record mapper: keep 'method' and 'budget' under their Spanish
    column names, substituting NaN for values that are None.
    """
    method = dd['method']
    budget = dd['budget']

    if method is None:
        method = np.nan
    if budget is None:
        budget = np.nan

    return {'metodo': method, 'presupuesto': budget}

# Run the compact mapper over the sample page; the printed rows match the
# output of the verbose if/else version in the previous cell.
for dd in response.json()['data']:
    print(process_data(dd))

{'metodo': nan, 'presupuesto': nan}
{'metodo': 'open', 'presupuesto': nan}
{'metodo': 'selective', 'presupuesto': '106.3888'}
{'metodo': nan, 'presupuesto': nan}
{'metodo': nan, 'presupuesto': nan}
{'metodo': nan, 'presupuesto': nan}
{'metodo': nan, 'presupuesto': nan}
{'metodo': nan, 'presupuesto': nan}
{'metodo': 'selective', 'presupuesto': '6.72'}
{'metodo': nan, 'presupuesto': nan}


In [13]:
# Look at the first raw record to see the field names the API returns and
# which of them can be None.
response.json()['data'][0]

{'id': 2223017,
 'ocid': 'ocds-5wno2w-SIE-HSLO-001-2023-2580',
 'year': 2023,
 'month': 2,
 'method': None,
 'internal_type': None,
 'locality': 'OTAVALO',
 'region': 'IMBABURA',
 'suppliers': None,
 'buyer': 'HOSPITAL SAN LUIS DE OTAVALO',
 'amount': None,
 'date': '2023-02-23T00:00:00-05:00',
 'title': None,
 'description': None,
 'budget': None}

In [8]:
## 1
# Create the whole array and then find the subdivisions
##
# Create an array by specifying the limits and then create more sub-arrays

larr = np.arange(1, 1001, 1)
np.array_split?

[1;31mSignature:[0m [0mnp[0m[1;33m.[0m[0marray_split[0m[1;33m([0m[0mary[0m[1;33m,[0m [0mindices_or_sections[0m[1;33m,[0m [0maxis[0m[1;33m=[0m[1;36m0[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m
Split an array into multiple sub-arrays.

Please refer to the ``split`` documentation.  The only difference
between these functions is that ``array_split`` allows
`indices_or_sections` to be an integer that does *not* equally
divide the axis. For an array of length l that should be split
into n sections, it returns l % n sub-arrays of size l//n + 1
and the rest of size l//n.

See Also
--------
split : Split array into multiple sub-arrays of equal size.

Examples
--------
>>> x = np.arange(8.0)
>>> np.array_split(x, 3)
[array([0.,  1.,  2.]), array([3.,  4.,  5.]), array([6.,  7.])]

>>> x = np.arange(9)
>>> np.array_split(x, 4)
[array([0, 1, 2]), array([3, 4]), array([5, 6]), array([7, 8])]
[1;31mFile:[0m      c:\users\elias\anaconda3\lib\site-packages

In [None]:
ocid
ocds-5wno2w-CE-20150000091786-40764

### Número de páginas según año
- 2015 --> pages: 28065
- 2016 --> pages: 27964
- 2017 --> pages: 33428
- 2018 --> pages: 35769
- 2019 --> pages: 27506
- 2020 --> pages: 16068
- 2021 --> pages: 16706
- 2022 --> pages: 19546
- 2023 --> pages: 1354

Unnamed: 0,id,ocid,year,month,method,internal_type,locality,region,suppliers,buyer,amount,date,title,description,budget
0,1289243,ocds-5wno2w-CE-20150000091786-40764,2015,1,direct,Catálogo electrónico - Compra directa,EMPALME,GUAYAS,BRELDYNG S.A.,MUNICIPIO DEL CANTÓN EL EMPALME,1470.268800,2015-01-05T00:00:00-05:00,Orden de compra CE-20150000091786,Orden de compra para adquirir los siguientes p...,1470.2688
1,1289244,ocds-5wno2w-CE-20150000091789-54538,2015,1,direct,Catálogo electrónico - Compra directa,FRANCISCO DE ORELLANA,ORELLANA,ABL PHARMA ECUADOR S.A.,HOSPITAL FRANCISCO DE ORELLANA,418.500000,2015-01-05T00:00:00-05:00,Orden de compra CE-20150000091789,Orden de compra para adquirir los siguientes p...,418.5
2,1289245,ocds-5wno2w-CE-20150000091790-54538,2015,1,direct,Catálogo electrónico - Compra directa,FRANCISCO DE ORELLANA,ORELLANA,ABL PHARMA ECUADOR S.A.,HOSPITAL FRANCISCO DE ORELLANA,1724.600000,2015-01-05T00:00:00-05:00,Orden de compra CE-20150000091790,Orden de compra para adquirir los siguientes p...,1724.6
3,1289246,ocds-5wno2w-CE-20150000091791-54538,2015,1,direct,Catálogo electrónico - Compra directa,FRANCISCO DE ORELLANA,ORELLANA,DISTRIBUIDORA FARMACEUTICA ECUATORIANA DIFARE ...,HOSPITAL FRANCISCO DE ORELLANA,756.000000,2015-01-05T00:00:00-05:00,Orden de compra CE-20150000091791,Orden de compra para adquirir los siguientes p...,756
4,1289247,ocds-5wno2w-CE-20150000091792-54538,2015,1,direct,Catálogo electrónico - Compra directa,FRANCISCO DE ORELLANA,ORELLANA,GRUNENTHAL ECUATORIANA CIA LTDA,HOSPITAL FRANCISCO DE ORELLANA,200.000000,2015-01-05T00:00:00-05:00,Orden de compra CE-20150000091792,Orden de compra para adquirir los siguientes p...,200
5,1289248,ocds-5wno2w-CE-20150000091793-54538,2015,1,direct,Catálogo electrónico - Compra directa,FRANCISCO DE ORELLANA,ORELLANA,LABORATORIOS GENERICOS FARMACEUTICOS ECUATORIA...,HOSPITAL FRANCISCO DE ORELLANA,360.000000,2015-01-05T00:00:00-05:00,Orden de compra CE-20150000091793,Orden de compra para adquirir los siguientes p...,360
6,1289249,ocds-5wno2w-CE-20150000091794-54538,2015,1,direct,Catálogo electrónico - Compra directa,FRANCISCO DE ORELLANA,ORELLANA,LABORATORIOS INDUSTRIALES FARMACEUTICOS ECUATO...,HOSPITAL FRANCISCO DE ORELLANA,1420.800000,2015-01-05T00:00:00-05:00,Orden de compra CE-20150000091794,Orden de compra para adquirir los siguientes p...,1420.8
7,1289250,ocds-5wno2w-CE-20150000091795-54538,2015,1,direct,Catálogo electrónico - Compra directa,FRANCISCO DE ORELLANA,ORELLANA,LETERAGO DEL ECUADOR S.A,HOSPITAL FRANCISCO DE ORELLANA,608.000000,2015-01-05T00:00:00-05:00,Orden de compra CE-20150000091795,Orden de compra para adquirir los siguientes p...,608
8,1289251,ocds-5wno2w-CE-20150000091796-54538,2015,1,direct,Catálogo electrónico - Compra directa,FRANCISCO DE ORELLANA,ORELLANA,LETERAGO DEL ECUADOR S.A,HOSPITAL FRANCISCO DE ORELLANA,1080.000000,2015-01-05T00:00:00-05:00,Orden de compra CE-20150000091796,Orden de compra para adquirir los siguientes p...,1080
9,1289252,ocds-5wno2w-CE-20150000091797-54538,2015,1,direct,Catálogo electrónico - Compra directa,FRANCISCO DE ORELLANA,ORELLANA,LETERAGO DEL ECUADOR S.A,HOSPITAL FRANCISCO DE ORELLANA,324.000000,2015-01-05T00:00:00-05:00,Orden de compra CE-20150000091797,Orden de compra para adquirir los siguientes p...,324
