In [92]:
import requests
from datetime import datetime, date, time
import csv

# Receive contracts

In [144]:
class ApiException(Exception):
    """Base class for every error raised while talking to the contracts API."""


class InvalidRequestException(ApiException):
    """Raised when the API body is the literal text "Invalid request."."""


class DataNotFoundException(ApiException):
    """Raised when the API body is the literal text "Data not found."."""


class TooManyRecordsException(ApiException):
    """Raised when a search reports the maximum total and must be narrowed."""

def get_raw_contracts_if_possible(params):
    raw_json = requests.get("http://openapi.clearspending.ru/restapi/v3/contracts/search/", params=params)
    if raw_json.text == "Invalid request.":
        raise InvalidRequestException(params)
    if raw_json.text == "Data not found.":
        raise DataNotFoundException(params)
        
    json = raw_json.json()["contracts"]
    contracts = json["data"]
    
    if (json["page"] == 1):
        total = json["total"]
        print 'total:', total,
        if total == 500:
            raise TooManyRecordsException(params)
        print '\t| 1',
        if total > 50:
            for page_num in range(2, total/50 + 2):
                params_with_page = params.copy()
                params_with_page['page'] = page_num
                print page_num,
                page_contracts = get_raw_contracts_if_possible(params_with_page)
                contracts.extend(page_contracts)
    return contracts




In [146]:
def get_deals_with_restriction(params, start_date, end_date):
    params_with_daterange = params.copy()
    params_with_daterange['daterange'] = start_date.strftime("%d.%m.%Y") + "-" + end_date.strftime("%d.%m.%Y")
    print params_with_daterange['daterange'], 
    try:
        contracts = get_raw_contracts_if_possible(params_with_daterange)
        print ''
    except TooManyRecordsException as e:
        print 'too many!'
        middle_date = start_date + (end_date - start_date)/2
        contracts = get_deals_with_restriction(params, start_date, middle_date)
        contracts2 = get_deals_with_restriction(params, middle_date, end_date)
        contracts.extend(contracts2)
    except Exception as e:
        print ''
        raise
    return contracts




In [131]:
def get_raw_contracts(okdp_list, start_date, end_date):
    contracts = []
    for okdp in okdp_list:
        print '!okdp', okdp,
        params = {
            "okdp_okpd": okdp
        }
        try:
            okdp_contracts = get_deals_with_restriction(params, start_date, end_date)
            contracts.extend(okdp_contracts)
        except ApiException as e:
            print type(e), e
    return contracts

# Preprocessing

In [107]:
def preprocessing(raw_contract):
    """Flatten one raw contract into the record that is written to CSV.

    Args:
        raw_contract: dict for a single contract as returned by the API. Must
            contain ``regionCode``, ``signDate``, ``price``, ``regNum`` and
            ``execution``; ``suppliers`` may be missing or empty.

    Returns:
        dict with the keys ``regionCode``, ``signDate``, ``executionDate``,
        ``price``, ``supplier_regNum``, ``supplier_inn`` and ``supplier_name``.

        * ``executionDate`` is ``execution.endDate`` when no ``month`` is
          given. Otherwise it is built from ``year`` and ``month`` using day
          30, clamped to the month's real last day so February does not
          produce an invalid date.
        * ``supplier_regNum`` is copied from the contract's top-level
          ``regNum`` field.
        * ``supplier_inn`` / ``supplier_name`` come from the first supplier and
          are ``None`` when absent; line breaks in the name become spaces.

    Raises:
        KeyError: a required contract field is missing.
    """
    import calendar

    execution = raw_contract["execution"]
    if "month" in execution:
        year = execution["year"]
        month = execution["month"]
        try:
            last_day = calendar.monthrange(int(year), int(month))[1]
        except (TypeError, ValueError):
            # Unparseable year/month: keep the historical placeholder day.
            last_day = 30
        execution_date = year + "-" + month + "-%02dT00:00:00" % min(30, last_day)
    else:
        execution_date = execution["endDate"]

    # A contract without suppliers should yield empty supplier fields instead
    # of aborting the whole preprocessing run.
    suppliers = raw_contract.get("suppliers") or [{}]
    first_supplier = suppliers[0]

    supplier_name = first_supplier.get("organizationName")
    if supplier_name is not None:
        # Keep every record on a single CSV line.
        supplier_name = supplier_name.replace("\n", " ").replace("\r", " ")

    return {
        "regionCode": raw_contract["regionCode"],
        "signDate": raw_contract["signDate"],
        "executionDate": execution_date,
        "price": raw_contract["price"],
        "supplier_regNum": raw_contract["regNum"],
        "supplier_inn": first_supplier.get("inn"),
        "supplier_name": supplier_name,
    }

# Save data

In [127]:
def save(contracts, filename):
    """Write preprocessed contracts to ``filename`` as UTF-8 encoded CSV.

    One row is written per contract, without a header row, with the columns
    in this order: regionCode, signDate, executionDate, price,
    supplier_regNum, supplier_inn, supplier_name.

    Args:
        contracts: iterable of dicts as produced by ``preprocessing``.
        filename: path of the CSV file to create or overwrite.
    """
    columns = (
        "regionCode",
        "signDate",
        "executionDate",
        "price",
        "supplier_regNum",
        "supplier_inn",
        "supplier_name",
    )

    def to_cell(value):
        # A missing value becomes an empty cell rather than the text "None".
        if value is None:
            return ""
        # Python 2 str(float) keeps only 12 significant digits, which would
        # truncate large prices; repr() round-trips the value.
        if isinstance(value, float):
            value = repr(value)
        return unicode(value).encode("utf-8")

    # The Python 2 csv module requires the file to be opened in binary mode;
    # text mode produces extra carriage returns on some platforms.
    with open(filename, "wb") as outfile:
        writer = csv.writer(outfile)
        for contract in contracts:
            writer.writerow([to_cell(contract[column]) for column in columns])

# Main

In [135]:
# Classifier codes to download. get_raw_contracts() queries them one at a time,
# sending each as the `okdp_okpd` search parameter.
# NOTE(review): presumably OKDP/OKPD product classifier codes -- confirm the
# intended selection with the analysis owner.
okdp_list = [
    "92.20.11.110",
    "92.20.11.111",
    "92.20.11.112",
    "92.20.11.120",
    "92.20.11.121",
    "92.20.11.122",
    "92.20.11.130",
    "92.20.11.131",
    "92.20.11.132",
    "92.20.11.140",
    "92.20.11.141",
    "92.20.11.142",
    "92.20.11.190",
    "92.20.11.191",
    "92.20.11.192",
    "92.20.12.110",
    "92.20.12.111",
    "92.20.12.112",
    "92.20.12.120",
    "92.20.12.121",
    "92.20.12.122",
    "92.20.12.130",
    "92.20.12.131",
    "92.20.12.132",
    "92.20.12.140",
    "92.20.12.141",
    "92.20.12.142",
    "92.20.12.190",
    "92.20.12.191",
    "92.20.12.192",
    "22.12.11.110",
    "22.12.11.120",
    "22.12.11.190",
    "22.12.11.210",
    "22.12.12.110",
    "22.12.12.111",
    "22.12.12.112",
    "22.12.12.113",
    "22.12.12.114",
    "22.12.12.115",
    "22.12.12.116",
    "22.12.12.117",
    "22.12.12.119",
    "22.12.12.120",
    "22.12.12.190",
    "22.12.21.110",
    "22.12.21.120",
    "22.12.21.190",
    "22.12.22.110",
    "22.12.22.120",
    "22.12.22.190",
    "22.12.99.000",
    "22.13.11.110",
    "22.13.11.111",
    "22.13.11.112",
    "22.13.11.113",
    "22.13.11.114",
    "22.13.11.115",
    "22.13.11.116",
    "22.13.11.117",
    "22.13.11.118",
    "22.13.11.119",
    "22.13.11.120",
    "22.13.11.121",
    "22.13.11.122",
    "22.13.11.123",
    "22.13.11.124",
    "22.13.11.125",
    "22.13.11.126",
    "22.13.11.129",
    "22.13.11.190",
    "22.13.11.210",
    "22.13.12.110",
    "22.13.12.111",
    "22.13.12.112",
    "22.13.12.113",
    "22.13.12.114",
    "22.13.12.115",
    "22.13.12.116",
    "22.13.12.117",
    "22.13.12.118",
    "22.13.12.119",
    "22.13.12.120",
    "22.13.12.121",
    "22.13.12.122",
    "22.13.12.123",
    "22.13.12.124",
    "22.13.12.125",
    "22.13.12.126",
    "22.13.12.129",
    "22.13.12.190",
    "22.13.21.110",
    "22.13.21.120",
    "22.13.21.190",
    "22.13.22.110",
    "22.13.22.120",
    "22.13.22.190",
    "22.13.99.000"
]

In [None]:
# Date window for the download; both ends are sent to the API in the
# `daterange` search parameter.
start_date = date(2014, 1, 1)
end_date = date(2015, 12, 31)

# Pipeline: download the raw contracts for every code in okdp_list, flatten
# each contract into a flat record, then write all records to contracts.csv.
print 'RECEIVING'
raw_contracts = get_raw_contracts(okdp_list, start_date, end_date)
print 'PREPROCESSING'
contracts = [preprocessing(raw_contract) for raw_contract in raw_contracts]
print 'SAVING'
save(contracts, "contracts.csv")
print 'DONE!'

RECEIVING
!okdp 92.20.11.110 01.01.2014-31.12.2015 total: 1 	| 1 
!okdp 92.20.11.111 01.01.2014-31.12.2015 total: 20 	| 1 
!okdp 92.20.11.112 01.01.2014-31.12.2015 total: 23 	| 1 
!okdp 92.20.11.120 01.01.2014-31.12.2015 total: 41 	| 1 
!okdp 92.20.11.121 01.01.2014-31.12.2015 total: 407 	| 1 2 3 4 5 6 7 8 9 
!okdp 92.20.11.122 01.01.2014-31.12.2015 total: 457 	| 1 2 3 4 5 6 7 8 9 10 
!okdp 92.20.11.130 01.01.2014-31.12.2015 total: 3 	| 1 
!okdp 92.20.11.131 01.01.2014-31.12.2015 total: 23 	| 1 
!okdp 92.20.11.132 01.01.2014-31.12.2015 total: 24 	| 1 
!okdp 92.20.11.140 01.01.2014-31.12.2015 total: 2 	| 1 
!okdp 92.20.11.141 01.01.2014-31.12.2015 total: 9 	| 1 
!okdp 92.20.11.142 01.01.2014-31.12.2015 total: 12 	| 1 
!okdp 92.20.11.190 01.01.2014-31.12.2015 total: 2 	| 1 
!okdp 92.20.11.191 01.01.2014-31.12.2015 total: 88 	| 1 2 
!okdp 92.20.11.192 01.01.2014-31.12.2015 total: 94 	| 1 2 
!okdp 92.20.12.110 01.01.2014-31.12.2015 total: 4 	| 1 
!okdp 92.20.12.111 01.01.2014-31.12.2015 to