In [None]:
import os
import requests

In [None]:
from bs4 import BeautifulSoup
from settings import (
    CARTOCIUDAD_LIST,
    CNIG_LIMITS_FILENAME,
    CNIG_UNZIP,
    DATA_FOLDER,
    DOWNLOAD_URL,
    GVA_DOWNLOAD_AGAIN,
    GVA_HOST,
    REGISTER_FILENAME,
)
from tqdm import tqdm
from zipfile import ZipFile


In [None]:
# Path of the register file inside the data folder; the download cell
# checks for it and writes the fetched data to it.
LOCAL_REGISTER_FILENAME = os.path.join(DATA_FOLDER, REGISTER_FILENAME)

### DOWNLOAD GVA Schools Data

In [None]:
# Download the register file when there is no local copy yet, or when the
# settings force a refresh through GVA_DOWNLOAD_AGAIN.
download_again = (not os.path.exists(LOCAL_REGISTER_FILENAME)) or (GVA_DOWNLOAD_AGAIN)
if download_again:
    # Seconds to wait for the server before giving up; without a timeout a
    # stalled connection would block the notebook indefinitely.
    request_timeout = 30

    # Get new register file link
    web_response = requests.get(f'{GVA_HOST}/{DOWNLOAD_URL}', timeout=request_timeout)
    # Fail loudly on HTTP errors instead of parsing an error page.
    web_response.raise_for_status()

    webcontent = web_response.text
    soup = BeautifulSoup(webcontent, 'html.parser')

    # Keep every anchor whose href mentions the register file name.
    register_url = [
        reg.get('href')
        for reg in soup.select('a')
        if REGISTER_FILENAME in reg.get('href', '')
    ]
    if not register_url:
        raise RuntimeError(
            f'No link containing {REGISTER_FILENAME!r} found in {GVA_HOST}/{DOWNLOAD_URL}'
        )

    # Download data from the new register link
    register_link = f'{GVA_HOST}{register_url[0]}'
    # Stream into a temporary sibling file first: an interrupted download must
    # not leave a truncated register file that the existence check above would
    # later accept as a valid copy.
    partial_filename = f'{LOCAL_REGISTER_FILENAME}.part'
    with requests.get(register_link, stream=True, timeout=request_timeout) as register_response:
        # Do not store an HTTP error body as if it were the register data.
        register_response.raise_for_status()
        # Save register data in disk
        with open(partial_filename, 'wb') as lrf:
            for chunk in register_response.iter_content(chunk_size=8192):
                lrf.write(chunk)
    # Swap the finished download into place (overwrites any previous copy).
    os.replace(partial_filename, LOCAL_REGISTER_FILENAME)
    print(f'INFO: Downloaded NEW {LOCAL_REGISTER_FILENAME}')
else:
    print(f'INFO: No NEW Download: Info in {LOCAL_REGISTER_FILENAME}')

### UNZIP CARTOCiudad PK Data 

In [None]:
# Process the list of provinces from settings: for each province archive found
# in DATA_FOLDER/zip, extract the members whose name contains 'PK' into
# DATA_FOLDER/PK, renamed to '<province>_<original file name>'.
pk_folder = os.path.join(DATA_FOLDER, 'PK')
for provzipname in tqdm(CARTOCIUDAD_LIST):
    zipname = os.path.join(DATA_FOLDER, 'zip', f'CARTOCIUDAD_CALLEJERO_{provzipname}.zip')
    if os.path.exists(zipname):
        with ZipFile(zipname, 'r') as archive:
            for member in archive.infolist():
                internal_filename = member.filename
                # We only want the PK data; directory entries carry no data.
                if member.is_dir() or 'PK' not in internal_filename:
                    continue
                # Use the last path component so the name is right whatever the
                # nesting depth of the member inside the archive.
                new_name = os.path.join(pk_folder, f"{provzipname}_{internal_filename.split('/')[-1]}")
                unzip_again = (not os.path.exists(new_name)) or (CNIG_UNZIP)
                if unzip_again:
                    # extract() returns the path it actually created, so there
                    # is no need to guess the archive's internal folder name.
                    extracted_name = archive.extract(member, pk_folder)
                    # os.replace overwrites an existing target on every
                    # platform (os.rename fails on Windows when forcing a
                    # re-unzip over an existing file).
                    os.replace(extracted_name, new_name)
                    extracted_folder = os.path.dirname(extracted_name)
                    # Remove the now-empty folder(s) created by the extraction,
                    # but never try to remove the PK folder itself.
                    if os.path.normpath(extracted_folder) != os.path.normpath(pk_folder):
                        os.removedirs(extracted_folder)
                    print(f'INFO: Unziped {new_name}')
                else:
                    print(f'INFO: No NEW Unzip for {new_name}')
    else:
        print(f'Download CARTOCiudad files for {provzipname} from CNIG: http://centrodedescargas.cnig.es/')

### UNZIP CNIG Municipal Limits

In [None]:
# Extract from DATA_FOLDER/zip/lineas_limite.zip the members whose name
# contains CNIG_LIMITS_FILENAME and place them directly in DATA_FOLDER.
zipname = os.path.join(DATA_FOLDER, 'zip', 'lineas_limite.zip')
if os.path.exists(zipname):
    with ZipFile(zipname, 'r') as archive:
        for member in archive.infolist():
            internal_filename = member.filename
            # Directory entries carry no data; skip everything that is not a
            # limits file.
            if member.is_dir() or CNIG_LIMITS_FILENAME not in internal_filename:
                continue
            new_name = os.path.join(DATA_FOLDER, internal_filename.split('/')[-1])
            unzip_again = (not os.path.exists(new_name)) or (CNIG_UNZIP)
            if unzip_again:
                # extract() returns the path it actually created.
                extracted_name = archive.extract(member, DATA_FOLDER)
                extracted_folder = os.path.dirname(extracted_name)
                # A member stored at the archive root is already in its final
                # place; only nested members need moving and folder cleanup.
                if os.path.normpath(extracted_folder) != os.path.normpath(DATA_FOLDER):
                    # os.replace overwrites an existing target on every
                    # platform (os.rename fails on Windows in that case).
                    os.replace(extracted_name, new_name)
                    # Clean up only folders this extraction created. Doing it
                    # here, rather than once after the loop, avoids failing
                    # when nothing was extracted or no member matched.
                    os.removedirs(extracted_folder)
                print(f'INFO: Unziped {new_name}')
            else:
                print(f'INFO: No NEW Unzip for {new_name}')
else:
    print('Download CNIG files for Municipal limits: http://centrodedescargas.cnig.es/')