**1. Libraries**

In [None]:
import os
import re
import shutil
import urllib.request

import pandas as pd
from requests_html import HTMLSession

**2. Directories**

In [None]:
## Creating directories
# Output tree: <current working directory>/Projections/{National,Departmental,Municipal}
path_raw = os.path.join(os.getcwd(), 'Projections')
path_national = os.path.join(path_raw, 'National')
path_departmental = os.path.join(path_raw, 'Departmental')
path_municipal = os.path.join(path_raw, 'Municipal')

# makedirs(exist_ok=True) does nothing when the folder is already there, so the cell
# can be re-run safely and there is no gap between an existence check and the creation.
for path_dir in (path_raw, path_national, path_departmental, path_municipal):
    os.makedirs(path_dir, exist_ok=True)

**3. Connecting**

In [None]:
# requests_html session object; the page request further down is issued through it.
session = HTMLSession()

In [None]:
# Population-projections page on dane.gov.co; the workbook links are scraped from it.
url = 'https://www.dane.gov.co/index.php/estadisticas-por-tema/demografia-y-poblacion/proyecciones-de-poblacion'

In [None]:
response = session.get(url)
# Fail here on an HTTP error status (4xx/5xx) instead of scraping an error page
# and silently ending up with no workbook links.
response.raise_for_status()

**3.1 Scraping**

In [None]:
# Links found on the fetched page, taken from its `absolute_links` attribute.
links = response.html.absolute_links

In [None]:
## Grouping the scraped links by geographic level
National = list()
Departmental = list()
Municipal = list()

# One (pattern, target list) pair per level. Raw strings with an escaped dot make
# sure only a literal ".xlsx" extension at the end of the link is accepted.
level_rules = (
    (r'anexo-(area-sexo-edad-)*proyecciones-poblacion-Nacional[0-9]{4}_[0-9]{4}\.xlsx$', National),
    (r'anexo-(area-sexo-edad-)*proyecciones-poblacion-departamental_[0-9]{4}-[0-9]{4}\.xlsx$', Departmental),
    (r'anexo-(area-sexo-edad-)*proyecciones-poblacion-Municipal_[0-9]{4}-[0-9]{4}\.xlsx$', Municipal),
)

# requests_html exposes `absolute_links` as a set; iterating it sorted keeps the order
# of each list (and therefore of the rows in the exported CSVs) stable between runs.
for link in sorted(links):
    for rule, bucket in level_rules:
        if re.search(rule, link):
            bucket.append(link)
            break  # a link belongs to at most one level

**3.2 Download and export**

In [None]:
def get_csv(links, pattern, path_level, csv_name):
    """Download the workbooks in `links`, stack them, reshape to long format and export a CSV.

    Each URL is stored in `path_level` under the file name extracted with `pattern`.
    A workbook that is already on disk is not downloaded again. All workbooks are
    concatenated, the columns from 'Hombres_0' onwards are melted into rows, the
    melted label is split at the first underscore into 'Sexo' and 'Edad', and the
    result is written to `csv_name`.

    Parameters
    ----------
    links : iterable of str
        URLs of the Excel workbooks to process.
    pattern : str
        Regular expression; its first match in each URL is used as the local file name.
    path_level : str
        Existing directory where the workbooks are saved.
    csv_name : str
        Path of the CSV file to write (UTF-8 with BOM, no index column).

    Raises
    ------
    ValueError
        If `links` is empty or a URL does not match `pattern`.
    """
    # Keep URL, file name and local path together so every file is fetched from
    # its own URL (not from whichever URL happened to be processed last).
    targets = []
    for web in links:
        found = re.findall(pattern, web)
        if not found:
            raise ValueError('Link %r does not match pattern %r' % (web, pattern))
        name = found[0]
        targets.append((web, name, os.path.join(path_level, name)))

    if not targets:
        raise ValueError('No links to process for %r' % (csv_name,))

    frames = []
    for web, name, path in targets:
        if not os.path.isfile(path):
            print('Reading', path)
            urllib.request.urlretrieve(web, path)
            print('File downloaded')
        # The first 11 rows are skipped, presumably the title block above the table (TODO confirm).
        current = pd.read_excel(path, skiprows=11)
        print(current.head(2))
        frames.append(current)
        print(name, 'read')

    # Single concat at the end instead of growing a frame inside the loop.
    df_all = pd.concat(frames, ignore_index=True)

    # Everything up to 'ÁREA GEOGRÁFICA' identifies a row; everything from
    # 'Hombres_0' onwards is a "<sex>_<age>" column to be turned into rows.
    cols_to_split = df_all.loc[:, 'Hombres_0':].columns.tolist()
    ids = df_all.loc[:, :'ÁREA GEOGRÁFICA'].columns.tolist()
    print('ids and split identified')
    new_all = pd.melt(df_all, id_vars=ids, value_vars=cols_to_split,
                      var_name='Sexo_edad', value_name='No_personas')

    print('col to row')
    new_all[["Sexo", "Edad"]] = new_all.Sexo_edad.str.split(pat='_', n=1, expand=True)
    print('sex and age splited')
    # drop() returns a new frame; assign it back so the helper column really goes away.
    new_all = new_all.drop(columns=['Sexo_edad'])

    new_all.to_csv(csv_name, encoding='utf-8-sig', index=False)

In [None]:
# The CSV path is built from path_raw instead of a hard-coded Windows '\\' separator,
# so the file lands inside the Projections folder on any operating system.
get_csv(National, "Nacional[0-9]{4}_[0-9]{4}.xlsx$", path_national, os.path.join(path_raw, 'National.csv'))

In [None]:
# The CSV path is built from path_raw instead of a hard-coded Windows '\\' separator,
# so the file lands inside the Projections folder on any operating system.
get_csv(Departmental, "departamental_[0-9]{4}-[0-9]{4}.xlsx$", path_departmental, os.path.join(path_raw, 'Departmental.csv'))

In [None]:
# The CSV path is built from path_raw instead of a hard-coded Windows '\\' separator,
# so the file lands inside the Projections folder on any operating system.
get_csv(Municipal, "Municipal_[0-9]{4}-[0-9]{4}.xlsx$", path_municipal, os.path.join(path_raw, 'Municipal.csv'))

**4. Remove directories**

In [None]:
# Delete the three per-level workbook folders together with everything inside them.
for level_dir in (path_national, path_departmental, path_municipal):
    shutil.rmtree(level_dir)