**1. Libraries**

In [1]:
from requests_html import HTMLSession
import pandas as pd
import re, os, shutil, sqlite3

**2. Directories**

In [2]:
## Creating directories
path_raw = os.path.join(os.getcwd(), 'Indicator')
if os.path.exists(path_raw) == False: 
    os.mkdir(path_raw)

**3. Connecting**

In [3]:
session = HTMLSession()

In [4]:
url = 'https://www.dane.gov.co/index.php/estadisticas-por-tema/demografia-y-poblacion/estimaciones-del-cambio-demografico'

In [5]:
response = session.get(url)

**3.1 Scrapping**

In [6]:
links = response.html.absolute_links

In [7]:
Links = list()
for link in links:
    if re.search('anexo-cambio-demografico-SumaryTable[0-9]{4}-[0-9]{4}.xlsx$', link):
        Links.append(link)
        print(link)
        continue

https://www.dane.gov.co/files/censo2018/cambio-demografico/anexo-cambio-demografico-SumaryTable1985-2017.xlsx
https://www.dane.gov.co/files/censo2018/cambio-demografico/anexo-cambio-demografico-SumaryTable1950-1984.xlsx


**3.2 Download and export**

In [8]:
def get_csv(links,csv_name):
    df_all = pd.DataFrame()
    paths = list()
            
    for web in links:
        print('Reading', web)
        sheet1 = pd.read_excel(web, skiprows=11, sheet_name = 0, converters={'DP':str})
        sheet2 = pd.read_excel(web, skiprows=11, sheet_name = 1, converters={'DP':str})
        print('......Cleaning')
        complete_table = sheet1.merge(sheet2, how='inner', on=['DP', 'DPNOM', 'AÑO'])
        complete_table = complete_table.rename({'AÑO':'year', '(%)':'Tasa_crecimiento', 'sexos': 'Esperanza_total', 
                              'Hombres':'Esperanza_hombres', 'Mujeres':'Esperanza_mujeres',
                               'sexos.1':'Tasa_mortalidad_infantil_total',
                               'Hombres.1':'Tasa_mortalidad_infantil_hombres',
                               'Mujeres.1':'Tasa_mortalidad_infantil_mujeres',
                               'por mujer': 'Fecundidad_quinquenales',
                               'por mujer.1':'Fecundidad_edades_simples',
                               'e(0)': 'Diferencia_TMI_hombres_mujeres'}, axis='columns')
        complete_table = complete_table.drop(columns=['DPNOM'])
        complete_table.columns = complete_table.columns.str.replace("\.|\s|x", "")
        complete_table['Tasa_crecimiento'] = complete_table['Tasa_crecimiento'].fillna('NULL')
        df_all = pd.concat([df_all, complete_table])
        print('......Completed')
    return df_all

In [9]:
df_all = get_csv(Links, 'Indicator\\Indicators.csv')

Reading https://www.dane.gov.co/files/censo2018/cambio-demografico/anexo-cambio-demografico-SumaryTable1985-2017.xlsx
......Cleaning
......Completed
Reading https://www.dane.gov.co/files/censo2018/cambio-demografico/anexo-cambio-demografico-SumaryTable1950-1984.xlsx


  complete_table.columns = complete_table.columns.str.replace("\.|\s|x", "")


......Cleaning
......Completed


In [10]:
list_str = str()
for col in df_all.columns:
    if col == 'DP':
        list_str = list_str + 'departamento_id' + "   CHAR(2) NULL, "
    else:
        list_str = list_str + col + "  INTEGER NULL,"

In [11]:
con = sqlite3.connect('Colombia_Demographic_data.sqlite', timeout=30)
cur = con.cursor()
cur.executescript('DROP TABLE IF EXISTS Indicadores; CREATE TABLE Indicadores(' + list_str + 'FOREIGN KEY(departamento_id)  REFERENCES Departamento(id));')

<sqlite3.Cursor at 0x2b1154b9f40>

In [12]:
con.commit()

In [13]:
list_str = str()
for col in df_all.columns:
    if col == 'DP':
        list_str = list_str + 'departamento_id' + ', '
    else:
        list_str = list_str + col + ', '
list_str = list_str[:-2] + ') VALUES'

In [14]:
insert = 'INSERT INTO Indicadores(' + list_str
for row in range(0,len(df_all)):
    i = 0
    insert += '('
    for feature in df_all.iloc[row]:
        if i == 0:
            insert = insert + "'" + str(feature) + "'" + ','
        else:
            insert = insert + str(feature) + ','
        i = i +1
    insert = insert[:-1] + '),'
    
insert = insert[:-1] + ';'   

In [15]:
cur.executescript(insert)

<sqlite3.Cursor at 0x2b1154b9f40>

In [16]:
con.commit()

In [17]:
con.close()

**4. Remove directories**

In [18]:
shutil.rmtree(path_raw)