In [3]:
import requests
import urllib.parse
import pandas as pd
from pyproj import Transformer, CRS
import json
from tqdm.notebook import tqdm

from database import database

# Read the MySQL connection settings (host_name, user_name, password, db)
# from the local JSON config file. The context manager guarantees the file
# handle is closed even if the JSON is malformed.
with open('config_file.json') as f:
    config = json.load(f)

# Handle used by every cell below to run SQL through `execute_query`.
my_db = database(config['host_name'], config['user_name'], config['password'], config['db'])

from loader_bcn_open_data import loader_bcn_open_data

# Loader used by every cell below to fetch a dataset by its resource id
# through `get_data`.
loader = loader_bcn_open_data()


MySQL Server connection successful
MySQL Database connection test successful


# Table censal_section

In [4]:
# Load the dataset identified by `table_id`, add projected EPSG:25831
# coordinates, create the `censal_section` table and bulk-insert every row.
table_id = '50c9b17f-d297-4668-bad4-e1c217580747'
table = "censal_section"

df = loader.get_data(table_id)

# Every destination column is NOT NULL, so incomplete rows are discarded.
df = df.dropna()
columns_to_delete = ['_id','SECC_EST', '_full_text', 'CODI_CARRER', 'ETRS89_COORD_X','ETRS89_COORD_Y', 'ED50_COORD_X', 'ED50_COORD_Y', 'NUMPOST', 'BARRI', 'DIST_POST', 'TIPUSNUM', 'LLEPOST']
df = df.drop(columns = columns_to_delete)

# Add local coordinates
coord_transformer = Transformer.from_crs("WGS84", "EPSG:25831") # Coordinate transformer
# Project all points in a single call; element 0 holds the X values and
# element 1 the Y values (previously the whole projection was computed twice).
projected = coord_transformer.transform(df['LATITUD'].to_list(), df['LONGITUD'].to_list())
df['EPSG_25831_X'] = projected[0]
df['EPSG_25831_Y'] = projected[1]

# Create table
my_db.execute_query(""" CREATE TABLE {table}(
id INT AUTO_INCREMENT,
DISTRICT INT NOT NULL,
CENSAL_SECTION INT NOT NULL,
LATITUDE DECIMAL(8,6) NOT NULL,
LONGITUDE DECIMAL(8,6) NOT NULL,
EPSG_25831_X INT NOT NULL,
EPSG_25831_Y INT NOT NULL,
PRIMARY KEY (id)
)
""".format(table = table))

#Insert all data
# NOTE(review): values are emitted in the DataFrame's column order, so
# `col_names` must list the destination columns in exactly that order --
# confirm against the columns returned by the loader.
col_names = ['LATITUDE', 'CENSAL_SECTION', 'DISTRICT', 'LONGITUDE', 'EPSG_25831_X', 'EPSG_25831_Y']
columns = ", ".join(col_names)

insert_prefix = 'INSERT INTO {table} ({columns}) VALUES '.format(columns = columns, table = table)

# Collect one "(v1, v2, ...)" tuple per row and join once at the end instead
# of re-copying an ever-growing string on every iteration.
value_rows = []
for row in tqdm(df.index):
    # Strings are single-quoted with embedded quotes doubled; everything else
    # is written through str().
    vals = ", ".join("'{s}'".format(s=s.replace("'", "''")) if isinstance(s, str) else str(s) for s in df.loc[row])
    value_rows.append("(" + vals + ")")

if value_rows:
    #Insert all rows at once
    query = insert_prefix + " , ".join(value_rows) + " ;"
    my_db.execute_query(query)
else:
    # An INSERT without any value tuple is not valid SQL, so nothing is sent.
    print("No rows to insert into {table}; skipping INSERT".format(table = table))

Query successful


  0%|          | 0/171406 [00:00<?, ?it/s]

Query successful


# Table demographics

In [None]:
# Load the dataset identified by `table_id`, derive district / censal section
# from the combined code, create the `demographics` table and bulk-insert
# every row.
table_id = 'f1d9d5aa-61d7-460e-b423-1bbfff96fab3'
table = "demographics"

df = loader.get_data(table_id)

# Every destination column is NOT NULL, so incomplete rows are discarded.
df = df.dropna()

# We need to split the censal section and district since in this table it is in a different format:
# the last three characters of SECCIO_CENSAL are the section, the two before them the district.
df['CENSAL_SECTION'] = df['SECCIO_CENSAL'].str[-3:]
df['DISTRICT'] = df['SECCIO_CENSAL'].str[-5:-3]

# Delete unnecessary columns
# NOTE(review): columns are dropped by position, which depends on the column
# order returned by the loader -- confirm positions 6, 8, 10 and 11 are still
# the intended ones if the source dataset changes.
columns_to_delete = df.columns[[6,8,10,11]]
df = df.drop(columns = columns_to_delete)

# Create table
my_db.execute_query(""" CREATE TABLE {table}(
id INT AUTO_INCREMENT,
DISTRICT INT NOT NULL,
CENSAL_SECTION INT NOT NULL,
MEN INT NOT NULL,
WOMEN INT NOT NULL,
AGED_0_TO_14 INT NOT NULL,
AGED_15_TO_24 INT NOT NULL,
AGED_25_TO_64 INT NOT NULL,
AGED_ABOVE_65 INT NOT NULL,
NATIONALS INT NOT NULL,
EU INT NOT NULL,
NON_EU INT NOT NULL,
PRIMARY KEY (id)
)
""".format(table = table))

#Insert all data
# NOTE(review): values are emitted in the DataFrame's column order, so
# `col_names` must list the destination columns in exactly that order --
# confirm against the columns that remain after the drop above.
col_names = ['MEN', 'AGED_0_TO_14', 'AGED_ABOVE_65','WOMEN', 'AGED_25_TO_64', 'EU', 'NATIONALS', 'AGED_15_TO_24', 'NON_EU','CENSAL_SECTION', 'DISTRICT']
columns = ", ".join(col_names)

insert_prefix = 'INSERT INTO {table} ({columns}) VALUES '.format(columns = columns, table = table)

# Collect one "(v1, v2, ...)" tuple per row and join once at the end instead
# of re-copying an ever-growing string on every iteration.
value_rows = []
for row in tqdm(df.index):
    # Strings are single-quoted with embedded quotes doubled; everything else
    # is written through str().
    vals = ", ".join("'{s}'".format(s=s.replace("'", "''")) if isinstance(s, str) else str(s) for s in df.loc[row])
    value_rows.append("(" + vals + ")")

if value_rows:
    #Insert all rows at once
    query = insert_prefix + " , ".join(value_rows) + " ;"
    my_db.execute_query(query)
else:
    # An INSERT without any value tuple is not valid SQL, so nothing is sent.
    print("No rows to insert into {table}; skipping INSERT".format(table = table))

# Table hotels

In [None]:
# Load the dataset identified by `table_id`, keep the name and the projected
# coordinates, create the `hotels` table and bulk-insert every row.
table_id = '9bccce1b-0b9d-4cc6-94a7-459cb99450de'
table = "hotels"

df = loader.get_data(table_id)

# Source column -> destination column. Selecting the source columns in this
# explicit order ties each value to its destination column, instead of relying
# on whatever column order the loader happens to return.
column_map = {
    'geo_epgs_25831_x': 'EPSG_25831_X',
    'name': 'NAME',
    'geo_epgs_25831_y': 'EPSG_25831_Y',
}
columns_to_keep = list(column_map)
df = df[columns_to_keep]

# Create table
my_db.execute_query(""" CREATE TABLE {table}(
id INT AUTO_INCREMENT,
NAME VARCHAR(100) NOT NULL,
EPSG_25831_X INT NOT NULL,
EPSG_25831_Y INT NOT NULL,
PRIMARY KEY (id)
)
""".format(table = table))

#Insert all data
col_names = list(column_map.values())
columns = ", ".join(col_names)

insert_prefix = 'INSERT INTO {table} ({columns}) VALUES '.format(columns = columns, table = table)

# Collect one "(v1, v2, ...)" tuple per row and join once at the end instead
# of re-copying an ever-growing string on every iteration.
value_rows = []
for row in tqdm(df.index):
    # Strings are single-quoted with embedded quotes doubled; everything else
    # is written through str().
    vals = ", ".join("'{s}'".format(s=s.replace("'", "''")) if isinstance(s, str) else str(s) for s in df.loc[row])
    value_rows.append("(" + vals + ")")

if value_rows:
    #Insert all rows at once
    query = insert_prefix + " , ".join(value_rows) + " ;"
    my_db.execute_query(query)
else:
    # An INSERT without any value tuple is not valid SQL, so nothing is sent.
    print("No rows to insert into {table}; skipping INSERT".format(table = table))

# Table median_gross_income

In [None]:
# Load the dataset identified by `table_id`, keep the district, censal section
# and income columns, create the `median_gross_income` table and bulk-insert
# every row.
table_id = 'ef7e3825-0afd-444e-997f-8a8e999f0fe7'
table = "median_gross_income"

df = loader.get_data(table_id)

# Every destination column is NOT NULL, so incomplete rows are discarded.
df = df.dropna()
# NOTE(review): columns are dropped by position, which depends on the column
# order returned by the loader -- confirm positions 1, 2, 3, 5, 6 and 7 are
# still the intended ones if the source dataset changes.
columns_to_delete = df.columns[[1,2,3,5,6,7]]
df = df.drop(columns = columns_to_delete)

# Create table
my_db.execute_query(""" CREATE TABLE {table}(
id INT AUTO_INCREMENT,
DISTRICT INT NOT NULL,
CENSAL_SECTION INT NOT NULL,
MEDIAN_GROSS_INCOME INT NOT NULL,
PRIMARY KEY (id)
)
""".format(table = table))

#Insert all data
# NOTE(review): values are emitted in the DataFrame's column order, so
# `col_names` must list the destination columns in exactly that order --
# confirm against the columns that remain after the drop above.
col_names = ['DISTRICT', 'CENSAL_SECTION', 'MEDIAN_GROSS_INCOME']
columns = ", ".join(col_names)

insert_prefix = 'INSERT INTO {table} ({columns}) VALUES '.format(columns = columns, table = table)

# Collect one "(v1, v2, ...)" tuple per row and join once at the end instead
# of re-copying an ever-growing string on every iteration.
value_rows = []
for row in tqdm(df.index):
    # Strings are single-quoted with embedded quotes doubled; everything else
    # is written through str().
    vals = ", ".join("'{s}'".format(s=s.replace("'", "''")) if isinstance(s, str) else str(s) for s in df.loc[row])
    value_rows.append("(" + vals + ")")

if value_rows:
    #Insert all rows at once
    query = insert_prefix + " , ".join(value_rows) + " ;"
    my_db.execute_query(query)
else:
    # An INSERT without any value tuple is not valid SQL, so nothing is sent.
    print("No rows to insert into {table}; skipping INSERT".format(table = table))

# Table medical_centers

In [None]:
# Load the dataset identified by `table_id`, keep the name, type and projected
# coordinates, create the `medical_centers` table and bulk-insert every row.
table_id = '9e135848-eb0a-4bc5-8e60-de558213b3ed'
table = "medical_centers"

df = loader.get_data(table_id)

# Source column -> destination column. Selecting the source columns in this
# explicit order ties each value to its destination column, instead of relying
# on whatever column order the loader happens to return.
column_map = {
    'secondary_filters_name': 'TYPE',
    'geo_epgs_25831_x': 'EPSG_25831_X',
    'name': 'NAME',
    'geo_epgs_25831_y': 'EPSG_25831_Y',
}
columns_to_keep = list(column_map)
df = df[columns_to_keep]

# Create table
my_db.execute_query(""" CREATE TABLE {table}(
id INT AUTO_INCREMENT,
NAME VARCHAR(80) NOT NULL,
TYPE VARCHAR(60) NOT NULL,
EPSG_25831_X INT NOT NULL,
EPSG_25831_Y INT NOT NULL,
PRIMARY KEY (id)
)
""".format(table = table))

#Insert all data
col_names = list(column_map.values())
columns = ", ".join(col_names)

insert_prefix = 'INSERT INTO {table} ({columns}) VALUES '.format(columns = columns, table = table)

# Collect one "(v1, v2, ...)" tuple per row and join once at the end instead
# of re-copying an ever-growing string on every iteration.
value_rows = []
for row in tqdm(df.index):
    # Strings are single-quoted with embedded quotes doubled; everything else
    # is written through str().
    vals = ", ".join("'{s}'".format(s=s.replace("'", "''")) if isinstance(s, str) else str(s) for s in df.loc[row])
    value_rows.append("(" + vals + ")")

if value_rows:
    #Insert all rows at once
    query = insert_prefix + " , ".join(value_rows) + " ;"
    my_db.execute_query(query)
else:
    # An INSERT without any value tuple is not valid SQL, so nothing is sent.
    print("No rows to insert into {table}; skipping INSERT".format(table = table))

# Table public_transport

In [None]:
# Load the dataset identified by `table_id`, replace latitude / longitude with
# projected EPSG:25831 coordinates, create the `public_transport` table and
# bulk-insert every row.
table_id = 'e07dec0d-4aeb-40f3-b987-e1f35e088ce2'
table = "public_transport"

df = loader.get_data(table_id)

# Keep only the wanted columns in one selection (the DataFrame's own column
# order is preserved).
columns_to_keep = ['LATITUD', 'LONGITUD', 'NOM_CAPA', 'EQUIPAMENT']
df = df[[c for c in df.columns if c in columns_to_keep]]

# Add local coordinates
coord_transformer = Transformer.from_crs("WGS84", "EPSG:25831") # Coordinate transformer
# Project all points in a single call; element 0 holds the X values and
# element 1 the Y values (previously the whole projection was computed twice).
projected = coord_transformer.transform(df['LATITUD'].to_list(), df['LONGITUD'].to_list())
df['EPSG_25831_X'] = projected[0]
df['EPSG_25831_Y'] = projected[1]
df = df.drop(columns = ['LATITUD', 'LONGITUD'] )

# Create table
my_db.execute_query(""" CREATE TABLE {table}(
id INT AUTO_INCREMENT,
NAME VARCHAR(65) NOT NULL,
TYPE VARCHAR(35) NOT NULL,
EPSG_25831_X INT NOT NULL,
EPSG_25831_Y INT NOT NULL,
PRIMARY KEY (id)
)
""".format(table = table))

#Insert all data
# NOTE(review): values are emitted in the DataFrame's column order, so NAME
# and TYPE are filled from NOM_CAPA / EQUIPAMENT in whichever order the loader
# returns them -- confirm the pairing is the intended one.
col_names = [ 'NAME', 'TYPE', 'EPSG_25831_X', 'EPSG_25831_Y']
columns = ", ".join(col_names)

insert_prefix = 'INSERT INTO {table} ({columns}) VALUES '.format(columns = columns, table = table)

# Collect one "(v1, v2, ...)" tuple per row and join once at the end instead
# of re-copying an ever-growing string on every iteration.
value_rows = []
for row in tqdm(df.index):
    # Strings are single-quoted with embedded quotes doubled; everything else
    # is written through str().
    vals = ", ".join("'{s}'".format(s=s.replace("'", "''")) if isinstance(s, str) else str(s) for s in df.loc[row])
    value_rows.append("(" + vals + ")")

if value_rows:
    #Insert all rows at once
    query = insert_prefix + " , ".join(value_rows) + " ;"
    my_db.execute_query(query)
else:
    # An INSERT without any value tuple is not valid SQL, so nothing is sent.
    print("No rows to insert into {table}; skipping INSERT".format(table = table))


# Table tourist_housing

In [None]:
# Load the dataset identified by `table_id`, replace latitude / longitude with
# projected EPSG:25831 coordinates, create the `tourist_housing` table and
# bulk-insert every row.
table_id = 'b32fa7f6-d464-403b-8a02-0292a64883bf'
table = "tourist_housing"

df = loader.get_data(table_id)

# Keep only the wanted columns in one selection (the DataFrame's own column
# order is preserved).
columns_to_keep = ['LATITUD_Y', 'LONGITUD_X', 'NUMERO_PLACES']
df = df[[c for c in df.columns if c in columns_to_keep]]

# Add local coordinates
coord_transformer = Transformer.from_crs("WGS84", "EPSG:25831") # Coordinate transformer
# Project all points in a single call; element 0 holds the X values and
# element 1 the Y values (previously the whole projection was computed twice).
projected = coord_transformer.transform(df['LATITUD_Y'].to_list(), df['LONGITUD_X'].to_list())
df['EPSG_25831_X'] = projected[0]
df['EPSG_25831_Y'] = projected[1]
# After this drop the remaining columns are NUMERO_PLACES followed by the two
# projected coordinates, which is the order `col_names` expects below.
df = df.drop(columns = ['LATITUD_Y', 'LONGITUD_X'] )

# Create table
my_db.execute_query(""" CREATE TABLE {table}(
id INT AUTO_INCREMENT,
CAPACITY INT NOT NULL,
EPSG_25831_X INT NOT NULL,
EPSG_25831_Y INT NOT NULL,
PRIMARY KEY (id)
)
""".format(table = table))

#Insert all data
col_names = [ 'CAPACITY', 'EPSG_25831_X', 'EPSG_25831_Y']
columns = ", ".join(col_names)

insert_prefix = 'INSERT INTO {table} ({columns}) VALUES '.format(columns = columns, table = table)

# Collect one "(v1, v2, ...)" tuple per row and join once at the end instead
# of re-copying an ever-growing string on every iteration.
value_rows = []
for row in tqdm(df.index):
    # Strings are single-quoted with embedded quotes doubled; everything else
    # is written through str().
    vals = ", ".join("'{s}'".format(s=s.replace("'", "''")) if isinstance(s, str) else str(s) for s in df.loc[row])
    value_rows.append("(" + vals + ")")

if value_rows:
    #Insert all rows at once
    query = insert_prefix + " , ".join(value_rows) + " ;"
    my_db.execute_query(query)
else:
    # An INSERT without any value tuple is not valid SQL, so nothing is sent.
    print("No rows to insert into {table}; skipping INSERT".format(table = table))

# Table points_of_interest

In [None]:
# Resource id of the dataset to fetch and the name intended for its table.
table_id = '31431b23-d5b9-42b8-bcd0-a84da9d8c7fa'
table = "points_of_interest"

# Only the download is done so far; no table is created or filled in this cell.
df = loader.get_data(table_id)

# TODO: the downloaded data still needs to be filtered
