# SQL Script for Creating Additional Project Tables

This SQL script creates two project tables in Google BigQuery, **'cat_pop_by_province_2015_2017'** and **'cat_percapita_income_by_province_2015_2017'**. These tables are populated with data related to population, area, density, and per capita income by province for the years 2015, 2016, and 2017.

Unlike our previous kernel, which focused on data cleaning, this kernel's purpose is **table creation**. These tables are essential for future analysis, especially for practicing **SQL join queries**. They provide a strong foundation for combining and analyzing data from diverse sources, demonstrating the power of SQL joins.

### Import libraries and modules

In [1]:
import pandas as pd
from google.cloud import bigquery

### Import function: Interactive SQL Query to Pandas DataFrame Converter

Import the custom query_df and run_query functions from the 'query_functions.py' file to execute SQL queries using a pre-configured BigQuery client object.

In [2]:
from query_functions import query_df  # Execute the query and return the output as a DataFrame
from query_functions import run_query  # Execute the query without returning a DataFrame, used for INSERT, UPDATE, DELETE, etc.

### Datasets and Tables paths to Google BigQuery

In [3]:
# Fully-qualified BigQuery dataset (project.dataset) shared by every table below.
# Kept in one place so the project or dataset can be changed without editing
# each table path separately.
DATASET_ID = "bq-analyst-230590.project_cat_mobile_coverage_2015_2017"

# Catalan population and density by province (2015-2017)
pop_density = f"{DATASET_ID}.cat_pop_by_province_2015_2017"

# Per capita income by catalan province (2015-2017)
percapita_income = f"{DATASET_ID}.cat_percapita_income_by_province_2015_2017"

### Create the cat_pop_by_province_2015_2017 Table

In [4]:
# Dataset: {pop_density}

# SQL query: create the population/density table, but only if it does not exist yet.
# No column is declared NOT NULL, so rows can be inserted with sq_km and
# density_per_sq_km left empty and filled in afterwards.
query = f"""
CREATE TABLE IF NOT EXISTS `{pop_density}` (
  `province` STRING,
  `year` INT64,
  `population` INT64,
  `sq_km` INT64,
  `density_per_sq_km` FLOAT64
)
    """

# Execute the DDL statement (run_query is the helper used for statements that return no DataFrame)
run_query(query)

Query successfully executed, and the table has been updated.


    - Insert and update pop_density values

In [5]:
# Dataset: {pop_density}

# SQL query: load province, year and population values.
# The table is created with CREATE TABLE IF NOT EXISTS, so it survives a notebook
# re-run; a plain INSERT would append the same 12 rows again each time.
# MERGE keyed on (province, year) inserts only the rows that are not present yet,
# which makes this cell safe to re-run. sq_km and density_per_sq_km are not set
# here and therefore stay NULL until the UPDATE statements that follow.
query = f"""
MERGE `{pop_density}` AS t
USING (
  SELECT province, year, population
  FROM UNNEST(ARRAY<STRUCT<province STRING, year INT64, population INT64>>[
    ('Barcelona', 2015, 5618162),
    ('Barcelona', 2016, 5635085),
    ('Barcelona', 2017, 5652301),
    ('Lleida', 2015, 742138),
    ('Lleida', 2016, 742099),
    ('Lleida', 2017, 741884),
    ('Girona', 2015, 765783),
    ('Girona', 2016, 766273),
    ('Girona', 2017, 766705),
    ('Tarragona', 2015, 811089),
    ('Tarragona', 2016, 810947),
    ('Tarragona', 2017, 810600)
  ])
) AS s
ON t.province = s.province
  AND t.year = s.year
WHEN NOT MATCHED THEN
  INSERT (`province`, `year`, `population`)
  VALUES (s.province, s.year, s.population)
    """

# Execute the query
run_query(query)

Query successfully executed, and the table has been updated.


In [6]:
# Dataset: {pop_density}

# SQL query: assign each province its area in square kilometres (the same value for every year).
# Only rows whose sq_km is still NULL are updated, so re-running the cell does not
# overwrite existing values. The CASE has no ELSE branch, so a row with any other
# province name would keep a NULL sq_km.
query = f"""
UPDATE `{pop_density}`
SET
  `sq_km` = CASE
    WHEN province = 'Barcelona' THEN 7726
    WHEN province = 'Lleida' THEN 12172
    WHEN province = 'Girona' THEN 5908
    WHEN province = 'Tarragona' THEN 6303
  END
WHERE sq_km IS NULL
    """

# Execute the query
run_query(query)

Query successfully executed, and the table has been updated.


In [7]:
# Dataset: {pop_density}

# SQL query: derive density_per_sq_km as population divided by sq_km, rounded to one decimal.
# Restricted to rows where density_per_sq_km is still NULL, so values that were
# already computed are left untouched. Depends on sq_km having been filled first.
query = f"""
UPDATE `{pop_density}`
SET density_per_sq_km = ROUND(population / sq_km,1)
WHERE density_per_sq_km IS NULL
    """

# Execute the query
run_query(query)

Query successfully executed, and the table has been updated.


    - pop_density table:

In [8]:
# Dataset: {pop_density}

# SQL query: read back every row and column of the population/density table.
# There is no ORDER BY, so the row order of the result is not guaranteed.
query = f"""
SELECT * 
FROM `{pop_density}`
    """

# Execute the query and display output
query_df(query)

Unnamed: 0,province,year,population,sq_km,density_per_sq_km
0,Girona,2016,766273,5908,129.7
1,Girona,2017,766705,5908,129.8
2,Girona,2015,765783,5908,129.6
3,Lleida,2015,742138,12172,61.0
4,Lleida,2016,742099,12172,61.0
5,Lleida,2017,741884,12172,61.0
6,Barcelona,2015,5618162,7726,727.2
7,Barcelona,2016,5635085,7726,729.4
8,Barcelona,2017,5652301,7726,731.6
9,Tarragona,2015,811089,6303,128.7


### Create the cat_percapita_income_by_province_2015_2017 Table

In [9]:
# Dataset: {percapita_income} (the only table this statement references)

# SQL query: create the per capita income table, but only if it does not exist yet.
query = f"""
CREATE TABLE IF NOT EXISTS `{percapita_income}` (
  `year` INT64,
  `province` STRING,
  `per_capita_income` INT64
)
    """

# Execute the DDL statement (run_query is the helper used for statements that return no DataFrame)
run_query(query)

Query successfully executed, and the table has been updated.


    - Insert percapita_income values

In [10]:
# Dataset: {percapita_income}

# SQL query: load per capita income for each province and year.
# The table is created with CREATE TABLE IF NOT EXISTS, so it survives a notebook
# re-run; a plain INSERT would append the same 12 rows again each time.
# MERGE keyed on (province, year) inserts only the rows that are not present yet,
# which makes this cell safe to re-run.
query = f"""
MERGE `{percapita_income}` AS t
USING (
  SELECT year, province, per_capita_income
  FROM UNNEST(ARRAY<STRUCT<year INT64, province STRING, per_capita_income INT64>>[
    (2015, 'Barcelona', 27214),
    (2016, 'Barcelona', 27913),
    (2017, 'Barcelona', 28481),
    (2015, 'Tarragona', 22486),
    (2016, 'Tarragona', 23130),
    (2017, 'Tarragona', 23534),
    (2015, 'Lleida', 20136),
    (2016, 'Lleida', 20713),
    (2017, 'Lleida', 21091),
    (2015, 'Girona', 25200),
    (2016, 'Girona', 25598),
    (2017, 'Girona', 25992)
  ])
) AS s
ON t.province = s.province
  AND t.year = s.year
WHEN NOT MATCHED THEN
  INSERT (`year`, `province`, `per_capita_income`)
  VALUES (s.year, s.province, s.per_capita_income)
    """

# Execute the query
run_query(query)

Query successfully executed, and the table has been updated.


    - percapita_income table:

In [11]:
# Dataset: {percapita_income} (the only table this query reads)

# SQL query: read back the whole income table, highest per capita income first.
query = f"""
SELECT *
FROM `{percapita_income}`
ORDER BY per_capita_income DESC
    """

# Execute the query and display the result as a DataFrame
query_df(query)

Unnamed: 0,year,province,per_capita_income
0,2017,Barcelona,28481
1,2016,Barcelona,27913
2,2015,Barcelona,27214
3,2017,Girona,25992
4,2016,Girona,25598
5,2015,Girona,25200
6,2017,Tarragona,23534
7,2016,Tarragona,23130
8,2015,Tarragona,22486
9,2017,Lleida,21091


### Joining Population Density and Per Capita Income Data by Province and Year

In [14]:
# Datasets: {pop_density} (alias p), {percapita_income} (alias c)

# SQL query: inner join of both tables on the (province, year) pair, so each
# population/density row is paired with the income figure of the same province
# and year. Rows are sorted by per capita income, highest first.
# The sort column is named explicitly rather than by ordinal position, so the
# ordering does not silently change if the SELECT list is reordered or extended.
query = f"""
SELECT
    p.province,
    p.year,
    p.population,
    p.sq_km,
    p.density_per_sq_km,
    c.per_capita_income
FROM `{pop_density}` p
JOIN `{percapita_income}` c
    ON p.province = c.province
    AND p.year = c.year
ORDER BY c.per_capita_income DESC
"""

# Execute the query and display the result as a DataFrame
query_df(query)

Unnamed: 0,province,year,population,sq_km,density_per_sq_km,per_capita_income
0,Barcelona,2017,5652301,7726,731.6,28481
1,Barcelona,2016,5635085,7726,729.4,27913
2,Barcelona,2015,5618162,7726,727.2,27214
3,Girona,2017,766705,5908,129.8,25992
4,Girona,2016,766273,5908,129.7,25598
5,Girona,2015,765783,5908,129.6,25200
6,Tarragona,2017,810600,6303,128.6,23534
7,Tarragona,2016,810947,6303,128.7,23130
8,Tarragona,2015,811089,6303,128.7,22486
9,Lleida,2017,741884,12172,61.0,21091
