# Preparation of the Geometric Polygons for Postal Code Visualization

#### Notebook Purpose:
The purpose of this notebook is to prepare the postal code polygons at the PLZ2 (two-digit) and PLZ5 (five-digit) levels.
The datasets are saved as BigQuery tables, with the polygons converted to the `GEOGRAPHY` data type.




### Loading Data

In [None]:
# Authenticate the Colab user before any of the Google Cloud steps below are run
from google.colab import auth

auth.authenticate_user()

In [None]:
# set parameters to use BigQuery Functionality

PROJECT_ID = 'solarinsight-383513' #@param {type: "string"}
!gcloud config set project {PROJECT_ID}

In [None]:
# BigQuery client bound to the project configured above
from google.cloud import bigquery as bq

client = bq.Client(project=PROJECT_ID)

In [None]:
# copy the file from the bucket/folder_name/file to our colab notebook

!gsutil cp gs://bucket-quickstart-solarinsight/geo_data/plz-5stellig.geojson .
!gsutil cp gs://bucket-quickstart-solarinsight/geo_data/plz-2stellig.geojson .


Copying gs://bucket-quickstart-solarinsight/geo_data/plz-5stellig.geojson...
\ [1 files][ 21.2 MiB/ 21.2 MiB]                                                
Operation completed over 1 objects/21.2 MiB.                                     
Copying gs://bucket-quickstart-solarinsight/geo_data/plz-2stellig.geojson...
\ [1 files][  7.2 MiB/  7.2 MiB]                                                
Operation completed over 1 objects/7.2 MiB.                                      


In [None]:
# install geopandas used to read the geojson files

!pip install geopandas

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting geopandas
  Downloading geopandas-0.13.2-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m20.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting fiona>=1.8.19 (from geopandas)
  Downloading Fiona-1.9.4.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.4/16.4 MB[0m [31m72.3 MB/s[0m eta [36m0:00:00[0m
Collecting pyproj>=3.0.1 (from geopandas)
  Downloading pyproj-3.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m80.8 MB/s[0m eta [36m0:00:00[0m
Collecting click-plugins>=1.0 (from fiona>=1.8.19->geopandas)
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Collecting cligj>=0.5 (from fiona>=1.8.19->geopandas

In [None]:
# Load the two downloaded GeoJSON files into GeoDataFrames
# (5-digit postal codes first, then the 2-digit regions)

import geopandas as gpd

geo_data_plz5, geo_data_plz2 = (
    gpd.read_file(geojson_path)
    for geojson_path in ('plz-5stellig.geojson', 'plz-2stellig.geojson')
)


### Data Preprocessing

In [None]:
# Derive the two-digit postal region (PLZ2) from the five-digit code (PLZ5).
#
# The code column is called 'plz' on a freshly loaded frame and 'plz5' once this
# cell has run, so look it up under either name. Without this the cell fails
# with a KeyError when it is executed a second time.
plz5_column = 'plz' if 'plz' in geo_data_plz5.columns else 'plz5'

# .str[:2] is the vectorised equivalent of slicing every string individually;
# missing values stay missing instead of raising
geo_data_plz5['plz2'] = geo_data_plz5[plz5_column].str[:2]

# Suffix the column names with the PLZ level; rename() silently skips keys that
# are no longer present, so this step is also safe to repeat
geo_data_plz5 = geo_data_plz5.rename(columns={'geometry': 'geometry5', 'plz': 'plz5'})

geo_data_plz5.head()

Unnamed: 0,plz5,note,qkm,einwohner,geometry5,plz2
0,1067,01067 Dresden,6.866862,11957,"POLYGON ((13.68689 51.06395, 13.68698 51.06344...",1
1,1069,01069 Dresden,5.351833,25491,"MULTIPOLYGON (((13.72009 51.04664, 13.72162 51...",1
2,1097,01097 Dresden,3.297752,14821,"POLYGON ((13.72548 51.06860, 13.73000 51.06477...",1
3,1099,01099 Dresden,58.506789,28018,"POLYGON ((13.74218 51.08979, 13.74199 51.08963...",1
4,1108,01108 Dresden,16.447222,5876,"POLYGON ((13.76543 51.17491, 13.76282 51.17392...",1


In [None]:
# Suffix the PLZ2 column names with their level, mirroring the PLZ5 frame
plz2_column_names = {'plz': 'plz2', 'geometry': 'geometry2'}
geo_data_plz2 = geo_data_plz2.rename(columns=plz2_column_names)

geo_data_plz2.head()

Unnamed: 0,plz2,qkm,einwohner,geometry2
0,1,5097.98,1227440,"POLYGON ((13.63115 51.50731, 13.63050 51.50712..."
1,2,3477.04,445361,"POLYGON ((14.13494 51.54129, 14.13149 51.53860..."
2,3,3113.65,304953,"POLYGON ((13.54857 51.74694, 13.54681 51.74622..."
3,4,6206.45,1200630,"MULTIPOLYGON (((12.17924 51.38758, 12.17930 51..."
4,6,9375.81,1336123,"MULTIPOLYGON (((10.89703 51.61191, 10.90103 51..."


### BigQuery Table Creation

In [None]:
# Write the PLZ2 frame straight to a BigQuery table with pandas-gbq;
# a table that already exists under this name is replaced.
# NOTE(review): the later conversion cell parses geometry2 with ST_GEOGFROMTEXT,
# so the column is expected to arrive in BigQuery as WKT text - confirm how the
# installed pandas-gbq version serialises the geometry column.

import pandas_gbq

table_name = 'geo_data.plz2_polygone'
pandas_gbq.to_gbq(
    dataframe=geo_data_plz2,
    destination_table=table_name,
    project_id=PROJECT_ID,
    if_exists='replace',
)

In [None]:
# Same upload for the PLZ5 frame; an existing table is replaced
table_name = 'geo_data.plz5_polygone'
pandas_gbq.to_gbq(
    dataframe=geo_data_plz5,
    destination_table=table_name,
    project_id=PROJECT_ID,
    if_exists='replace',
)

### BigQuery Geography Data Type Transformation

In [None]:
from google.cloud import bigquery

# Fully qualified name of the PLZ2 table. It is both the source of the query and
# its destination, so it is defined once and reused in both places.
table_name = f'{PROJECT_ID}.geo_data.plz2_polygone'

# Create a BigQuery client
client = bigquery.Client(project=PROJECT_ID)

# Parse the geometry2 text column into a GEOGRAPHY column named `polygon`;
# make_valid => TRUE asks BigQuery to repair invalid polygons while parsing.
query = f'''
SELECT
  plz2, ST_GEOGFROMTEXT(geometry2, make_valid => TRUE) AS polygon, qkm, einwohner
FROM
  `{table_name}`
'''

# Write the result back over the source table.
# NOTE: afterwards the table no longer has a geometry2 column, so this cell can
# only be run again after the upload cell has recreated the table.
job_config = bigquery.QueryJobConfig(
    destination=table_name,
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)

# Start the query job
job = client.query(query, job_config=job_config)

# Wait for the job to finish so that a failed query surfaces in this cell
job.result()

In [None]:
# Fully qualified name of the PLZ5 table. It is both the source of the query and
# its destination, so it is defined once and reused in both places.
table_name = f'{PROJECT_ID}.geo_data.plz5_polygone'

# Create a BigQuery client
client = bigquery.Client(project=PROJECT_ID)

# Parse the geometry5 text column into a GEOGRAPHY column named `polygon`;
# make_valid => TRUE asks BigQuery to repair invalid polygons while parsing.
query = f'''
SELECT
  plz5, plz2, ST_GEOGFROMTEXT(geometry5, make_valid => TRUE) AS polygon, qkm, einwohner, note
FROM
  `{table_name}`
'''

# Write the result back over the source table.
# NOTE: afterwards the table no longer has a geometry5 column, so this cell can
# only be run again after the upload cell has recreated the table.
job_config = bigquery.QueryJobConfig(
    destination=table_name,
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)

# Start the query job
job = client.query(query, job_config=job_config)

# Wait for the job to finish so that a failed query surfaces in this cell
job.result()