In [62]:
import numpy as np
import pandas as pd
from pandas import ExcelFile
from pandas import ExcelWriter
import requests, zipfile, io
import pyodbc
import sqlalchemy
from sqlalchemy import create_engine, MetaData, Table, Column, delete, insert, select, func, sql
from sqlalchemy.types import SmallInteger, Integer, String, Float, NVARCHAR
from datetime import datetime
import pprint
import urllib
import urllib.request
import copy

requests.packages.urllib3.disable_warnings()

In [63]:
dbdir = 'C:\ICData\Test43'
# Shapefiles folder will be created within dbdir

# Download and unzip NPD Shapefiles

In [64]:
# NPD shapefiles at https://www.npd.no/en/about-us/information-services/available-data/map-services/

npd_shapefiles = {
    'AFEX': 'https://factpages.npd.no/downloads/shape/afxAreaCurrent.zip',
    'AFEX_block': 'https://factpages.npd.no/downloads/shape/afxAreaSplitByBlock.zip',
    'Licence': 'https://factpages.npd.no/downloads/shape/prlAreaCurrent.zip',
    'Licence_block': 'https://factpages.npd.no/downloads/shape/prlAreaSplitByBlock.zip',
    'Licencing APA': 'https://factpages.npd.no/downloads/shape/apaAreaGross.zip',
    'Licencing APA_block': 'https://factpages.npd.no/downloads/shape/apaAreaNet.zip',
    'Wellbore': 'https://factpages.npd.no/downloads/shape/wlbPoint.zip',
    #Ignore Wellbore - Fontfile for presentation TTF
    'BAA': 'https://factpages.npd.no/downloads/shape/baaAreaCurrent.zip',
    'BAA_block': 'https://factpages.npd.no/downloads/shape/baaAreaSplitByBlock.zip',
    'Field': 'https://factpages.npd.no/downloads/shape/fldArea.zip',
    'Discovery': 'https://factpages.npd.no/downloads/shape/dscArea.zip',
    'Facility': 'https://factpages.npd.no/downloads/shape/fclPoint.zip',
    'Survey': 'https://factpages.npd.no/downloads/shape/seaArea.zip',
    'TUF': 'https://factpages.npd.no/downloads/shape/pipLine.zip',
    'Block': 'https://factpages.npd.no/downloads/shape/blkArea.zip',
    'Quadrant': 'https://factpages.npd.no/downloads/shape/qadArea.zip',
    'Sub area': 'https://factpages.npd.no/downloads/shape/subArea.zip'
}
for key, value in npd_shapefiles.items(): 
    print(value)

https://factpages.npd.no/downloads/shape/afxAreaCurrent.zip
https://factpages.npd.no/downloads/shape/afxAreaSplitByBlock.zip
https://factpages.npd.no/downloads/shape/prlAreaCurrent.zip
https://factpages.npd.no/downloads/shape/prlAreaSplitByBlock.zip
https://factpages.npd.no/downloads/shape/apaAreaGross.zip
https://factpages.npd.no/downloads/shape/apaAreaNet.zip
https://factpages.npd.no/downloads/shape/wlbPoint.zip
https://factpages.npd.no/downloads/shape/baaAreaCurrent.zip
https://factpages.npd.no/downloads/shape/baaAreaSplitByBlock.zip
https://factpages.npd.no/downloads/shape/fldArea.zip
https://factpages.npd.no/downloads/shape/dscArea.zip
https://factpages.npd.no/downloads/shape/fclPoint.zip
https://factpages.npd.no/downloads/shape/seaArea.zip
https://factpages.npd.no/downloads/shape/pipLine.zip
https://factpages.npd.no/downloads/shape/blkArea.zip
https://factpages.npd.no/downloads/shape/qadArea.zip
https://factpages.npd.no/downloads/shape/subArea.zip


In [65]:
# https://factpages.npd.no/downloads/shape/afxAreaCurrent.zip
# See https://stackoverflow.com/questions/9419162/download-returned-zip-file-from-url

def save_shapefiles():
    
    for key, value in npd_shapefiles.items(): 
        
        filepath = '{}\Shapefiles\\'.format(dbdir)
        zip_file_url = value

        print('Beginning file download with requests: ', zip_file_url)
        r = requests.get(zip_file_url, verify=False)
        # See https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings

        z = zipfile.ZipFile(io.BytesIO(r.content))
        z.extractall(filepath)

        print('Files extracted to: {}'.format(filepath))
        
save_shapefiles()

Beginning file download with requests:  https://factpages.npd.no/downloads/shape/afxAreaCurrent.zip
Files extracted to: C:\ICData\Test43\Shapefiles\
Beginning file download with requests:  https://factpages.npd.no/downloads/shape/afxAreaSplitByBlock.zip
Files extracted to: C:\ICData\Test43\Shapefiles\
Beginning file download with requests:  https://factpages.npd.no/downloads/shape/prlAreaCurrent.zip
Files extracted to: C:\ICData\Test43\Shapefiles\
Beginning file download with requests:  https://factpages.npd.no/downloads/shape/prlAreaSplitByBlock.zip
Files extracted to: C:\ICData\Test43\Shapefiles\
Beginning file download with requests:  https://factpages.npd.no/downloads/shape/apaAreaGross.zip
Files extracted to: C:\ICData\Test43\Shapefiles\
Beginning file download with requests:  https://factpages.npd.no/downloads/shape/apaAreaNet.zip
Files extracted to: C:\ICData\Test43\Shapefiles\
Beginning file download with requests:  https://factpages.npd.no/downloads/shape/wlbPoint.zip
Files ex

In [66]:
from os import walk

lst_shapefiles = []
for (dirpath, dirnames, filenames) in walk(dbdir):
    lst_shapefiles.extend(filenames)
    
#print(lst_shapefiles)

lst_shp = [i for i in lst_shapefiles if '.shp' in i and '.xml' not in i]
print(lst_shp)

['afxAreaCurrent.shp', 'afxAreaSplitByBlock.shp', 'apaAreaGross.shp', 'apaAreaNet.shp', 'baaAreaCurrent.shp', 'baaAreaSplitByBlock.shp', 'blkArea.shp', 'dscArea.shp', 'fclPoint.shp', 'fldArea.shp', 'pipLine.shp', 'prlAreaCurrent.shp', 'prlAreaSplitByBlock.shp', 'qadArea.shp', 'seaArea.shp', 'subArea.shp', 'wlbPoint.shp']


In [67]:
# Names for IC

# Areas with planned survey after 1.1. 2009. The dataset contains both gross (included turn area for the boat) and net (acquisition area) for seismic, electromagnetic, site and basement surveys.

In [71]:
'shapefile_descriptions_full' = {'AFEX': 'Current areas fee with area fee excemption',
    'AFEX': 'Current areas fee with area excemption split by block',
    'Licence': 'Current production licence areas, “active” with overall geometry, “inactive” without geometry. Overall geometry means a dissolved polygon including both stratigraphical and non- stratigraphical areas',
    'Licence': 'Complete area history split by block. “Active” licence areas can be found by querying “dtValTo” = NULL',
    'Licencing APA': 'Updated APA (Awards in predefined areas) gross areas',
    'Licencing APA': 'APA (Awards in predefined areas) net areas',
    'Wellbore': 'Exploration, development and shallow wellbores',
    'Wellbore': 'Fontfile for presentation',
    'BAA': 'Business arrangement areas',
    'BAA': 'Business arrangement areas. Complete history for each area split by block',
    'Field': 'Field outlines',
    'Discovery': 'Discovery outlines - including field outlines',
    'Facility': 'Fixed facilities, floating production facilities and main facilities onshore',
    'Survey': 'Areas with planned survey after 1.1. 2009. The dataset contains both gross (included turn area for the boat) and net (acquisition area) for seismic, electromagnetic, site and basement surveys',
    'TUF': 'Main piplelines. The dataset contains not infield pipelines',
    'Block': 'All blocks on the Norwegian continental shelf',
    'Quadrant': 'All quadrants on the Norwegian continental shelf',
    'Sub area': 'All sub-areas on the Norwegian continental shelf'
}

SyntaxError: can't assign to literal (<ipython-input-71-11890adb9141>, line 1)

# Import shapefiles to IC

In [11]:
# Import data to IC myself, prepare all formatting and save query output to Excel
# Three tables necessary

# Alternatively, generate 'INSERT INTO' script from dbo.SHAPEDATA
# See https://stackoverflow.com/questions/4526461/converting-select-results-into-insert-script-sql-server

In [56]:
params = 'DRIVER={ODBC Driver 13 for SQL Server};' \
         'SERVER=5SQFPQ2\SQLEXPRESS;' \
         'PORT=1433;' \
         'DATABASE=Test43;' \
         'Trusted_Connection=yes;'
            
params = urllib.parse.quote_plus(params)

engine = create_engine('mssql+pyodbc:///?odbc_connect=%s' % params, echo = True)

metadata = MetaData()
  
connection = engine.connect()

2020-02-29 19:31:10,484 INFO sqlalchemy.engine.base.Engine SELECT CAST(SERVERPROPERTY('ProductVersion') AS VARCHAR)
2020-02-29 19:31:10,484 INFO sqlalchemy.engine.base.Engine ()
2020-02-29 19:31:10,486 INFO sqlalchemy.engine.base.Engine SELECT schema_name()
2020-02-29 19:31:10,487 INFO sqlalchemy.engine.base.Engine ()
2020-02-29 19:31:10,490 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2020-02-29 19:31:10,491 INFO sqlalchemy.engine.base.Engine ()
2020-02-29 19:31:10,492 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS NVARCHAR(60)) AS anon_1
2020-02-29 19:31:10,493 INFO sqlalchemy.engine.base.Engine ()


### Populate SHAPEDATA

In [60]:
df_SHAPEDATA = pd.read_excel('{}\Shapefiles\shapefiles_data.xlsx'.format(dbdir), sheet_name='SHAPEDATA')
df_SHAPEDATA

Unnamed: 0,category,shape_id,project_id,title,path,source_coords,palette,activeindex,selection,svbrush,...,filtervalue,plotsymbol,chartstyle,chartsize,chartminimum,chartmaximum,chartautoscale,projection,datum,utmzone
0,2,1,-1,APA Gross (apaAreaGross),.\Shapefiles\apaAreaGross.shp,,,idLicRound,,1;8421504;16777215,...,,0,0,0,0,0,1,0,4230,
1,2,2,-1,APA Open (apaAreaNet),.\Shapefiles\apaAreaNet.shp,,,apaPolyId,,8;8421504;16777215,...,,0,0,0,0,0,1,0,4230,
2,1,3,-1,Field_Producing (fldArea),.\Shapefiles\fldArea.shp,,,idField,,1;8421504;16777215,...,Producing,0,0,0,0,0,1,0,4230,
3,1,4,-1,Discovery_HC type (dscArea),.\Shapefiles\dscArea.shp,,,idDiscover,,8;8421504;16777215,...,,0,0,0,0,0,1,0,4230,
4,3,5,-1,Blocks (blkArea),.\Shapefiles\blkArea.shp,,,idBlock__,,1;8421504;16777215,...,,0,0,0,0,0,1,0,4230,
5,3,6,-1,Quadrants (qadArea),.\Shapefiles\qadArea.shp,,,quadrant,,1;8421504;16777215,...,,0,0,0,0,0,1,0,4230,
6,3,7,-1,Sub-areas (subArea),.\Shapefiles\subArea.shp,,,NPDID_sub_,,1;8421504;16777215,...,,0,0,0,0,0,1,0,4230,
7,4,8,-1,Pipelines (pipLine),.\Shapefiles\pipLine.shp,,,idPipeline,,8;8421504;16777215,...,,0,0,0,0,0,1,0,4230,
8,5,9,-1,Surveys_Ongoing (seaArea),.\Shapefiles\seaArea.shp,,,survey_nam,,8;16772829;16777215,...,Pågående,0,0,0,0,0,1,0,4230,
9,5,10,-1,Surveys_Planned (seaArea),.\Shapefiles\seaArea.shp,,,survey_nam,,8;16772829;16777215,...,Planlagt,0,0,0,0,0,1,0,4230,


In [61]:
df_SHAPEDATA.to_sql('SHAPEDATA', engine, if_exists='append', index = False)

2020-02-29 19:33:32,617 INFO sqlalchemy.engine.base.Engine SELECT [INFORMATION_SCHEMA].[COLUMNS].[TABLE_SCHEMA], [INFORMATION_SCHEMA].[COLUMNS].[TABLE_NAME], [INFORMATION_SCHEMA].[COLUMNS].[COLUMN_NAME], [INFORMATION_SCHEMA].[COLUMNS].[IS_NULLABLE], [INFORMATION_SCHEMA].[COLUMNS].[DATA_TYPE], [INFORMATION_SCHEMA].[COLUMNS].[ORDINAL_POSITION], [INFORMATION_SCHEMA].[COLUMNS].[CHARACTER_MAXIMUM_LENGTH], [INFORMATION_SCHEMA].[COLUMNS].[NUMERIC_PRECISION], [INFORMATION_SCHEMA].[COLUMNS].[NUMERIC_SCALE], [INFORMATION_SCHEMA].[COLUMNS].[COLUMN_DEFAULT], [INFORMATION_SCHEMA].[COLUMNS].[COLLATION_NAME] 
FROM [INFORMATION_SCHEMA].[COLUMNS] 
WHERE [INFORMATION_SCHEMA].[COLUMNS].[TABLE_NAME] = CAST(? AS NVARCHAR(max)) AND [INFORMATION_SCHEMA].[COLUMNS].[TABLE_SCHEMA] = CAST(? AS NVARCHAR(max))
2020-02-29 19:33:32,617 INFO sqlalchemy.engine.base.Engine ('SHAPEDATA', 'dbo')
2020-02-29 19:33:32,621 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2020-02-29 19:33:32,623 INFO sqlalchemy.engine.base

### Populate T_GISFOLDERS

In [17]:
df_T_GISFOLDERS = pd.read_excel('{}\Shapefiles\shapefiles_data.xlsx'.format(dbdir), sheet_name='T_GISFOLDERS')
df_T_GISFOLDERS

# Note: removed first row "pk_index	f_key	f_value: 1	/Version	1"

Unnamed: 0,f_key,f_value
0,FolderIdCounter,5
1,Folder:1:Info,"{""FolderName"":""NPD Hydrocarbon""}"
2,Folder:1:Parent,ProjectFolder(4)
3,Folder:ProjectFolder(4):SubFolders,"[""1"",""2"",""3"",""4"",""5""]"
4,Folder:2:Info,"{""FolderName"":""NPD Licensing""}"
5,Folder:2:Parent,ProjectFolder(4)
6,Folder:3:Info,"{""FolderName"":""NPD Borders & Areas""}"
7,Folder:3:Parent,ProjectFolder(4)
8,Folder:4:Info,"{""FolderName"":""NPD Facilities & Pipelines""}"
9,Folder:4:Parent,ProjectFolder(4)


In [58]:
df_T_GISFOLDERS.to_sql('T_GISFOLDERS', engine, if_exists='append', index = False)

2020-02-29 19:31:22,502 INFO sqlalchemy.engine.base.Engine SELECT [INFORMATION_SCHEMA].[COLUMNS].[TABLE_SCHEMA], [INFORMATION_SCHEMA].[COLUMNS].[TABLE_NAME], [INFORMATION_SCHEMA].[COLUMNS].[COLUMN_NAME], [INFORMATION_SCHEMA].[COLUMNS].[IS_NULLABLE], [INFORMATION_SCHEMA].[COLUMNS].[DATA_TYPE], [INFORMATION_SCHEMA].[COLUMNS].[ORDINAL_POSITION], [INFORMATION_SCHEMA].[COLUMNS].[CHARACTER_MAXIMUM_LENGTH], [INFORMATION_SCHEMA].[COLUMNS].[NUMERIC_PRECISION], [INFORMATION_SCHEMA].[COLUMNS].[NUMERIC_SCALE], [INFORMATION_SCHEMA].[COLUMNS].[COLUMN_DEFAULT], [INFORMATION_SCHEMA].[COLUMNS].[COLLATION_NAME] 
FROM [INFORMATION_SCHEMA].[COLUMNS] 
WHERE [INFORMATION_SCHEMA].[COLUMNS].[TABLE_NAME] = CAST(? AS NVARCHAR(max)) AND [INFORMATION_SCHEMA].[COLUMNS].[TABLE_SCHEMA] = CAST(? AS NVARCHAR(max))
2020-02-29 19:31:22,503 INFO sqlalchemy.engine.base.Engine ('T_GISFOLDERS', 'dbo')
2020-02-29 19:31:22,526 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2020-02-29 19:31:22,527 INFO sqlalchemy.engine.b

### Populate GISPALETTES

In [44]:
df_GISPALETTES = pd.read_excel('{}\Shapefiles\shapefiles_data.xlsx'.format(dbdir), sheet_name='GISPALETTES')
df_GISPALETTES

# Note: 2 rows exist, just append additional rows

Unnamed: 0,ptype,pal_id,name,f_default,f_data
0,0,104,npd_stratigraph,1;16777215;16777215,0x02000000020000004E4F13000000353B313637373732...
1,0,105,npd_curActStat,1;16777215;16777215,0x0300000017000000417070726F76656420666F722070...
2,0,106,npd_Dctype,1;16777215;16777215,0x050000000000000013000000313B3133383832333233...
3,1,107,npd_medium,0;0;0,0x030000000A000000436F6E64656E736174650C000000...


In [45]:
df_GISPALETTES.dtypes

# Note that column f_data is dtype varbinary(max)
# Can't write df to sql because of error:
# Implicit conversion from data type nvarchar to varbinary(max) is not allowed. 
# Use the CONVERT function to run this query. 
# This works: VALUES (CONVERT(varbinary(max), 'value'))
# So generate INSERT INTO statement

ptype         int64
pal_id        int64
name         object
f_default    object
f_data       object
dtype: object

In [47]:
for row in df_GISPALETTES['f_data']:
    print(row, '\n')

0x02000000020000004E4F13000000353B31363737373231353B3131313834383935030000005945530E000000353B31363737373231353B323535 

0x0300000017000000417070726F76656420666F722070726F64756374696F6E13000000313B31363737373231353B31363737373231350900000050726F647563696E6713000000383B31363737373231353B3136373131383038090000005368757420646F776E13000000313B31363737373231353B3136373737323135 

0x050000000000000013000000313B31333838323332333B31363737373231350300000047415313000000383B31323631353933353B31363737373231350E0000004741532F434F4E44454E5341544513000000353B31323631353933353B3134313035383237030000004F494C12000000383B363533393739313B3136373737323135070000004F494C2F47415312000000353B363533393739313B3136373131393335 

0x030000000A000000436F6E64656E736174650C000000303B303B31363734343730330300000047617307000000303B303B323535030000004F696C09000000303B303B3332373638 



In [59]:
sql = '''

INSERT INTO GISPALETTES 
(ptype, pal_id, name, f_default, f_data) 
VALUES 
('0', '104', 'npd_stratigraph', '1;16777215;16777215', CONVERT(varbinary(max), '0x02000000020000004E4F13000000353B31363737373231353B3131313834383935030000005945530E000000353B31363737373231353B323535')),
('0', '105', 'npd_curActStat', '1;16777215;16777215', CONVERT(varbinary(max), '0x0300000017000000417070726F76656420666F722070726F64756374696F6E13000000313B31363737373231353B31363737373231350900000050726F647563696E6713000000383B31363737373231353B3136373131383038090000005368757420646F776E13000000313B31363737373231353B3136373737323135')),
('0', '106', 'npd_Dctype', '1;16777215;16777215', CONVERT(varbinary(max), '0x050000000000000013000000313B31333838323332333B31363737373231350300000047415313000000383B31323631353933353B31363737373231350E0000004741532F434F4E44454E5341544513000000353B31323631353933353B3134313035383237030000004F494C12000000383B363533393739313B3136373737323135070000004F494C2F47415312000000353B363533393739313B3136373131393335')),
('1', '107', 'npd_medium', '0;0;0', CONVERT(varbinary(max), '0x030000000A000000436F6E64656E736174650C000000303B303B31363734343730330300000047617307000000303B303B323535030000004F696C09000000303B303B3332373638'));

'''

#pd.read_sql_query(sql, engine)
with engine.begin() as conn:
    conn.execute(sql)

2020-02-29 19:31:29,728 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2020-02-29 19:31:29,729 INFO sqlalchemy.engine.base.Engine 

INSERT INTO GISPALETTES 
(ptype, pal_id, name, f_default, f_data) 
VALUES 
('0', '104', 'npd_stratigraph', '1;16777215;16777215', CONVERT(varbinary(max), '0x02000000020000004E4F13000000353B31363737373231353B3131313834383935030000005945530E000000353B31363737373231353B323535')),
('0', '105', 'npd_curActStat', '1;16777215;16777215', CONVERT(varbinary(max), '0x0300000017000000417070726F76656420666F722070726F64756374696F6E13000000313B31363737373231353B31363737373231350900000050726F647563696E6713000000383B31363737373231353B3136373131383038090000005368757420646F776E13000000313B31363737373231353B3136373737323135')),
('0', '106', 'npd_Dctype', '1;16777215;16777215', CONVERT(varbinary(max), '0x050000000000000013000000313B31333838323332333B31363737373231350300000047415313000000383B31323631353933353B31363737373231350E0000004741532F434F4E44454E5341544513000000353B

In [None]:
# Didn't work - folders though added to table do not appear - shapefiles on their own. And adding a shapefile to map, nothing happens.