## DEMO Part 00: Import and prepare data as DataFrames

In [1]:
from datetime import datetime as dt
# Record a naive UTC timestamp when this first cell runs.
# NOTE(review): presumably used later to report elapsed time — confirm.
# NOTE: datetime.utcnow() is deprecated since Python 3.12; its replacement
# returns an aware datetime, so every timestamp compared with t0 would have
# to be migrated together.
t0 = dt.utcnow()

### Load the Paleo 'Slip Rate' sheet from the Excel spreadsheet

In [2]:
import pandas as pd
import os
# Directory holding the current paleoseismic site database files.
path = "/geodata/PaleoseismicSiteDatabase/Current"
paleo_xlsx = os.path.join(path, "SiteDB_v1.1_200529.xlsx")

# The 'Slip Rate' sheet has a two-row header; header=[0, 1] tells pandas to
# build a two-level MultiIndex for the columns from those rows.
slip_rate_df = pd.read_excel(
    paleo_xlsx,
    sheet_name="Slip Rate",
    header=[0, 1],
)

In [3]:
# Display the loaded frame to inspect its two-level column header.
slip_rate_df

Unnamed: 0_level_0,Fault,Fault,Fault,Fault,Fault,Fault,Fault,Fault,Fault,Site Data,Site Data,Site Data,Site Data,Site Data,Site Data,Site Data,Site Data,Site Data,Site Data,Site Data,Site Data
Unnamed: 0_level_1,Fault,CFM name,CFM #,CFM\nSense\nDominant,CFM\nSense\nSecondary,CFM\nDip\n(°),CFM\nRake\n(°),CFM\nNet SR\n (mm/yr),CFM Net SR \ncomments,Site DB ID,...,No. of events Pref.,No. of events Min.,No. of events Max.,Comments regarding SR,Comments regarding Dating,Data source,Last updated,Updated by,Last QA'd,QA'd by
0,Vernon,Vernon 1,413.0,Reverse,Dextral,65 ± 10,290 ± 10,4.5 (2.5-5),Unchanged from 2014 AFM,1,...,,,,Bartholomew et al. (2014) only report SR in co...,,Bartholomew et al. (2014),2020-05-06,Nicola Litchfield,,
1,,,,,,,,,,2,...,,,,Bartholomew et al. (2014) only report SR in co...,,Bartholomew et al. (2014),2020-05-06,Nicola Litchfield,,
2,Vernon,Vernon 2,412.0,Dextral,,80 ± 10,360 ± 10,4.5 (2.5-5),Unchanged from 2014 AFM,3,...,,,,Bartholomew et al. (2014) only report SR in co...,OSL age uncertainties 1 sigma,Bartholomew et al. (2014),2020-05-06,Nicola Litchfield,,
3,,,,,,,,,,4,...,,,,Bartholomew et al. (2014) only report SR in co...,OSL age uncertainties 1 sigma,Bartholomew et al. (2014),2020-05-06,Nicola Litchfield,,
4,Wellington,Wellington Hut Valley 5,292.0,Dextral,Normal,65 ± 10,30 ± 10,6.3 (5.1-8.2),Unchanged from 2014 AFM,7,...,,,,Ninis et al. (2013) only report SR in componen...,OSL age uncertainties 2 sigma,Ninis et al. (2013),2020-05-14,Nicola Litchfield,,
5,,,,,,,,,,8,...,,,,Ages and offset from Ninis et al. (2013) using...,OSL age uncertainties 2 sigma,Ninis et al. (2013),2020-05-14,Nicola Litchfield,,
6,,,,,,,,,,9,...,,,,Reported SR of Ninis et al. (2013) as only one...,OSL age uncertainties 2 sigma,Ninis et al. (2013),2020-05-14,Nicola Litchfield,,
7,,,,,,,,,,11,...,4.0,,,Reported values of Little et al. (2010) as onl...,OSL age uncertainties 1 sigma,Little et al. (2010),2020-05-15,Nicola Litchfield,,


In [4]:
# Flatten the two-level column header into single "<level0>_<level1>" strings.
# The guard makes this cell safe to re-run: once the columns are plain strings,
# unpacking each label into (x, y) would raise ValueError.
if isinstance(slip_rate_df.columns, pd.MultiIndex):
    slip_rate_df.columns = [
        f"{x}_{y}" for x, y in slip_rate_df.columns.to_flat_index()
    ]

# Focus on the interesting columns, addressed by their flattened names.
# To inspect their dtypes instead, append `.dtypes` to this expression.
slip_rate_df.filter(items=['Site Data_Site DB ID',
                           'Site Data_Site Name',
                           'Site Data_Easting (NZTM)',
                           'Site Data_Northing (NZTM)'])

Unnamed: 0,Site Data_Site DB ID,Site Data_Site Name,Site Data_Easting (NZTM),Site Data_Northing (NZTM)
0,1,2,1685350.0,5390753.0
1,2,7,1686050.0,5392467.0
2,3,35; Flaxey splay,1689691.0,5398181.0
3,4,43; Homestead Stream,1690023.0,5398339.0
4,7,Emerald Hill; EHT5-T6,1777235.0,5448284.0
5,8,Emerald Hill; \nEHT2-T3-T4,1777132.0,5448409.0
6,9,Emerald Hill;\nEHT2-T3,1777344.0,5448532.0
7,11,Te Marua terraces,1777968.0,5448830.0


In [5]:
# Cast the site-name column to str: it appears to mix numeric-looking names
# (e.g. 2, 7) with text labels.
# NOTE: astype(str) also converts missing values to the literal string 'nan'.
slip_rate_df['Site Data_Site Name'] = slip_rate_df['Site Data_Site Name'].astype(str)
slip_rate_df['Site Data_Site Name']

0                             2
1                             7
2              35; Flaxey splay
3          43; Homestead Stream
4         Emerald Hill; EHT5-T6
5    Emerald Hill; \nEHT2-T3-T4
6        Emerald Hill;\nEHT2-T3
7             Te Marua terraces
Name: Site Data_Site Name, dtype: object

### Load SiteDB from Shapefile

In [6]:
%%time
# Naive UTC timestamp taken just before the shapefile is loaded.
# NOTE(review): presumably a timing checkpoint — confirm where t10 is read.
t10 = dt.utcnow()
import geopandas as gpd

# Read the site database shapefile from the GIS subfolder of `path`.
sites_gdf = gpd.read_file(os.path.join(path, "GIS/SiteDB_V1_200526.shp"))

CPU times: user 208 ms, sys: 28 ms, total: 236 ms
Wall time: 238 ms


In [7]:
# List the attribute columns carried by the site shapefile.
sites_gdf.columns

Index(['OBJECTID', 'FIELD_NUMB', 'FEATURE_NA', 'METHOD', 'METHOD_ACC',
       'FAULT_FEAT', 'GEO_FEATUR', 'GEO_INFO', 'DOM_SENSE', 'SUB_SENSE',
       'DOWN_QUAD', 'DIP', 'DIP_DIR', 'NET_TREND', 'NET_PLUNGE', 'STRIKE_DIS',
       'STRIKE_TIM', 'STRIKE_SR', 'STRIKE_EVE', 'STRIKE_SED', 'VERT_DISP',
       'VERT_TIME', 'VERT_SR', 'VERT_EVENT', 'VERT_SED', 'DIP_DISP',
       'DIP_TIME', 'DIP_SR', 'DIP_EVENTS', 'DIP_SED', 'NET_DISP', 'NET_TIME',
       'NET_SR', 'NET_EVENTS', 'NET_SED', 'RI', 'LE', 'BIB_ID', 'TRACE_ID',
       'EASTING', 'NORTHING', 'POINT_ID', 'SOURCE', 'DATE_', 'CONFID',
       'ACCESS_', 'AUTHOR', 'OWNER', 'OTHER_INFO', 'POINT_X', 'POINT_Y',
       'Parameter', 'SiteDB_ID', 'geometry'],
      dtype='object')

In [8]:
# Preview only the site ID, geometry and feature-name columns.
# DataFrame.filter(items=...) silently skips any label that is not present.
sites_gdf.filter(items=['SiteDB_ID', 'geometry', 'FEATURE_NA'])

Unnamed: 0,SiteDB_ID,geometry,FEATURE_NA
0,001,POINT (1685350.000 5390753.000),
1,002,POINT (1686050.032 5392467.073),
2,003,POINT (1688883.169 5397088.581),Flaxey splay
3,005,POINT (1689691.000 5398181.000),
4,004,POINT (1690022.833 5398338.833),Homestead Stream
...,...,...,...
369,,POINT (1692158.221 5394755.979),
370,,POINT (1684597.737 5389841.334),
371,,POINT (1682355.389 5387817.267),
372,,POINT (1680582.677 5386831.693),


### Load Active Fault DB from Shapefile

In [9]:
%%time
# Directory of the NZ CFM V0.2 (May 2020) release.
path2 = "/geodata/CFM time-stamped versions and publications/NZ CFM V0.2_May 2020"
# Read the active fault database shapefile (file dated July 2017) from its GIS subfolder.
nzafd_gdf = gpd.read_file(os.path.join(path2, "GIS/NZAFD_July_2017.shp"))

CPU times: user 448 ms, sys: 20 ms, total: 468 ms
Wall time: 467 ms


In [10]:
# Show only the fault traces whose ACCURACY attribute equals 'Accurate'.
nzafd_gdf.loc[nzafd_gdf['ACCURACY'].eq('Accurate')]

Unnamed: 0,NAME,REC_INTERV,SECTION,AUTHOR,SOURCE,ACCURACY,DOWNQUAD,SLIPRATE,SUBSLIPTYP,DIP,DISPLACEME,LASTEVENT,DOMSLIPTYP,DIPDIRECTI,EVENT,geometry
0,Wairoa North Fault,,,,,Accurate,SW,Unknown,Unknown,Unknown,Unknown,Unknown,Normal,W,,"LINESTRING (1786021.356 5894921.191, 1786101.6..."
1,Wairoa North Fault,,,,,Accurate,SW,Unknown,Unknown,Unknown,Unknown,Unknown,Normal,W,,"LINESTRING (1787092.714 5891932.979, 1787176.1..."
2,Wairoa North Fault,,,,,Accurate,SW,Unknown,Unknown,Unknown,Unknown,Unknown,Normal,W,,"LINESTRING (1787223.745 5891533.191, 1787355.1..."
3,Wairoa South Fault,,,,,Accurate,SW,Unknown,Unknown,Unknown,Unknown,Unknown,Normal,W,,"LINESTRING (1790189.047 5886042.740, 1790293.4..."
6,Wairoa South Fault,,,,,Accurate,SW,Unknown,Unknown,Unknown,Unknown,Unknown,Normal,W,,"LINESTRING (1789494.507 5888518.211, 1789557.8..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9708,,,,,Kaikoura 2016,Accurate,SE,Unknown,Dextral,Unknown,Unknown,Historical,Unknown,Unknown,Kaikoura 2016,"LINESTRING (1676333.465 5347460.265, 1676278.4..."
9709,Fidget Fault,,,,,Accurate,,,,,,,,,,"LINESTRING (1655784.037 5335639.606, 1656062.5..."
9710,Fidget Fault,,,,,Accurate,,,,,,,,,,"LINESTRING (1651704.505 5334684.870, 1651767.9..."
9711,Fidget Fault,,,,,Accurate,,,,,,,,,,"LINESTRING (1650271.937 5334465.167, 1650330.1..."


In [11]:
# OVERWRITE CFM with latest from Hannu
# NOTE(review): as written, the disabled line below would rebind `sites_gdf`
# (the SiteDB points loaded earlier) to the CFM shapefile — confirm the
# intended target variable before re-enabling it.
# sites_gdf = gpd.read_file(os.path.join(path2, "GIS/NZ_CFM_v1_010520.shp"))