# Create `stationlist.csv` and `site_model_stations.csv`

##  Configuration

In [None]:
# Import dependencies
import os
import glob
import numpy as np
import pandas as pd

import geopandas as gpd
import contextily as ctx
import matplotlib.pyplot as plt

import utils
import usgs_get_vs30

from openquake.commands.prepare_site_model import main as oq_site_model
from openquake.hazardlib.site import calculate_z1pt0, calculate_z2pt5


## User input

In [None]:
# Events list from ECD
event = 'DRAFT_20150425_M7.8_Gorkha'

# Glob pattern of the folder holding the event's station files; the `*`
# matches whichever folder under `..` contains the event
stations_dir = os.path.join('..', '*', event, 'Recording_Stations')

# Prefer `Stations_Unique.csv` when it exists
unique_files = glob.glob(os.path.join(stations_dir, 'Stations_Unique.csv'))
if unique_files:
    file_path = unique_files[0]
else:
    # If not `Stations_Unique` then use the only Stations_REF available
    stsfiles = glob.glob(os.path.join(stations_dir, 'Stations_*.csv'))
    if not stsfiles:
        # Report the "nothing found" case explicitly instead of the
        # misleading "multiple files" message
        raise FileNotFoundError(
            f'No Stations_*.csv file found for event {event!r} '
            f'(searched {stations_dir})')
    if len(stsfiles) > 1:
        # Ambiguous: refuse to pick one of several reference files
        raise ValueError(
            'Multiple Stations_*.csv files found but no Unique file')
    file_path = stsfiles[0]

print('Reference file for OQ calculation:\n   ', 
      file_path)

# Stations table used by all the following cells
df = pd.read_csv(file_path)

## Prepare `stationlist.csv` following OQ format
- Remove rows (stations) with missing values in any of the selected IMs (OQ only accepts stations with values in all IMs)
- Remove Vs30 data (it will be provided in the site model file)


In [None]:
# Summarise the stations by type and report the missing IM values.
# Nothing is removed here: the filtering is done once the IMTs are selected.

# Boolean masks per station type
seismic = df.STATION_TYPE == 'seismic'
macroseismic = df.STATION_TYPE == 'macroseismic'
print('Number of stations:', len(df))
print('  # seismic stations:', int(seismic.sum()))
print('  # macroseismic stations:', int(macroseismic.sum()))

# Find columns with values
vals = [x for x in df.columns if x.endswith('_VALUE')]

# Print the number of missing values in columns
print('\nMissing vals in SEISMIC stations:\n', 
      df.loc[seismic, vals].isnull().sum())

print('\nMissing vals in MACROSEISMIC stations:\n', 
      df.loc[macroseismic, vals].isnull().sum())


In [None]:
# IMTs passed to OQ
# (others of interest: 'SA(0.3)_VALUE', 'SA(0.6)_VALUE', 'SA(1.0)_VALUE')
imts = ['PGA_VALUE']

# Station columns used in OQ, followed by the value column of each IMT
# and its matching LN_SIGMA column
cols = ['STATION_ID', 'STATION_NAME', 'LONGITUDE', 'LATITUDE', 'STATION_TYPE']
for value_col in imts:
    cols.extend([value_col, value_col.replace('VALUE', 'LN_SIGMA')])

# Site-condition columns, kept only when the input file provides them
cols_vs = [name for name in ('SOIL_TYPE', 'VS30', 'VS30_TYPE')
           if name in df.columns]

df = (
    df[cols + cols_vs]
    # Discard the stations lacking a value in any of the selected IMTs
    .dropna(axis=0, how='any', subset=imts)
    # OQ only reads 5 decimal places
    .round(5)
    # Keep a single station per (rounded) location
    .drop_duplicates(subset=['LONGITUDE', 'LATITUDE'])
    .copy()
)

print('Filtered number of stations:', len(df))

### Save files

In [None]:
# Save files in folder OpenQuake_gmfs

# Event folder: everything in `file_path` before 'Recording_Stations'
folder = file_path[:file_path.find('Recording_Stations')]

# Create the output folder if needed, so `to_csv` does not fail on a
# missing directory
out_dir = os.path.join(folder, 'OpenQuake_gmfs')
os.makedirs(out_dir, exist_ok=True)

# Only the OQ station columns are written (site columns are left out)
stations = df[cols].copy()

seismic = stations.STATION_TYPE == 'seismic'

if seismic.any():
    # Save station list with only seismic stations
    out_path = os.path.join(out_dir, 'stationlist_seismic.csv')
    stations[seismic].to_csv(out_path, encoding='utf-8', index=False)
    print('\n Saving:\n', out_path)

if (stations.STATION_TYPE == 'macroseismic').any():
    # Save complete station list (seismic + macroseismic); only written
    # when macroseismic stations are present
    out_path = os.path.join(out_dir, 'stationlist_all.csv')
    stations.to_csv(out_path, encoding='utf-8', index=False)
    print('\n Saving:\n', out_path)


## Prepare `site_model_stations.csv` file
- Assign Vs30 values to stations described qualitatively as rock
- Add the missing Vs30 values from the USGS reference file
- Estimate z1.0 and z2.5 from Vs30

### Adjust Vs30 qualitative values

In [None]:
# Create DataFrame for site model
df_soil = df[['STATION_TYPE', 'LONGITUDE', 'LATITUDE'] + cols_vs].copy()

# The Vs30 columns are optional in the stations file (`cols_vs` only holds
# the ones that exist). Make sure `VS30` is always present, so the checks
# below work even when the input carries no Vs30 information at all.
if 'VS30' not in df_soil.columns:
    df_soil['VS30'] = np.nan

# Check qualitative soil types
if 'SOIL_TYPE' in df_soil.columns:
    print('SOIL_TYPE options:\n', df_soil.SOIL_TYPE.unique())

    # Define qualitative descriptions for rock or very stiff soil
    # (exact, case-sensitive matches only)
    rock = ['ROCA', 'Roca', 'roca', 'ROCK', 'Rock', 'rock', 
            'ROCA BASALTICA', 'ROCA SEDIMENTARIA', 'ROCA  CANTERA']
    
    station_at_rock = df_soil.SOIL_TYPE.isin(rock)
    
    # Assign value for rock soil
    # NOTE(review): this also overwrites any Vs30 already given for a rock
    # station -- confirm that is intended
    print(f'\nFound {station_at_rock.sum()} stations with rock conditions')
    df_soil.loc[station_at_rock, 'VS30'] = 800
    df_soil.loc[station_at_rock, 'VS30_TYPE'] = 'Inferred'
    print('\nAssociating rock values to Vs30 = 800 m/s')

# Print message (in red) if there are stations with no Vs30 values
if df_soil.VS30.isnull().any():
    miss_vals = df_soil.VS30.isnull().sum()
    msg = f'{miss_vals} values missing Vs30 reference data'
    print(f'\x1b[0;31m \n{msg} \x1b[0m')

### Add missing Vs30 values from USGS reference file

In [None]:
# Complete the Vs30 data with the USGS values

# Vs30 reference file (no headers, use USGS format)
# When running from Wilson, it's possible to use the worldv30 dataset
# located at '/home/risk/sites_vs30/original/vs30mosaic.hdf5'
vs30_path = '../vs30mosaic.hdf5'

# The helper receives the stations table and a list of reference files
# (presumably it only fills the stations still lacking Vs30 -- TODO confirm)
df_soil = usgs_get_vs30.add_vs30_from_ref(df_soil, [vs30_path])

# Add z1pt0 and z2pt5, both computed from Vs30, and flag every site with
# vs30measured = 0
# NOTE(review): VS30_TYPE is not used to set vs30measured -- confirm
vs30_values = df_soil.VS30
df_soil = df_soil.assign(
    z1pt0=calculate_z1pt0(vs30_values),
    z2pt5=calculate_z2pt5(vs30_values),
    vs30measured=0,
)

# Print values
df_soil.head()

### Generate `site_model_stations.csv` in OQ format


In [None]:
# Build the OQ site model from a renamed copy of `df_soil`
# (coordinates and Vs30 take the column names used by OQ)
oq_sites = df_soil.rename(columns={'LONGITUDE': 'lon',
                                   'LATITUDE': 'lat',
                                   'VS30': 'vs30'})

# Site ids: first character of the station type plus a running number,
# e.g. 's_0' for the first row when it is a seismic station
site_id = [f'{str(site)[0]}_{n}'
           for n, site in enumerate(oq_sites['STATION_TYPE'])]
oq_sites['custom_site_id'] = site_id

# Save site_model_stations file
# NOTE: this rebinds `cols`, which the station-list cells also use;
# re-run the column selection cell before saving the station lists again
cols = ['custom_site_id', 'lon', 'lat', 'vs30','vs30measured','z1pt0','z2pt5']

# Create the output folder if needed, so `to_csv` does not fail on a
# missing directory
out_dir = os.path.join(folder, 'OpenQuake_gmfs')
os.makedirs(out_dir, exist_ok=True)

out_path = os.path.join(out_dir, 'site_model_stations.csv')
oq_sites[cols].to_csv(out_path, encoding='utf-8', index=False)
print('\n Saving:\n', out_path)
