In [1]:
#<img src="Figs/GEOS_logo.pdf" width="500"/>

# Statistical testing tools for synthetic data sets: 
## <font color=blue>"stat_synthetic.ipynb"</font>
#### Feb 10, 2022  <font color=red>(v. testing)</font>
##### Jeonghyeop Kim (jeonghyeop.kim@gmail.com)

1. This code is a part of the joint inversion project (project4: joint inversion of GNSS and InSAR)

2. The followings will be plotted and saved:
- The "L-curve"
- The "heat map" as a function of relative weighting values
- The misfit of 3-D motion as a function of damping parameters

**`Note that a "true" 3-D field is only available for synthetic data sets`**


In [2]:
## Import libraries

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
## Initialization

inversion_type='L2'  #'L1' or 'L2'
subdir_name = 'input_files' # sub directory where your input files are



cwd = os.getcwd()
path=os.path.join(cwd, subdir_name)

print("You want statistical test results for the %s inversion results." %inversion_type)
print("You saved %s_stat* & %s_model_coef* files in the following directory:" %(inversion_type,inversion_type))
print("'%s'" %path)

You want statistical test results for the L2 inversion results.
You saved L2_stat* & L2_model_coef* files in the following directory:
'/Users/jkim/main/Joint_GNSS_INSAR/statistical_testing_tools_02092022/input_files'


In [4]:
## Define ComputeModelNorm function

def ComputeModelNorm(model_arr,inversion='L2'):
    if inversion=='L2':
        norm_type=2
    else:
        norm_type=1   
    model_norm=np.linalg.norm(model_arr,norm_type)
    
    return model_norm

In [5]:
## Define MakeFileName function 

def MakeFileName(head,ext,paramList):
    
    glue = '_'
    paramList_glue = glue.join(paramList)
    filename = head + paramList_glue + ext
    
    return filename

In [6]:
## Save parameter variables from the existing files
## wI : weighting for InSAR
## wG : weighting for GNSS
## alpha : regularization param 
## beta  : 2nd regularization param (for L2 only)

load_params = []
scan_file = inversion_type+'_stat_'

if inversion_type=='L2':
    names=['wI','wG','alpha','beta']     
    with os.scandir(path) as it: #scan the directory
        for entry in it:
            if entry.name.startswith(scan_file) and entry.is_file():
                element1=entry.name.split("_")[2:][0]
                element2=entry.name.split("_")[2:][1]
                element3=entry.name.split("_")[2:][2]
                element4=entry.name.split("_")[2:][-1][0:-4] 
                # [0:-4] means without extension (e.g., .dat or .txt)
                list_elements = [element1, element2, element3, element4]
                load_params.append(list_elements)

elif inversion_type=='L1':
    names=['wI','wG','alpha']
    with os.scandir(path) as it: #scan the directory
        for entry in it:
            if entry.name.startswith(scan_file) and entry.is_file():
                element1=entry.name.split("_")[2:][0]
                element2=entry.name.split("_")[2:][1]
                element3=entry.name.split("_")[2:][-1][0:-4]
                # [0:-4] means without extension(e.g., .dat or .txt)
                list_elements = [element1, element2, element3]
                load_params.append(list_elements)        

else: #stop the code
    print(" ********************************************** ")
    print(" *                                            * ")
    print(" * inversion_type must be either 'L2' or 'L1' * ")    
    print(" *                                            * ")    
    print(" ********************************************** ")

    exit() 
    
    
df_param_string = pd.DataFrame(load_params,columns=names)
df_param_numeric = df_param_string.astype(float)

In [7]:
model_norms = []
misfits = []

for i in range(len(df_param_string)):
    paramList=df_param_string.iloc[i,:].values
    
    ## Calculate & save model norms
    head = inversion_type + '_model_coef_'
    ext = '.dat'
    filename_model=MakeFileName(head,ext,paramList)
    model_arr = np.loadtxt(path+'/'+filename_model)
    model_norm=ComputeModelNorm(model_arr,inversion=inversion_type)
    model_norms.append(model_norm)
    
    ## Save inversion misfits
    head = inversion_type + '_stat_'
    ext = '.txt'
    filename_stat=MakeFileName(head,ext,paramList)
    stat_arr = np.loadtxt(path+'/'+filename_stat)
    misfits.append(float(stat_arr))

In [8]:
df_param_numeric['model_norm']=model_norms
df_param_numeric['model_misfit']=misfits

#fixed wl, wG, beta -> vary alpha -> save norm and misfit -> L curve.
#fixed alpha and beta -> vary wl, wG -> save misfit -> heat map.

Unnamed: 0,wI,wG,alpha,beta,model_norm,model_misfit
0,0.10,0.10,10.000,5.000,25.625681,6291.255401
1,0.01,2.00,0.100,0.010,486.927414,7224.552573
2,0.01,3.00,0.050,0.050,492.655961,7249.043165
3,0.01,2.00,1.000,3.000,407.523655,7092.312573
4,0.01,3.00,3.000,4.000,309.693484,6925.609855
...,...,...,...,...,...,...
1772,0.01,5.00,4.000,0.001,279.336802,6871.040151
1773,0.01,0.01,0.100,0.100,476.452555,6092.624345
1774,0.01,5.00,1.000,0.100,407.556648,7108.902533
1775,0.10,1.00,0.001,0.500,483.954932,6371.531847


In [9]:
# ## Obtain norms of the column vectors in the G matrix
# df_G=pd.read_csv(path+'/'+'G_matrix.out')
# n_col=df_G.shape[1]
# df_G.columns=[i for i in range(n_col)]
# G = df_G.to_numpy()

# amp_column=np.linalg.norm(G,axis=0) #column vectors' magnitudes

# ##normalized_model = norm_column * model

In [10]:
#df = df.drop_duplicates(subset=['0'],keep=False)