In [2]:
# %load C:\Users\Patron\Desktop\model_project\Lab\3SpeciesModel\d9_2runModelAndTestModifyNorm.py
"""
Created on Wed Sep  2 13:53:39 2020
This file runs the linear regression model on the modified normalized model
@author: Patron
"""
import import_ipynb
import trainModelAndTest as tmt
import numpy as np
from time import time



importing Jupyter notebook from trainModelAndTest.ipynb


In [8]:
def runmodels(names, inputpath, outnames, outputpath, normed, gLVMtr,
              data_thresh=0, param_thresh=0, regMode='LOO', fold=10):
    """Train the linear regression models on each raw data set, save the
    trained parameters, and return the evaluation results.

    For every entry of `names` the file is read as comma-separated values,
    models are trained with `tmt.trainModelAllSp`, the parameter matrix built
    by `tmt.thetaMtr` is written to `outputpath + outnames[i]`, and that
    matrix is compared against `gLVMtr` with `tmt.calcSpPe` and
    `tmt.calcPctCorrectSign`.

    names: a list of names of input files
    inputpath: path of the input files; it is prepended to each name as-is,
        so it has to end with a path separator
    outnames: a list of names the trained parameter mtr should be saved as;
        needs at least one entry per input file
    outputpath: path of the output files (prepended as-is, like inputpath)

    normed: truthy if the input training data is normalized; forwarded to
        tmt.trainModelAllSp as a boolean
    gLVMtr: aijs mtr used in the gLV model to generate the raw data

    data_thresh: threshold used to exclude small raw data as absent species
    param_thresh: threshold used to exclude small parameters when comparing
        the signs of the gLVMtr and trained parameters; 0 disables filtering
    regMode: regularization mode, can be set to 'LOO' (leave-one-out cross
        validation) or 'FOLD' (x fold cross validation); default is 'LOO'
    fold: number of folds for x fold cross validation; default is 10

    Returns an n*3 mtr where n is the number of training sets and each row
    is [Spearman coef, Pearson coef, Pct Correct Sign].

    Raises ValueError if `outnames` has fewer entries than `names`. This is
    checked before any training so that no model run is wasted on a call
    that could not save its result.
    """
    if len(outnames) < len(names):
        raise ValueError(
            f"outnames has {len(outnames)} entries but names has {len(names)}; "
            "every input file needs an output file name"
        )

    # Parameter filtering is only switched on for a non-zero threshold.
    filtparam = bool(param_thresh != 0)

    ret = np.zeros([len(names), 3])  # one row of evaluation results per data set

    for i, (name, outname) in enumerate(zip(names, outnames)):
        print(f"running model on {name}")
        data = np.genfromtxt(inputpath + name, delimiter=',')

        # The normalized and non-normalized runs differ only in this flag.
        mdls = tmt.trainModelAllSp(data, regMode, fold, normed=bool(normed), thresh=data_thresh)
        thetas = tmt.thetaMtr(mdls)
        np.savetxt(outputpath + outname, thetas, delimiter=',')

        sp, pe = tmt.calcSpPe(thetas, gLVMtr, filtparam, param_thresh)
        pct = tmt.calcPctCorrectSign(thetas, gLVMtr, filtparam, param_thresh)
        ret[i, :] = [sp, pe, pct]

    return ret

In [9]:
# Directories the data is read from and the results are written to.
inpath1 = "./interm/"    # for normalized data
inpath2 = "./rawData/"   # for non-normalized raw data
outpath = "./output/"

# Input and output file names -- edit these to run on other data sets.
names1 = ["endODMtrZs.csv"]           # the normalized end od training data
outnames = ["thetaMtrNormed.csv"]     # trained parameters of the normalized run
names2 = ["endOD_dtr_defined.csv"]    # the non-normalized raw training data
outnames2 = ["thetaMtr_NoNorm.csv"]   # trained parameters of the non-normalized run

# aij matrix that the trained parameters are evaluated against.
gLVMtr = np.genfromtxt("./interm/gLV_aij.csv", delimiter=',')

# Normalized data: train, then store the evaluation results.
paramEvalNormed = runmodels(
    names1, inpath1, outnames, outpath,
    normed=True, gLVMtr=gLVMtr, data_thresh=0, param_thresh=0.001,
)
np.savetxt(outpath + "eval_normed.csv", paramEvalNormed, delimiter=',')

# Non-normalized raw data: same settings apart from the normed flag.
paramEval2 = runmodels(
    names2, inpath2, outnames2, outpath,
    normed=False, gLVMtr=gLVMtr, data_thresh=0, param_thresh=0.001,
)
np.savetxt(outpath + "eval_notNormed.csv", paramEval2, delimiter=',')

running model on endODMtrZs.csv
from tmt process mtr: thresh = 0.001
from tmt process mtr: thresh = 0.001
running model on endOD_dtr_defined.csv
from tmt process mtr: thresh = 0.001
from tmt process mtr: thresh = 0.001
