In [141]:
from collections import OrderedDict; from dash import ALL, dcc, html, Input, MATCH, Output, State;
from flask import Markup; from IPython.display import display, Markdown;
from matplotlib.ticker import MaxNLocator; from plotly.subplots import make_subplots;
from plotly.tools import mpl_to_plotly; from sklearn.metrics import accuracy_score, roc_curve, auc;
from scipy.stats import pearsonr,spearmanr; from tensorflow.keras import layers, models, losses; 
from torch.utils.data import TensorDataset, DataLoader;
import dash; import math; import matplotlib.pyplot as plt; import networkx as nx; import numpy as np;
import numpy.random as rnd; import pandas as pd; import os; import PIL; import plotly.express as px;
import plotly.graph_objects as go; import requests as rq; import seaborn as sns; import sklearn as skl; 
import sklearn.linear_model as lm; import statsmodels as sm; import statsmodels.api as sma; 
import statsmodels.formula.api as smf; import requests as rq; import tensorflow as tf; import torch; 
import torch.nn as nn; import torch.nn.functional as F; import torch.optim as optim; import torchvision; 
import torchvision.transforms as transforms; import urllib.request; import warnings;

# file settings
# NOTE: filtering on the base Warning category silences every warning raised in
# this kernel, including shape/deprecation warnings emitted by libraries.
warnings.simplefilter(action='ignore', category=Warning)
sns.set()
# print numpy arrays in full instead of summarising them
np.set_printoptions(threshold=np.inf)
# pandas display limits
pd.set_option('display.max_rows', 999)
pd.set_option('display.max_columns', 12)
pd.set_option('display.width', 200)

# global settings
# Measure Rounding Level: number of decimal places passed to round() by the
# measure functions and the ROC title.
MRL = 3
# Directory holding the input CSVs, and the CSV names in the order loadData()
# reads them (raw data, schema, measures, propositions).
folder = "datasources"
files = [
    "datafile.csv",
    "schema.csv",
    "measures.csv",
    "propositions.csv",
]
# Display strings keyed by title / step identifier.
subtitles = {
    'Title1': "Thakor Lab",
    'Title2': "Cerebrovascular Autoregulation and Post-Cardiac Arrest Resuscitation Therapies Team",
    'Title3': "Statistical Analysis GUI, v1.0",
    'Step00': "Select Response Variable: ",
    'Step04': "Check Predictor Variable(s) to test: ",
    'Step06': "Select Measures to Display: ",
    'Step07': "Select Model Proposition to Calculate: ",
    'Step09': "Enter the Configuration Settings for the selected model (default values pre-entered): ",
    'Step09.01': "Random Seed: ",
    'Step09.02': "Percent of Data to use in Training vs Testing: ",
    'Step09.03': "Include Intercept value in Model: ",
    'Step09.04': "Magnitude of Iteration Limit: ",
    'Step09.05': "Percentile to use as Threshold for Binarization of Response Variable: ",
    'Step09.06': "Number of Layers in Model: ",
    'Step09.07': "Branches or Nodes per Layers of Model: ",
    'Step09.08': "Learning Rate of Model: ",
    'Step12.01': "Histogram of Response Variable Values",
    'Step12.02': "Histogram(s) of Predictor Variable Values",
    'Step13': "Table of Requested Measure(s)",
    'Step14': "Details of Requested Model",
}

# globalized variables:
# df_Raw; df_Schema; df_Meas; df_Prop;
# namesPred; namesResp; namesMeas; namesProp;
# uniList

# BEGIN LOGIC FUNCTIONS
def loadData():
    """Read the four CSV sources and publish them, plus their lookup dicts, as module globals.

    Sets df_Raw, df_Schema, df_Meas and df_Prop from files[0..3] inside `folder`,
    and derives namesResp / namesPred (from the schema), namesMeas / uniList
    (from the measures table) and namesProp (from the propositions table) via
    genFieldDict. Returns nothing.
    """
    global df_Raw, df_Schema, df_Meas, df_Prop
    global namesResp, namesPred, namesMeas, namesProp, uniList

    def readSource(fileIdx):
        # path is built exactly as folder + '/' + file name
        return pd.read_csv(folder+'/'+files[fileIdx])

    df_Raw = readSource(0)

    df_Schema = readSource(1)
    namesResp = genFieldDict('Response', df_Schema)
    namesPred = genFieldDict('Predictor', df_Schema)

    df_Meas = readSource(2)
    namesMeas = genFieldDict('Ready', df_Meas)

    df_Prop = readSource(3)
    namesProp = genFieldDict('Ready', df_Prop)

    # measures flagged 'Variate' are the ones genMeas evaluates on a single variable
    uniList = genFieldDict('Variate', df_Meas)
	
def genFieldDict(req,df):
    """Map 1-based positions to the 'Column' values of the rows whose `req` flag equals 1.

    req -- name of the flag column to filter on.
    df  -- dataframe containing both `req` and a 'Column' column.
    Returns a dict {1: first matching value, 2: second, ...} in row order;
    empty when no row is flagged.
    """
    flagged = df[req] == 1
    columnNames = df.loc[flagged, 'Column'].to_numpy()
    return {position: name for position, name in enumerate(columnNames, start=1)}
    
def genMeas(varResp, varPred, varMeas):
    """Compute the requested measures for the response and each selected predictor.

    varResp -- key into namesResp selecting the response column of df_Raw.
    varPred -- sequence of keys into namesPred (may be empty).
    varMeas -- iterable of keys into namesMeas.

    Returns a dict with:
      'resp' -- {measure name: value} for measures listed in uniList only;
      'pred' -- {predictor name: {measure name: value}}, present only when
                varPred is non-empty. Measures in uniList are computed on the
                predictor alone, all others on (predictor, response).
    """
    measNames = [namesMeas[key] for key in varMeas]
    respVals = df_Raw[namesResp[varResp]]

    # the response only receives the single-variable measures
    respResults = {
        name: calcSwitch(name, respVals)
        for name in measNames
        if name in uniList.values()
    }

    results = {}
    if len(varPred) > 0:
        predResults = {}
        for predKey in varPred:
            predName = namesPred[predKey]
            predVals = df_Raw[predName]
            predResults[predName] = {
                name: (calcSwitch(name, predVals)
                       if name in uniList.values()
                       else calcSwitch(name, predVals, respVals))
                for name in measNames
            }
        results['pred'] = predResults
    results['resp'] = respResults
    return results

def calcSwitch(measName,varA,varB=[]):
    """Dispatch a measure name to its calculation and return the displayable result.

    'Mean' and 'Std' return the rounded number computed from varA.
    'Correlation-Pearson' and 'Correlation-Spearman' return the string
    "<coefficient>(<p-value>)" computed from varA and varB.
    Any other name returns the empty string.
    """
    if measName == 'Mean':
        return calcMean(varA)
    if measName == 'Std':
        return calcStd(varA)
    if measName in ('Correlation-Pearson', 'Correlation-Spearman'):
        corrFn = calcCorrP if measName == 'Correlation-Pearson' else calcCorrS
        coeff, pValue = corrFn(varA, varB)
        return str(coeff) + "(" + str(pValue) + ")"
    # unrecognised measure
    return ''

# BEGIN MEASURE-SPECIFIC FUNCTIONS
def calcMean(varList):
    """Return numpy's mean of varList, rounded to MRL decimal places."""
    meanVal = np.mean(varList)
    return round(meanVal, MRL)

def calcStd(varList):
    """Return numpy's standard deviation of varList (np.std defaults), rounded to MRL decimal places."""
    stdVal = np.std(varList)
    return round(stdVal, MRL)

def calcCorrP(varA,varB):
    """Pearson correlation of varA and varB.

    Returns [coefficient, p-value], each rounded to MRL decimal places.
    """
    coeff, pValue = pearsonr(varA, varB)
    return [round(coeff, MRL), round(pValue, MRL)]

def calcCorrS(varA,varB):
    """Spearman rank correlation of varA and varB.

    Returns [coefficient, p-value], each rounded to MRL decimal places.
    """
    coeff, pValue = spearmanr(varA, varB)
    return [round(coeff, MRL), round(pValue, MRL)]
# END MEASURE-SPECIFIC FUNCTIONS
								 
def genModel(varResp, varPred, varProp, d_Conf):
    """Look up the selected columns and proposition, then delegate to modelSwitch.

    varResp -- key into namesResp for the response column.
    varPred -- iterable of keys into namesPred for the predictor columns.
    varProp -- key into namesProp for the model proposition.
    d_Conf  -- configuration dict forwarded unchanged to modelSwitch.

    Returns whatever modelSwitch returns.
    """
    valsResp = df_Raw[namesResp[varResp]]
    # one column per selected predictor, taken from the raw data
    predNames = [namesPred[key] for key in varPred]
    valsPred = pd.DataFrame({name: df_Raw[name] for name in predNames})
    propName = namesProp[varProp]
    return modelSwitch(propName, valsResp, valsPred, d_Conf)

def modelSwitch(propName,valsResp,valsPred,d_Conf):
    """Split the data, fit the requested proposition and report its errors.

    propName -- 'Linear-Regression', 'Logistic-Regression' or 'Neural-Network';
                any other value yields an empty dict.
    valsResp -- pandas Series holding the response values.
    valsPred -- pandas DataFrame holding the predictor columns (same rows as valsResp).
    d_Conf   -- configuration dict. 'Seed' and 'TrainPct' are always read;
                'BinThresh' is read for logistic regression; the remaining keys
                are read by the proposition-specific fitting functions.

    Returns a dict with:
      'model' -- the fitted model object;
      'error' -- {'train': ..., 'test': ...} from assessErr (RMS error for the
                 regression / network, mean absolute label difference for
                 logistic regression);
      'roc'   -- logistic regression only: {'train': ..., 'test': ...} from assessAUC.
    """
    retVal = {}

    # Seeded random split: each row independently lands in the training set
    # with probability TrainPct, so the split sizes are only approximately
    # TrainPct / (1 - TrainPct).
    np.random.seed(d_Conf['Seed'])
    sample = np.random.uniform(size=len(valsResp.index)) < d_Conf['TrainPct']
    trainResp, testResp = valsResp[sample], valsResp[~sample]
    trainPred, testPred = valsPred[sample], valsPred[~sample]

    if propName == 'Linear-Regression':
        model = modelLinReg(trainResp, trainPred, d_Conf)
        retVal['model'] = model
        modTrain = model.predict(trainPred)
        modTest = model.predict(testPred)
        retVal['error'] = {}
        retVal['error']['train'] = assessErr(trainResp.to_numpy(), modTrain)
        retVal['error']['test'] = assessErr(testResp.to_numpy(), modTest)
    elif propName == 'Logistic-Regression':
        # Binarise the response at the requested percentile of the FULL response
        # (train and test together), then label rows strictly above it as 1.
        thresh = np.percentile(valsResp, d_Conf['BinThresh'])
        trainResp = (trainResp > thresh).astype(int)
        testResp = (testResp > thresh).astype(int)
        model = modelLogReg(trainResp, trainPred, d_Conf)
        retVal['model'] = model
        # hard 0/1 labels drive the error rate ...
        modTrain = model.predict(trainPred)
        modTest = model.predict(testPred)
        # ... while the ROC curve needs a continuous score. Feeding hard labels
        # to roc_curve collapses the curve to a single operating point, so use
        # the predicted probability of the positive class (column 1) instead.
        scoreTrain = model.predict_proba(trainPred)[:, 1]
        scoreTest = model.predict_proba(testPred)[:, 1]
        retVal['error'] = {}
        retVal['roc'] = {}
        retVal['roc']['train'] = assessAUC(trainResp.to_numpy(), scoreTrain)
        retVal['roc']['test'] = assessAUC(testResp.to_numpy(), scoreTest)
        retVal['error']['train'] = assessErr(trainResp.to_numpy(), modTrain, True)
        retVal['error']['test'] = assessErr(testResp.to_numpy(), modTest, True)
    elif propName == 'Neural-Network':
        # np.random.seed does not affect torch; seed torch as well so the
        # weight initialisation is reproducible for a given 'Seed'.
        torch.manual_seed(d_Conf['Seed'])
        model = modelNN(trainResp, trainPred, d_Conf)
        retVal['model'] = model
        # the network outputs one column; flatten it to a 1-D array
        modTrain = model(convNNType(trainPred)).detach().numpy().T[0]
        modTest = model(convNNType(testPred)).detach().numpy().T[0]
        retVal['error'] = {}
        retVal['error']['train'] = assessErr(trainResp.to_numpy(), modTrain)
        retVal['error']['test'] = assessErr(testResp.to_numpy(), modTest)
    return retVal

# BEGIN PROPOSITION-SPECIFIC FUNCTIONS
def modelLinReg(resp,pred,d_Conf):
    """Fit a scikit-learn LinearRegression of resp on pred and return the fitted estimator.

    d_Conf['Intercept'] is passed as fit_intercept.
    """
    regressor = lm.LinearRegression(fit_intercept=d_Conf['Intercept'])
    return regressor.fit(pred, resp)

def modelLogReg(resp,pred,d_Conf):
    """Fit a scikit-learn LogisticRegression of resp on pred and return the fitted estimator.

    d_Conf['Intercept'] is passed as fit_intercept and d_Conf['Magnitude'] as max_iter.
    """
    classifier = lm.LogisticRegression(
        fit_intercept=d_Conf['Intercept'],
        max_iter=d_Conf['Magnitude'],
    )
    return classifier.fit(pred, resp)

def convNNType(df):
    """Convert a pandas object to a float32 torch tensor with the shape of df.values."""
    rawValues = df.values
    return torch.from_numpy(rawValues).to(torch.float32)

def modelNN(rdat,pdat,d_Conf):
    """Build and train a fully connected ReLU network that regresses rdat on pdat.

    rdat   -- pandas Series (or single-column DataFrame) of response values.
    pdat   -- pandas DataFrame of predictors, one row per observation.
    d_Conf -- configuration dict; reads
              'Nodes'     : width of every hidden layer,
              'Layers'    : number of hidden Linear+ReLU stages (the input
                            stage counts as the first),
              'LearnRate' : Adam learning rate,
              'Magnitude' : number of full-batch optimisation steps.

    Returns the trained torch.nn.Sequential with children named
    'inLayer', 'relu1', 'hl1', 'relu2', ..., 'outLayer'.
    """
    pdat = torch.from_numpy(pdat.values).float()
    inSz = pdat.shape[1]; outSz = 1
    # The network emits shape (N, 1). A Series converts to shape (N,), and
    # MSELoss would then broadcast prediction and target to (N, N), silently
    # training every output toward the mean of the response. Force the target
    # into an (N, 1) column so the loss is computed row by row.
    rdat = torch.from_numpy(rdat.values).float().reshape(-1, outSz)

    nodes = d_Conf['Nodes']
    modelGraph = OrderedDict([('inLayer', nn.Linear(inSz, nodes)), ('relu1', nn.ReLU())])
    # range() is empty when Layers <= 1, so no separate guard is needed
    for idx in range(1, d_Conf['Layers']):
        modelGraph['hl'+str(idx)] = nn.Linear(nodes, nodes)
        modelGraph['relu'+str(1+idx)] = nn.ReLU()
    modelGraph['outLayer'] = nn.Linear(nodes, outSz)
    model = nn.Sequential(modelGraph)

    lossFn = nn.MSELoss(reduction='sum')
    # named `optimizer` so the imported `optim` module alias is not shadowed
    optimizer = torch.optim.Adam(model.parameters(), d_Conf['LearnRate'])
    for _ in range(d_Conf['Magnitude']):
        currLoss = lossFn(model(pdat), rdat)
        optimizer.zero_grad()
        currLoss.backward()
        optimizer.step()
    return model
# END PROPOSITION-SPECIFIC FUNCTIONS

def assessErr(truth,prediction,bindata=False):
    """Score predictions against the truth.

    truth, prediction -- equal-length array-likes.
    bindata -- when True, return the mean absolute difference (unrounded);
               for 0/1 labels this is the fraction of mismatches.
               When False, return the root-mean-square error rounded to
               2 decimal places.
    """
    residual = np.subtract(truth, prediction)
    count = len(truth)
    if bindata:
        # mean absolute difference
        return sum(abs(residual) / count)
    # RMS error
    meanSquare = sum(residual ** 2) / count
    return round(math.sqrt(meanSquare), 2)

def assessAUC(truth,prediction):
    """Compute the ROC curve of prediction against truth and its area.

    Returns [false-positive rates, true-positive rates, area under the curve];
    the thresholds returned by roc_curve are discarded.
    """
    falsePos, truePos, _ = roc_curve(truth, prediction)
    area = auc(falsePos, truePos)
    return [falsePos, truePos, area]

def makeROC(inDict,title):
    """Build a plotly figure of a ROC curve with the chance diagonal.

    inDict -- indexable with [0] false-positive rates, [1] true-positive rates
              and [2] the area under the curve (the layout assessAUC returns).
    title  -- text appended to the figure title after the area, which is
              rounded to MRL decimal places.

    Returns the plotly Figure.
    """
    fig = go.Figure()
    # reference diagonal from (0, 0) to (1, 1)
    diagonal = go.Scatter(x=[0, 1], y=[0, 1], line={'color':'navy','width':2})
    fig.add_trace(diagonal)
    rocCurve = go.Scatter(
        x=inDict[0],
        y=inDict[1],
        line={'color':'darkorange','width':2,'dash':'dash'},
    )
    fig.add_trace(rocCurve)
    heading = 'ROC curve (area = '+str(round(inDict[2],MRL))+') for '+title
    fig.update_layout(
        title=heading,
        xaxis_title='False Positive Rate',
        yaxis_title='True Positive Rate',
    )
    return fig

def setDiff(listA,listB):
    """Return the distinct items of listA that are not in listB, as a list (order not guaranteed)."""
    uniqueA = set(listA)
    uniqueB = set(listB)
    return list(uniqueA - uniqueB)

def setInt(listA,listB):
    """Return the distinct items present in both listA and listB, as a list (order not guaranteed)."""
    uniqueA = set(listA)
    uniqueB = set(listB)
    return list(uniqueA & uniqueB)
# END LOGIC FUNCTIONS



In [143]:
# Load the CSV sources, then fit one model with a fixed selection and configuration.
loadData()

# Selections are keys into namesResp / namesPred / namesMeas / namesProp,
# which genFieldDict numbers from 1.
selResp = 1
selPred = [1, 3]
selMeas = [2, 3]
selProp = 2

# Raw configuration inputs.
confSeed = 123
confTPct = 50
confInt = 1
confMag = 4
confBThr = 50
confLyrs = 2
confNodes = 2
confLR = 2

# Translate the raw inputs into the configuration dict the model functions read.
d_Conf = {
    'Seed': confSeed,
    'TrainPct': confTPct/100,                    # percent -> fraction
    'Intercept': (confInt==1),                   # 1 -> True
    'Magnitude': int(1*10**confMag),             # iteration count = 10**confMag
    'BinThresh': confBThr,
    'Layers': confLyrs,
    'Nodes': confNodes,
    'LearnRate': float(1*10**(-int(confLR))),    # learning rate = 10**(-confLR)
}
retVal = genModel(selResp, selPred, selProp, d_Conf)