In [1]:
import pandas as pd
from sklearn import preprocessing
import numpy as np
from collections import defaultdict
import os
import pickle
import scipy.stats as st

from itertools import combinations
from scipy.stats import chi2

from code.DataGeneration_class import DataGenerationBernoulli
from code.DataGeneration_class import DataGenerationPoisson

# DATA GENERATION

## Bernoulli

In [None]:
# BERNOULLI GENERATION
#
# Simulate 100 Bernoulli datasets and store each one as a long-format CSV
# with one row per observation and the columns y, x1 (taken from DG.fix[0])
# and g (1-based label of the group the observation belongs to).
#
# NOTE(review): no random seed is set in this cell; if DataGenerationBernoulli
# draws from a global RNG the files are not reproducible across runs - confirm.

bernoulli_dg_dir = 'output/comparison_state_of_art/Bernoulli_DG_output/'

# DataFrame.to_csv does not create missing folders, so make sure it exists.
os.makedirs(bernoulli_dg_dir, exist_ok=True)

for w in range(0, 100):

    # NOTE(review): the positional arguments are presumably
    # (ran_var, ran_int, n_fix), as in the fitting cells - confirm.
    DG = DataGenerationBernoulli(False, True, 1)
    DG.set_parameters()
    eta, y = DG.compute_eta_and_y()

    x1 = DG.fix[0]

    # y holds one array per group; give every observation of the j-th array
    # the label j (starting from 1).
    g = [np.repeat(j, len(y_group)) for j, y_group in enumerate(y, start=1)]

    # Flatten the per-group arrays into one entry per observation.
    y = np.hstack(y).squeeze()
    x1 = np.hstack(x1).squeeze()
    g = np.hstack(g).squeeze()

    data = {'y': y, 'x1': x1, 'g': g}
    df = pd.DataFrame(data)

    filename = 'Bernoulli_' + str(w) + '.csv'

    # The index column is written on purpose: the fitting cell drops the
    # first CSV column again when it loads these files.
    df.to_csv(bernoulli_dg_dir + filename)

## Poisson

In [4]:
# `files` is only referenced by the commented-out Colab download lines at the
# end of the notebook, so a missing google.colab package must not stop the
# data generation when the notebook runs outside Colab.
try:
    from google.colab import files
except ImportError:
    files = None

# POISSON GENERATION
#
# Simulate 100 Poisson datasets and store each one as a long-format CSV
# with one row per observation and the columns y, x1 (taken from DG.fix[0])
# and g (1-based label of the group the observation belongs to).
#
# NOTE(review): no random seed is set in this cell; if DataGenerationPoisson
# draws from a global RNG the files are not reproducible across runs - confirm.

poisson_dg_dir = 'output/comparison_state_of_art/Poisson_DG_output/'

# DataFrame.to_csv does not create missing folders, so make sure it exists.
os.makedirs(poisson_dg_dir, exist_ok=True)

for w in range(0, 100):

    # NOTE(review): the positional arguments are presumably
    # (ran_var, ran_int, n_fix), as in the fitting cells - confirm.
    DG = DataGenerationPoisson(False, True, 1)
    DG.set_parameters()
    eta, y = DG.compute_eta_and_y()

    x1 = DG.fix[0]

    # y holds one array per group; give every observation of the j-th array
    # the label j (starting from 1).
    g = [np.repeat(j, len(y_group)) for j, y_group in enumerate(y, start=1)]

    # Flatten the per-group arrays into one entry per observation.
    y = np.hstack(y).squeeze()
    x1 = np.hstack(x1).squeeze()
    g = np.hstack(g).squeeze()

    data = {'y': y, 'x1': x1, 'g': g}
    df = pd.DataFrame(data)

    filename = 'Poisson_' + str(w) + '.csv'

    # The index column is written on purpose: the fitting cell drops the
    # first CSV column again when it loads these files.
    df.to_csv(poisson_dg_dir + filename)

# SPGLMM

In [None]:
from code.algorithm_alpha import algorithm_alpha

def compute_order(knots):
    """Return the index order that arranges `knots` from largest to smallest.

    A one-dimensional array is ranked by its own values; an array with more
    than one dimension is ranked by its first column only.
    """
    sort_key = knots[:, 0] if knots.ndim > 1 else knots
    ascending = np.argsort(sort_key)
    # Reversing the ascending order yields the descending one.
    return ascending[::-1]

## Bernoulli

In [None]:
# Bernoulli
#
# For every generated Bernoulli dataset: fit the SPGLMM with algorithm_alpha,
# refit until exactly three knots are returned, then score the in-sample
# class predictions. The cell produces two DataFrames:
#   melt     - long-format estimates (c1,1 / c1,2 / c1,3 / beta1) per dataset
#   results3 - Sensitivity / Specificity / Accuracy per dataset

# The original cell listed '/Bernoulli_datasets' but read the files from this
# folder; both steps now use the folder the generation cell writes to.
bernoulli_dg_dir = 'output/comparison_state_of_art/Bernoulli_DG_output/'

# Upper bound on refits per dataset (tunable). The original retried forever
# and swallowed every exception, so a persistent error hung the notebook.
MAX_FIT_ATTEMPTS = 50

# Settings forwarded to algorithm_alpha; identical for every dataset.
ran_var = False
ran_int = True
n_fix = 1
tol = 0.05
sim = True

# Rows are collected in plain lists and turned into DataFrames once at the
# end; DataFrame.append no longer exists in pandas >= 2.0.
melt_records = []
result_records = []

for file in sorted(os.listdir(bernoulli_dg_dir)):
    if not file.endswith(".csv"):
        continue

    # The first CSV column is the index written by to_csv; drop it.
    # Loading happens outside the retry loop so that I/O errors surface
    # immediately instead of being retried.
    df = pd.read_csv(bernoulli_dg_dir + file).iloc[:, 1:]

    N = df.g.nunique()
    lengths = np.array(df.groupby('g').count().reset_index().iloc[:, 1])
    y = list(df.groupby('g')['y'].apply(np.array).values)

    fix = defaultdict(list)
    fix[0] = df.groupby('g')['x1'].apply(np.array).values.tolist()

    fitted = False
    last_error = None
    for attempt in range(MAX_FIT_ATTEMPTS):
        try:
            # ran_var, ran_int, n_fix, sim, tol, model, fix, lengths, y, N
            # NOTE(review): the original passed None for fix/lengths/y/N even
            # though they are computed from the loaded file just above; they
            # are now forwarded. `sim` is left at True - confirm how
            # algorithm_alpha combines `sim` with explicitly supplied data.
            knots, par, W, hess_ran, hess_fix = algorithm_alpha(
                ran_var, ran_int, n_fix, sim, tol, model='B',
                fix=fix, lengths=lengths, y=y, N=N)

            # Reorder knots (and the matching columns of W) from largest
            # to smallest.
            x = compute_order(knots)
            knots = knots[x]
            W = W[:, x]

            # Per group: index of the largest entry in its row of W, or NaN
            # when the whole row sums to zero. Not used further in this cell.
            group = np.array([np.nan if np.sum(W[i, :]) == 0 else np.argmax(W[i, :])
                              for i in range(N)])

            if len(knots) == 3:
                fitted = True
                break
        except Exception as err:
            # Remember the failure and try again; KeyboardInterrupt is not
            # caught, so the loop can still be stopped by hand.
            last_error = err

    if not fitted:
        print('Skipping ' + file + ': no fit with 3 knots after '
              + str(MAX_FIT_ATTEMPTS) + ' attempts (last error: ' + repr(last_error) + ')')
        continue

    # Hard-coded assignment: groups 1-2 get the largest knot, groups 8-10 the
    # smallest, every other group the middle one.
    # NOTE(review): presumably this mirrors the cluster layout of the data
    # generator; the estimated `group` is not used here - confirm.
    df['ran_int'] = np.where(df['g'].isin([1, 2]), knots[0],
                             np.where(df['g'].isin([8, 9, 10]), knots[2], knots[1]))
    df['beta1'] = np.repeat(par, len(df))

    temp = df['beta1'] * df['x1'] + df['ran_int']
    # exp(eta) / (1 + exp(eta)) > 0.5 is equivalent to eta > 0; comparing eta
    # directly avoids the inf/inf = NaN overflow for large eta.
    df['y_pred'] = (temp > 0).astype(int)

    melt_records.extend([
        {'variable': 'c1,1', 'value': knots[0]},
        {'variable': 'c1,2', 'value': knots[1]},
        {'variable': 'c1,3', 'value': knots[2]},
        {'variable': 'beta1', 'value': par[0]},
    ])

    # Rows are the actual classes, columns the predicted ones. Reindexing
    # guarantees both classes are present even if one was never predicted
    # (the plain crosstab would then raise a KeyError on lookup).
    confusion_matrix = pd.crosstab(df['y'], df['y_pred'],
                                   rownames=['Actual'], colnames=['Predicted'])
    confusion_matrix = confusion_matrix.reindex(index=[0, 1], columns=[0, 1], fill_value=0)

    TN = confusion_matrix.loc[0, 0]
    TP = confusion_matrix.loc[1, 1]
    FN = confusion_matrix.loc[1, 0]  # actual 1, predicted 0
    FP = confusion_matrix.loc[0, 1]  # actual 0, predicted 1

    Sensitivity = TP / (TP + FN)
    Specificity = TN / (TN + FP)
    Accuracy = (TN + TP) / (TN + TP + FN + FP)

    result_records.append({'Sensitivity': Sensitivity, 'Specificity': Specificity,
                           'Accuracy': Accuracy})
    print(file, result_records[-1])

melt = pd.DataFrame(melt_records, columns=['variable', 'value'])
results3 = pd.DataFrame(result_records, columns=['Sensitivity', 'Specificity', 'Accuracy'])
print(melt)
print(results3)

In [None]:
# Write the Bernoulli SPGLMM metrics and parameter estimates to CSV.
bernoulli_spglmm_dir = 'output/comparison_state_of_art/Bernoulli_SPGLMM_output/'

# DataFrame.to_csv does not create missing folders, so make sure it exists.
os.makedirs(bernoulli_spglmm_dir, exist_ok=True)

filename = 'results3' + '.csv'
results3.to_csv(bernoulli_spglmm_dir + filename)

filename = 'melt' + '.csv'
melt.to_csv(bernoulli_spglmm_dir + filename)

## Poisson

In [None]:
# POISSON
#
# For every generated Poisson dataset: fit the SPGLMM with algorithm_alpha,
# refit until exactly three knots are returned, then score the in-sample
# count predictions. The cell produces two DataFrames:
#   melt_POI     - long-format estimates (c1,1 / c1,2 / c1,3 / beta1) per dataset
#   results3_POI - MSE / MSE_log / Chi-Squared-Error per dataset

poisson_dg_dir = 'output/comparison_state_of_art/Poisson_DG_output/'

# Upper bound on refits per dataset (tunable). The original retried forever
# and swallowed every exception, so a persistent error hung the notebook.
MAX_FIT_ATTEMPTS = 50

# Settings forwarded to algorithm_alpha; identical for every dataset.
ran_var = False
ran_int = True
n_fix = 1
tol = 0.05
sim = True

# Rows are collected in plain lists and turned into DataFrames once at the
# end; DataFrame.append no longer exists in pandas >= 2.0.
melt_records = []
result_records = []

for file in sorted(os.listdir(poisson_dg_dir)):
    if not file.endswith(".csv"):
        continue

    # The first CSV column is the index written by to_csv; drop it.
    # Loading happens outside the retry loop so that I/O errors surface
    # immediately instead of being retried.
    df = pd.read_csv(poisson_dg_dir + file).iloc[:, 1:]

    N = df.g.nunique()
    lengths = np.array(df.groupby('g').count().reset_index().iloc[:, 1])
    y = list(df.groupby('g')['y'].apply(np.array).values)

    fix = defaultdict(list)
    fix[0] = df.groupby('g')['x1'].apply(np.array).values.tolist()

    fitted = False
    last_error = None
    for attempt in range(MAX_FIT_ATTEMPTS):
        try:
            # NOTE(review): the original passed None for fix/lengths/y/N even
            # though they are computed from the loaded file just above; they
            # are now forwarded. `sim` is left at True - confirm how
            # algorithm_alpha combines `sim` with explicitly supplied data.
            knots, par, W, hess_ran, hess_fix = algorithm_alpha(
                ran_var, ran_int, n_fix, sim, tol, model='P',
                fix=fix, lengths=lengths, y=y, N=N)

            # Reorder knots (and the matching columns of W) from largest
            # to smallest.
            x = compute_order(knots)
            knots = knots[x]
            W = W[:, x]

            # Per group: index of the largest entry in its row of W, or NaN
            # when the whole row sums to zero. Not used further in this cell.
            group = np.array([np.nan if np.sum(W[i, :]) == 0 else np.argmax(W[i, :])
                              for i in range(N)])

            if len(knots) == 3:
                fitted = True
                break
        except Exception as err:
            # Remember the failure and try again; KeyboardInterrupt is not
            # caught, so the loop can still be stopped by hand.
            last_error = err

    if not fitted:
        print('Skipping ' + file + ': no fit with 3 knots after '
              + str(MAX_FIT_ATTEMPTS) + ' attempts (last error: ' + repr(last_error) + ')')
        continue

    # Hard-coded assignment: groups 1-2 get the largest knot, groups 8-10 the
    # smallest, every other group the middle one.
    # NOTE(review): presumably this mirrors the cluster layout of the data
    # generator; the estimated `group` is not used here - confirm.
    df['ran_int'] = np.where(df['g'].isin([1, 2]), knots[0],
                             np.where(df['g'].isin([8, 9, 10]), knots[2], knots[1]))
    df['beta1'] = np.repeat(par, len(df))

    temp = df['beta1'] * df['x1'] + df['ran_int']
    # Predicted count: exp(eta) rounded to the nearest integer. np.rint
    # rounds halves to even like round() did, but works on the whole column
    # at once; the column is float instead of int.
    df['y_pred'] = np.rint(np.exp(temp))

    melt_records.extend([
        {'variable': 'c1,1', 'value': knots[0]},
        {'variable': 'c1,2', 'value': knots[1]},
        {'variable': 'c1,3', 'value': knots[2]},
        {'variable': 'beta1', 'value': par[0]},
    ])

    # The +1 keeps the logarithm and the denominator defined for zero counts.
    MSE = np.mean((df['y'] - df['y_pred'])**2)
    MSE_log = np.mean((np.log(df['y'] + 1) - np.log(df['y_pred'] + 1))**2)
    CSE = np.mean((df['y'] - df['y_pred'])**2 / (df['y_pred'] + 1))

    result_records.append({'MSE': MSE, 'MSE_log': MSE_log, 'Chi-Squared-Error': CSE})
    print(file, result_records[-1])

melt_POI = pd.DataFrame(melt_records, columns=['variable', 'value'])
results3_POI = pd.DataFrame(result_records, columns=['MSE', 'MSE_log', 'Chi-Squared-Error'])
print(melt_POI)
print(results3_POI)

In [None]:
# Write the Poisson SPGLMM metrics and parameter estimates to CSV.
poisson_spglmm_dir = 'output/comparison_state_of_art/Poisson_SPGLMM_output/'

# DataFrame.to_csv does not create missing folders, so make sure it exists.
os.makedirs(poisson_spglmm_dir, exist_ok=True)

filename = 'results3_POI' + '.csv'
results3_POI.to_csv(poisson_spglmm_dir + filename)

filename = 'melt_POI' + '.csv'
melt_POI.to_csv(poisson_spglmm_dir + filename)

#from google.colab import files

#files.download('melt_POI' + '.csv')
#files.download('results3_POI' +'.csv')