In [None]:
import utilities_PLSOM as mod
%reload_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import seaborn as sns
import altair as alt
from collections import Counter
from matplotlib.patches import Rectangle
import hyperopt
from joblib import dump, load
import math
import os

from plotly import express as px
from plotly import graph_objects as go
from plotly import offline as pyo
import warnings
import networkx as nx
warnings.filterwarnings('ignore')

# Display settings: print floats with three decimals instead of scientific notation
# (numpy arrays first, then pandas frames/series).
np.set_printoptions(suppress=True, precision=3)
pd.set_option('display.float_format', lambda value: '%.3f' % value)

# First loop: iterate over different granularities

### Initialize variables

In [None]:
# Recipe space definition

# Second argument of mod.generateDataset and part of the output file names
# ("..._<maxInt>inter"); presumably the maximum interaction order -- confirm in utilities_PLSOM.
maxInt = 2

# [low, high] range shared by every fiber entry of the recipe dictionary
fiber_low, fiber_high = 0, 30

# [low, high] range of the 'matrix' entry of the recipe dictionary
matrix_low, matrix_high = 70, 100

# Granularity steps to sweep; the main loop runs once per value.
# Alternative sweep kept for reference: [1, 2, 3, 5]
granularitySteps = [5, 10]


In [None]:
#Define folder structure
# Root output directory. The main loop builds one "Step<granularity>/" subfolder
# beneath it by plain string concatenation, so the trailing "/" is required.
parent_folder = "./GranularitySearchCollagen/"


In [None]:
# Hyperopt / SOM settings -- every value below is forwarded to mod.RunHyperopt_PLSOM

# Search space: one tuple per tuned multiplier
# (presumably name, lower bound, upper bound, step -- confirm in utilities_PLSOM)
param = [
    ("multiplier_1", 1, 5, 0.1),
    ("multiplier_2", 0.5, 2, 0.1),
]

# Terms handed to the optimiser as `loss`, and the `mode` flag that goes with them
# (exact semantics live in utilities_PLSOM)
loss = ["spread", "deadNodes", "qe"]
mode = "norm"

# Iteration budgets: SOM training (also reused by getBestMap, saveSom and the
# plotting helpers) and the hyperopt search itself
max_iter_som = 50000
max_iter_BO = 100

# Passed through as `s` to RunHyperopt_PLSOM and getBestMap; meaning not visible here
s = 1

### The loop

> Make directory for iteration \
> Make dictionary from granularity step\
> Create Dataset\
> Clean Data\
> Scale Data\
> Train PCA\
> Run Hyperopt\
> Analyze best SOM \
> Compare HD to found LD \
>> Save everything in the directory

In [None]:
for granularityStep in granularitySteps:
      # One complete pipeline run per granularity step. Every artefact of the run is
      # written into its own "Step<granularity>/" subfolder of parent_folder.
      print("Granularity step: " + str(granularityStep))

      # Make directory for iteration.
      # makedirs(exist_ok=True) creates parent_folder too when it is missing and
      # tolerates re-runs; genuine failures (e.g. permissions) now surface here
      # instead of being swallowed and reappearing later as a failed save.
      subfolder = parent_folder + "Step" + str(granularityStep) + "/"
      os.makedirs(subfolder, exist_ok=True)
      print(subfolder)

      # Make dictionary from granularity step: [low, high, step] per ingredient
      d = {'matrix': [matrix_low, matrix_high, granularityStep],
            'bark': [fiber_low, fiber_high, granularityStep],
            'woodflour': [fiber_low, fiber_high, granularityStep],
            'seagrass': [fiber_low, fiber_high, granularityStep],
            'cotton': [fiber_low, fiber_high, granularityStep]}
      print(d)

      # Create dataset
      # ff is the factorial dataset, cc the central composites (both drawn on the map below).
      # The plot shows a hist of the interactions for both ff and cc data.
      ff, cc = mod.generateDataset(d, maxInt, True, "%s/%spct_%sto%s_%sinter" % (subfolder, granularityStep, fiber_low, fiber_high, maxInt))

      # Clean data
      data = ff
      data_noMat = mod.cleanData(data, "recipe")
      columns = data_noMat.columns.tolist()
      print(data_noMat.describe())
      data_categ = mod.createCategory(data_noMat)

      # Scale data; the fitted scaler is reused for the experimental and cc data below
      data_noMat_std, scalerModel = mod.scaleData(data_noMat,"StandardScaler", True, subfolder)
      print(data_noMat_std.describe())

      # Check distributions: one subplot per column of the unscaled recipe data
      fig, axes = plt.subplots(len(data_noMat.columns), figsize = (19,30))
      for i,w in enumerate(data_noMat.columns):
            sns.distplot(data_noMat[w], kde=True, ax=axes[i])

      # Train PCA
      model_PCA, fullfact_pca = mod.trainPCA(data_noMat_std, True, "%s/PCAmodel" % subfolder)

      # Run Hyperopt and keep the trial history next to the other outputs
      trials_df, best = mod.RunHyperopt_PLSOM(data_noMat_std, param, max_iter_som, s, max_iter_BO, loss, mode, True )
      trials_df.to_csv("%s/HyperoptTrials_%spct_%sto%s_%sinter.csv" % (subfolder, granularityStep, fiber_low, fiber_high, maxInt))

      # Analyze best map: rebuild the SOM from the best multipliers found by hyperopt
      best_som, bs_sizeX, bs_sizeY = mod.getBestMap(data_noMat_std, best["multiplier_1"], best["multiplier_2"], max_iter_som, s)
      mod.saveSom(best_som, bs_sizeX, bs_sizeY, max_iter_som, subfolder)
      metrics = mod.getMapMetrics(best_som, data_noMat_std, bs_sizeX, bs_sizeY)
      print(metrics)

      # Plot best map
      umat = mod.plotUmatrix(best_som, bs_sizeX, bs_sizeY, max_iter_som, best_som.thisSigma, True, subfolder)
      mod.plotHitmap(best_som, data_noMat_std, bs_sizeX, bs_sizeY, True, subfolder)
      mod.plotParams(best_som, data_noMat_std, bs_sizeX, bs_sizeY, max_iter_som, best_som.thisSigma, scalerModel,  True, subfolder)
      df = mod.drawSOMnodesInPCA_3d(data_noMat_std, bs_sizeX, bs_sizeY, best_som, model_PCA, False, max_iter_som, best_som.thisSigma, True)

      # Plot the map together with the cc and experimental data
      # Initialize map
      fig_somPlot, ax_somPlot = mod.plotSOM_base(best_som, bs_sizeX, bs_sizeY, max_iter_som)

      # Draw experimental data (scaled with the scaler fitted on the factorial data)
      exp_data_recipe, exp_data_perf = mod.cleanData(pd.read_csv("./data/ExperimentalDataBoneglue.csv"), "recipe+performance", 0, 4)
      exp_data_recipe_std = mod.applyScaling(exp_data_recipe, scalerModel, True)
      data_exp_coord = mod.plotSOM_addDataWithIdx(best_som, exp_data_recipe_std, fig_somPlot, ax_somPlot, "red", 20, 1, max_iter_som, bs_sizeX, bs_sizeY , False)

      # Draw central composites
      cc_data = mod.cleanData(cc, "recipe")
      cc_data_std = mod.applyScaling(cc_data, scalerModel, True)
      cc_data_coord = mod.plotSOM_addData(best_som, cc_data_std, fig_somPlot, ax_somPlot, "green", 50, 1, "D", max_iter_som,bs_sizeX, bs_sizeY, False)

      # Draw factorial dataset; its map coordinates feed the graph comparison below
      data_ff_coord = mod.plotSOM_addData(best_som, data_noMat_std, fig_somPlot, ax_somPlot, "blue", 20, 0.2, "o", max_iter_som,bs_sizeX, bs_sizeY, False)


      fig_somPlot.savefig("%s/%s x %s SOM iter%s sigma%s.jpg" % (subfolder, bs_sizeX, bs_sizeY, max_iter_som, best_som.thisSigma))
      fig_somPlot.show()


      # Compare HD to found LD using graphs

      # Define the distances

      # This is different based on the granularity of the data
      data_rad = mod.getDatasetDistanceForGraph(data_noMat)
      # This is fixed because of the grid unit of the SOM
      som_rad = round(math.sqrt(2), 10)

      # Build and plot graph
      G_data, G_som = mod.getNeighborsGrah(data_noMat, data_ff_coord, data_rad, som_rad, best_som)
      fig = mod.drawGraphs(G_data, G_som,True, subfolder)
      G_combi = mod.compareGraphs(G_data, G_som, True, subfolder)

      mod.drawDendogram(data_noMat_std, best_som, scalerModel, True, subfolder)

      print("LOOP ENDS")
