# Spectralgen Code

Code accompanying the paper: J.J. García-Esteban, J.C. Cuevas, J. Bravo-Abad, "Generative adversarial networks for data-scarce spectral applications in the physical sciences", submitted for publication (2023).

Contains:

- code used to create and train the networks described. 

- code used to create some of the graphs in the paper.

## General imports

Run this code to import the relevant libraries, load and prepare the datasets and define the evaluation metrics used in the paper.

In [None]:
# Import relevant libraries

import tensorflow as tf

from keras import *
from keras.models import Sequential
from keras.layers import Dense

import numpy as np

import matplotlib.pyplot as plt # for plotting
import matplotlib
matplotlib.rcParams['figure.dpi']=300 # highres display
plt.rcParams["figure.figsize"] = (20,15)
matplotlib.rcParams.update({'font.size': 30})
%matplotlib inline

In [None]:
# Data loading
#
# Reads the label and spectra tables, standardises both in log10 space
# (per-column zero mean / unit std), builds a reproducibly shuffled copy of
# the data and splits that copy into training and validation sets.

# Total amount of data: 6561 examples

label_path = '/Users/usuario/Desktop/GAN_multilayered/labels8metal.csv'
data_path = '/Users/usuario/Desktop/GAN_multilayered/data8metal.csv'

dataindex_raw = np.genfromtxt(label_path,dtype="float32") # RAW indices
dataindex = dataindex_raw[:,[1,2,3,4,5,6,7,8]] # columns 1-8 are used as network inputs
datafile = np.genfromtxt(data_path,dtype="float32") # RAW spectra

# Labels and spectra are paired row by row; fail loudly instead of silently
# leaving all-zero rows in the target arrays when the two files disagree.
if dataindex.shape[0] != datafile.shape[0]:
    raise ValueError(
        "label file has %d rows but data file has %d rows"
        % (dataindex.shape[0], datafile.shape[0])
    )

# Random_index

np.random.seed(42) # fixed seed so the shuffle (and therefore the split) is reproducible

random_index = np.linspace(0,dataindex.shape[0]-1,dataindex.shape[0])
np.random.shuffle(random_index)

# Integer view of the shuffled order, usable for fancy indexing
shuffled_rows = random_index.astype(int)

# Normalization

# log10 is evaluated once and reused for the statistics and the scaling
log_inputs = np.log10(dataindex)
log_spectra = np.log10(datafile)

input_mean = np.mean(log_inputs,axis=0)
input_std = np.std(log_inputs,axis=0)

data_mean = np.mean(log_spectra,axis=0)
data_std = np.std(log_spectra,axis=0)

# Populate the sets

# Input data (file order and shuffled order); stored as float64 like the
# zero-initialised arrays they replace.
y_in = ((log_inputs-input_mean)/input_std).astype(np.float64)
y_in_rand = y_in[shuffled_rows,:]

# Target data (file order and shuffled order)
y_target = ((log_spectra-data_mean)/data_std).astype(np.float64)
y_target_rand = y_target[shuffled_rows,:]

# Create the train and val sets

val = 0.2 # Validation fraction [0,1]

# First n_train shuffled rows go to training, the remainder to validation.
# np.arange yields exact, non-overlapping integer indices for any `val`
# (the removed np.int alias is no longer needed).
n_train = int((1-val)*datafile.shape[0])

train_index = np.arange(n_train)
val_index = np.arange(n_train,datafile.shape[0])

y_in_train = y_in_rand[train_index,:]
y_in_val = y_in_rand[val_index,:]

y_target_train = y_target_rand[train_index,:]
y_target_val = y_target_rand[val_index,:]

In [None]:
# Evaluation metrics

# FFNN

def mean_relative_abs_error_pointwise(net, inputs, target):
    """Return the mean point-wise relative absolute error, in percent.

    Every example is predicted on its own with ``net.predict_on_batch``.
    Predictions and targets are mapped back from the normalised log10 space
    using the module-level ``data_std`` and ``data_mean`` before the relative
    error ``|fake - real| / real`` is averaged over all points and examples.

    Parameters
    ----------
    net
        Object exposing ``predict_on_batch``; it is called with an array of
        shape ``(1, inputs.shape[1])`` and its output is reshaped to
        ``(target.shape[1],)``.
    inputs
        2-D array with one row of normalised inputs per example.
    target
        2-D array of normalised targets, one row per example.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, 1)`` holding the error in percent.
    """

    Npoints = target.shape[1]
    Nexamples = target.shape[0]

    # The targets do not depend on the network: undo their normalisation once.
    realed = 10**(target*data_std+data_mean)

    error = np.zeros([1,1])

    for i in range(Nexamples):

        fake = np.reshape(net.predict_on_batch(np.reshape(inputs[i,:],(1,inputs.shape[1]))),(Npoints,))

        faked = 10**(fake*data_std+data_mean)

        # Sum the relative errors of all points of this example in one call
        error = error + np.sum(np.abs((faked-realed[i,:])/realed[i,:]))

    return error/(Npoints*Nexamples)*100

def integral_relative_error(net, inputs, target):
    """Return the mean relative error of the integrated output, in percent.

    Every example is predicted on its own with ``net.predict_on_batch``.
    Predictions and targets are mapped back from the normalised log10 space
    using the module-level ``data_std`` and ``data_mean``, integrated with the
    trapezoidal rule (unit spacing) and compared as
    ``|I_fake - I_real| / I_real``.

    Parameters
    ----------
    net
        Object exposing ``predict_on_batch``; it is called with an array of
        shape ``(1, inputs.shape[1])`` and its output is reshaped to
        ``(target.shape[1],)``.
    inputs
        2-D array with one row of normalised inputs per example.
    target
        2-D array of normalised targets, one row per example.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, 1)`` holding the error in percent.
    """

    # np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid;
    # use the new name when it exists and fall back on older releases.
    integrate = getattr(np, "trapezoid", None) or np.trapz

    Npoints = target.shape[1]
    Nexamples = target.shape[0]

    # Target integrals do not depend on the network: compute them once.
    real_area = integrate(10**(target*data_std+data_mean), axis=1)

    error = np.zeros([1,1])

    for i in range(Nexamples):

        fake = np.reshape(net.predict_on_batch(np.reshape(inputs[i,:],(1,inputs.shape[1]))),(Npoints,))

        fake_area = integrate(10**(fake*data_std+data_mean))

        error = error + np.abs((fake_area-real_area[i])/real_area[i])

    return error/Nexamples*100

# GAN, CGAN, CWGAN

def mean_relative_abs_error_pointwise_gan(net, inputs, target):
    """Return the mean point-wise relative absolute error, in percent.

    The generator is evaluated once on the whole batch as
    ``net([inputs]).numpy()``. Predictions and targets are mapped back from
    the normalised log10 space using the module-level ``data_std`` and
    ``data_mean`` before the relative error ``|fake - real| / real`` is
    averaged over all points and examples.

    Parameters
    ----------
    net
        Callable invoked as ``net([inputs])``; its result must expose
        ``.numpy()`` returning a 2-D array with one row per example.
    inputs
        Batch of inputs forwarded to ``net`` unchanged.
    target
        2-D array of normalised targets, one row per example.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, 1)`` holding the error in percent.
    """

    Npoints = target.shape[1]
    Nexamples = target.shape[0]

    # Only the first Nexamples generated rows are compared with the targets
    fake = net([inputs]).numpy()[:Nexamples,:]

    faked = 10**(fake*data_std+data_mean)
    realed = 10**(target*data_std+data_mean)

    # One vectorised reduction over every point of every example
    error = np.zeros([1,1]) + np.sum(np.abs((faked-realed)/realed))

    return error/(Npoints*Nexamples)*100

def integral_relative_error_gan(net, inputs, target):
    """Return the mean relative error of the integrated output, in percent.

    The generator is evaluated once on the whole batch as
    ``net([inputs]).numpy()``. Predictions and targets are mapped back from
    the normalised log10 space using the module-level ``data_std`` and
    ``data_mean``, integrated row by row with the trapezoidal rule (unit
    spacing) and compared as ``|I_fake - I_real| / I_real``.

    Parameters
    ----------
    net
        Callable invoked as ``net([inputs])``; its result must expose
        ``.numpy()`` returning a 2-D array with one row per example.
    inputs
        Batch of inputs forwarded to ``net`` unchanged.
    target
        2-D array of normalised targets, one row per example.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, 1)`` holding the error in percent.
    """

    # np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid;
    # use the new name when it exists and fall back on older releases.
    integrate = getattr(np, "trapezoid", None) or np.trapz

    Nexamples = target.shape[0]

    # Only the first Nexamples generated rows are compared with the targets
    fake = net([inputs]).numpy()[:Nexamples,:]

    # Integrate every de-normalised row along the point axis
    fake_area = integrate(10**(fake*data_std+data_mean), axis=1)
    real_area = integrate(10**(target*data_std+data_mean), axis=1)

    error = np.zeros([1,1]) + np.sum(np.abs((fake_area-real_area)/real_area))

    return error/Nexamples*100