In [None]:
%%capture
import ROOT
import glob
import math
import numpy as np
import pandas as pd
from IPython.display import display, Markdown, HTML
import ipywidgets as widgets

from TPCQCVis.src.drawHistograms import *
from TPCQCVis.src.drawTrending import *
from TPCQCVis.src.drawMultiTrending import *
from TPCQCVis.src.checkHistograms import *
from TPCQCVis.src.checkTrending import *
from TPCQCVis.src.drawBetheBloch import *

import tensorflow
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.models import Model

In [None]:
# Notebook-wide CSS overrides: the first rule sets the page container width to 85%,
# the second floats rendered tables to the left.
notebook_css = (
    "<style>.container { width:85% !important; }</style>",
    "<style>table {float:left;}</style>",
)
for css_rule in notebook_css:
    display(HTML(css_rule))
%jsroot on

## Load data

In [None]:
# Read the Root Files
# Every file matching <path>*_QC.root is opened read-only; the list is sorted so
# that fileList, runList and rootDataFile share the same ordering.
path = "/cave/alice/data/2022/LHC22m/apass3/"
passName = "apass3"
qcSuffix = "_QC.root"
fileList = sorted(glob.glob(path + "*" + qcSuffix))
# Uncomment to skip the first 13 files:
#fileList = fileList[13:]
# Run label = file name with the directory prefix and the QC suffix stripped.
# The slice bounds are derived from `path` and `qcSuffix` instead of the former
# hard-coded 36 / -8, so the labels stay correct when the path is changed.
runList = [file[len(path):-len(qcSuffix)] for file in fileList]
rootDataFile = [ROOT.TFile.Open(file, "READ") for file in fileList]
runList

### Show histograms

In [None]:
%jsroot on
# Draw the "hdEdxTotMIP_TPC" object of every opened file through the project helper.
# Run labels are handed over as legend names, but the legend itself is switched off.
objectName = "hdEdxTotMIP_TPC"
hist, legend, canvas, pad1 = drawHistograms(
    objectName,
    rootDataFile,
    normalize=True,
    legend=False,
    log="logxyz",
    legendNames=runList,
    pads=False,
    drawOption="L SAME",
)
canvas.Draw()

### Autoencoder example on the `hdEdxTotMIP_TPC` histograms

In [None]:
# explicit function to normalize array
def normalize(arr, t_min=0, t_max=1):
    """Linearly rescale the values of ``arr`` into the range [t_min, t_max].

    The smallest input value is mapped to ``t_min`` and the largest to ``t_max``.

    Parameters
    ----------
    arr : sequence of numbers (list or 1-D array)
        Values to rescale. Must support ``len``, ``min``, ``max`` and iteration.
    t_min, t_max : number
        Lower and upper bound of the target range.

    Returns
    -------
    list
        Rescaled values in input order. An empty input gives an empty list.
        If all inputs are equal (zero spread) every element is mapped to
        ``t_min`` instead of dividing by zero.
    """
    if len(arr) == 0:
        return []
    # Extremes are computed once; the original re-evaluated min(arr) per element.
    lowest = min(arr)
    diff = t_max - t_min
    diff_arr = max(arr) - lowest
    if diff_arr == 0:
        # Constant input: no spread to stretch, pin everything to the lower bound.
        return [t_min for _ in arr]
    return [(((value - lowest) * diff) / diff_arr) + t_min for value in arr]

# get hists
# One row per opened file: contents of "hdEdxTotMIP_TPC" with the first and last
# entry dropped (presumably the under-/overflow bins — TODO confirm).
histograms = np.array([np.array(file.PIDQC.Get("hdEdxTotMIP_TPC"))[1:-1] for file in rootDataFile])
# log scaling
# NOTE(review): an empty bin gives log(0) = -inf, which would turn the whole
# normalized row into NaN — confirm that no bin is empty.
histograms = np.log(histograms)
# normalize
# Done per run BEFORE the repetition: log and normalize act row by row, so they
# commute with repeat/shuffle and give the same result with 1000x fewer
# Python-level normalize() calls.
histograms = np.array([normalize(hist,0.1,0.9) for hist in histograms])
# repeat for statistics
histograms = np.repeat(histograms,1000,axis=0)
np.random.shuffle(histograms)
# add noise
# Gaussian noise (sigma = 1e-2) makes the repeated copies of a run differ.
noise = np.random.randn(*histograms.shape)*1e-2
histograms = histograms + noise

In [None]:
import matplotlib.pyplot as plt

# Overlay the first 1000 training rows; with alpha=0.01 only regions where many
# curves coincide become visibly dark.
fig = plt.figure(figsize=(10, 8))
plt.grid(axis="both")
bin_index = np.arange(histograms.shape[1])  # shared x-axis: 0 .. n_bins-1
for hist in histograms[:1000]:
    plt.plot(bin_index, hist, c="black", alpha=0.01)

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

# Build, train and split a small dense autoencoder on the `histograms` array
# prepared in the previous cells (rows = samples, columns = histogram bins).
# NOTE: `Dropout` is used below but is not part of this cell's import line; it
# comes from the notebook's first import cell.

# Define the dimensions of your input and latent space
input_dim = histograms.shape[1]
print("Training with",len(histograms),"samples.")
latent_dim = 2

# Define the architecture of the autoencoder:
#   input -> Dense(64, relu) -> Dropout -> Dense(latent_dim, relu)   (encoder)
#         -> Dense(64, relu) -> Dense(input_dim, sigmoid)            (decoder)
# `encoder` / `decoder` are rebound step by step and always hold the output
# tensor of the most recently added layer.
input_layer = Input(shape=(input_dim,))
dropout_rate = 0.05  # Dropout rate for the dropout layer
encoder = Dense(64, activation='relu')(input_layer)
encoder = Dropout(dropout_rate)(encoder)
encoder = Dense(latent_dim, activation='relu')(encoder)
decoder = Dense(64, activation='relu')(encoder)
decoder = Dense(input_dim, activation='sigmoid')(decoder)

# Create the autoencoder model
autoencoder = Model(inputs=input_layer, outputs=decoder)

# Compile the model
autoencoder.compile(optimizer='adam', loss='mean_squared_error')

# Train the autoencoder (input and target are the same array)
autoencoder.fit(histograms, histograms, epochs=30, batch_size=32)

# Extract the encoder part of the autoencoder
# (`encoder` is the output tensor of the latent Dense layer defined above)
encoder_model = Model(inputs=input_layer, outputs=encoder)

# Extract the decoder part of the autoencoder
# layers[-2] and layers[-1] are the two Dense layers added last above; they are
# re-applied to a fresh latent input, so the stand-alone decoder uses the same
# layer objects as the autoencoder.
latent_input = Input(shape=(latent_dim,))
decoder = autoencoder.layers[-2](latent_input)
decoder = autoencoder.layers[-1](decoder)
decoder_model = Model(inputs=latent_input, outputs=decoder)

# Obtain the reduced representation of your histograms
encoded_histograms = encoder_model.predict(histograms)
print("Encoded Histograms Shape:", encoded_histograms.shape)

# Generate a reconstructed histogram from an arbitrary encoding
# NOTE(review): the latent layer uses ReLU, so encodings produced by the encoder
# are non-negative, whereas randn can also draw negative components.
latent_encoding = np.random.randn(latent_dim)  # Example random latent encoding
latent_encoding = latent_encoding.reshape(1, -1)  # single-row batch: (1, latent_dim)
reconstructed_histogram = decoder_model.predict(latent_encoding)

# Print the shape of the reconstructed histogram
print("Reconstructed Histogram Shape:", reconstructed_histogram.shape)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Rebuild the un-augmented inputs: one row per opened file, log-scaled and then
# min-max mapped to [0.1, 0.9] (no repetition, no noise).
histograms = np.array([np.array(file.PIDQC.Get("hdEdxTotMIP_TPC"))[1:-1] for file in rootDataFile])
histograms = np.array([normalize(hist,0.1,0.9) for hist in np.log(histograms)])

# For the first three rows draw the input as a line and the decoder output of
# its latent encoding as "+" markers.
plt.figure(figsize=(10, 4))
for hist in histograms[:3]:
    # The models are fed a single-row batch of shape (1, n_bins)
    input_histogram = hist.reshape(1, -1)

    # Latent space
    encoded_histogram = encoder_model.predict(input_histogram)
    print(encoded_histogram)

    # Full autoencoder pass; computed but not drawn in this cell
    reconstructed_histogram = autoencoder.predict(input_histogram)

    plt.plot(range(input_histogram.shape[1]), input_histogram[0])

    histogram = decoder_model.predict(encoded_histogram)
    plt.plot(range(histogram.shape[1]), histogram[0], "+")

plt.grid(axis="both")
plt.tight_layout()
plt.show()

In [None]:
# Rebuild the un-augmented inputs: one row per opened file, log-scaled and then
# min-max mapped to [0.1, 0.9].
histograms = np.array([np.array(file.PIDQC.Get("hdEdxTotMIP_TPC"))[1:-1] for file in rootDataFile])
histograms = np.array([normalize(hist,0.1,0.9) for hist in np.log(histograms)])

plt.figure(figsize=(10, 4))
color1 = "#8A5AC2"
color2 = "#3575D5"

# One scatter point per row, placed at its first two latent coordinates.
for i in range(len(histograms)):
    hist = histograms[i]
    input_histogram = hist.reshape(1, -1)  # single-row batch for predict()
    encoded_histogram = encoder_model.predict(input_histogram)
    print(encoded_histogram)
    plt.scatter(encoded_histogram[0, 0], encoded_histogram[0, 1])

In [None]:
plt.figure(figsize=(10, 4))
color1 = "#8A5AC2"
color2 = "#3575D5"

bob = np.linspace(0,10,100)
for i,val in enumerate(bob):
    histogram = decoder_model.predict(np.array([[0,val]]))
    plt.plot(range(len(histogram[0])), histogram[0], c = get_color_gradient(color1, color2, len(bob))[i])

In [None]:
def hex_to_RGB(hex_str):
    """ #FFFFFF -> [255,255,255]"""
    #Pass 16 to the integer function for change of base
    return [int(hex_str[i:i+2], 16) for i in range(1,6,2)]

def get_color_gradient(c1, c2, n):
    """
    Given two hex colors, returns a color gradient
    with n colors.
    """
    assert n > 1
    c1_rgb = np.array(hex_to_RGB(c1))/255
    c2_rgb = np.array(hex_to_RGB(c2))/255
    mix_pcts = [x/(n-1) for x in range(n)]
    rgb_colors = [((1-mix)*c1_rgb + (mix*c2_rgb)) for mix in mix_pcts]
    return ["#" + "".join([format(int(round(val*255)), "02x") for val in item]) for item in rgb_colors]

In [None]:
# First latent coordinate of the most recently computed `encoded_histogram`
encoded_histogram[0, 0]

In [None]:
# Show the random latent vector created in the autoencoder cell
# (reshaped there to a single row of length latent_dim)
latent_encoding

In [None]:
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.manifold import TSNE
import umap.umap_ as umap
import matplotlib.pyplot as plt

# Stand-alone comparison of two 2-D embedding methods on synthetic data
# (make_blobs); it does not use the QC histograms loaded above.
n_samples = 1000
n_features = 50
n_clusters = 5
X, y = make_blobs(n_samples=n_samples, n_features=n_features, centers=n_clusters, random_state=42)

# Embed the same data with t-SNE and with UMAP, both with a fixed random_state
tsne = TSNE(n_components=2, random_state=42)
tsne_result = tsne.fit_transform(X)

umap_instance = umap.UMAP(n_components=2, random_state=42)
umap_result = umap_instance.fit_transform(X)

# Side-by-side scatter plots, points coloured by their blob label `y`
plt.figure(figsize=(10, 5))
panels = (
    ("t-SNE Visualization", tsne_result),
    ("UMAP Visualization", umap_result),
)
for panel_index, (panel_title, embedding) in enumerate(panels, start=1):
    plt.subplot(1, 2, panel_index)
    plt.scatter(embedding[:, 0], embedding[:, 1], c=y)
    plt.title(panel_title)

plt.tight_layout()
plt.show()
