In [None]:
%%capture
import ROOT
import glob
import math
import sys
import numpy as np
import pandas as pd
from IPython.display import display, Markdown, HTML
import ipywidgets as widgets
from TPCQCVis.src.drawHistograms import *
from TPCQCVis.src.drawTrending import *
from TPCQCVis.src.drawMultiTrending import *
from TPCQCVis.src.checkHistograms import *
from TPCQCVis.src.checkTrending import *
import warnings
from copy import copy
warnings.filterwarnings('ignore')

In [None]:
%jsroot on
display(HTML("<style>.container { width:95% !important; align-items: center;}</style>"))
display(HTML("<style>table {float:left;}</style>"))
ROOT.gErrorIgnoreLevel = ROOT.kError
#display(HTML('<style>{}</style>'.format(CSS)))
#ROOT.gStyle.SetPalette(57)

In [None]:
def _axisBinning(axis):
    """Return the constructor arguments that reproduce the binning of `axis`.

    Fixed-width axes give ``(nBins, xMin, xMax)``; axes with variable bin
    widths give ``(nBins, edges)`` where ``edges`` is an ``array('d')`` with the
    nBins + 1 bin boundaries. Either tuple can be star-unpacked into a
    TH1F/TH2F constructor.
    """
    from array import array

    nBins = axis.GetNbins()
    if axis.IsVariableBinSize():
        # Low edge of every bin plus the upper limit of the axis.
        edges = [axis.GetBinLowEdge(iBin) for iBin in range(1, nBins + 1)]
        edges.append(axis.GetXmax())
        return (nBins, array('d', edges))
    return (nBins, axis.GetXmin(), axis.GetXmax())


def getMedianHistogram(hists):
    """Build the bin-by-bin median of a list of ROOT histograms.

    The binning (including variable bin widths), name and title are taken from
    the first histogram; all inputs are assumed to share that binning, since
    bins are matched by index only. Under- and overflow bins are not filled.

    Args:
        hists: sequence of ROOT histograms. TH2 inputs are handled as 2D,
            everything else as 1D.

    Returns:
        A new TH2F (2D input) or TH1F (1D input) named ``"median_" + name`` and
        titled ``"Median " + title``. If `hists` holds a single histogram, that
        histogram itself is returned unchanged (no copy, no renaming).

    Raises:
        ValueError: if `hists` is empty.
    """
    from statistics import median

    if len(hists) == 0:
        raise ValueError("Histogram list is empty")

    if len(hists) == 1:
        return hists[0]

    reference = hists[0]
    name = "median_" + reference.GetName()
    title = "Median " + reference.GetTitle()
    xBinning = _axisBinning(reference.GetXaxis())
    nBinsX = xBinning[0]

    if isinstance(reference, ROOT.TH2):
        # 2D histogram case
        yBinning = _axisBinning(reference.GetYaxis())
        nBinsY = yBinning[0]
        medianHist = ROOT.TH2F(name, title, *xBinning, *yBinning)
        for xBin in range(1, nBinsX + 1):
            for yBin in range(1, nBinsY + 1):
                vals = [h.GetBinContent(xBin, yBin) for h in hists]
                medianHist.SetBinContent(xBin, yBin, median(vals))
    else:
        # 1D histogram case
        medianHist = ROOT.TH1F(name, title, *xBinning)
        for xBin in range(1, nBinsX + 1):
            vals = [h.GetBinContent(xBin) for h in hists]
            medianHist.SetBinContent(xBin, median(vals))

    return medianHist

In [None]:
def normalize_histogram(hist):
    """Scale bin contents so that they sum to one.

    Args:
        hist: bin contents as a flat list (1D) or as a list of row lists (2D).
            The input is treated as 2D when its first element is a list.

    Returns:
        A new list (or list of lists) of the same layout holding each content
        divided by the total of all contents.

    Raises:
        ZeroDivisionError: if the contents sum to zero.
        IndexError: if `hist` is empty.
    """
    if not isinstance(hist[0], list):
        # 1D: a single pass over the flat contents.
        norm = sum(hist)
        return [content / norm for content in hist]

    # 2D: the normalisation is the grand total over all rows.
    norm = sum(sum(row) for row in hist)
    normalized = []
    for row in hist:
        normalized.append([content / norm for content in row])
    return normalized

def calculate_kl_divergence(P, Q):
    """Compute the Kullback-Leibler divergence sum(p * log(p / q)) of P from Q.

    Args:
        P, Q: probability distributions as flat lists or as lists of row lists;
            nested input is flattened row by row before the comparison.

    Returns:
        The sum over paired entries of ``p * log(p / q)``. Pairs in which
        either value is zero are skipped; entries are paired by position and
        any surplus entries of the longer input are ignored.
    """
    def _flat(dist):
        # Nested lists (2D) are unrolled row by row; flat input is used as-is.
        if isinstance(dist[0], list):
            return [value for row in dist for value in row]
        return dist

    divergence = 0
    for p, q in zip(_flat(P), _flat(Q)):
        if p == 0 or q == 0:
            continue
        divergence += p * math.log(p / q)
    return divergence

def assign_quality(hists, medianHist):
    """Classify each histogram by its KL divergence from the median histogram.

    Every histogram is normalised and compared with the normalised median via
    calculate_kl_divergence. The population standard deviation (sigma) of the
    resulting scores sets the thresholds:

    * ``|KL| <= sigma``      -> "good"
    * ``|KL| <= 2 * sigma``  -> "medium"
    * otherwise              -> "bad"

    The comparison is inclusive so that a set of identical histograms
    (all scores and sigma equal to zero) is rated "good", matching the
    thresholds used by print_quality_scores.

    Args:
        hists: list of histograms in the layout accepted by normalize_histogram.
        medianHist: the reference histogram in the same layout.

    Returns:
        A list of "good" / "medium" / "bad" strings, one per entry of `hists`
        and in the same order. Empty input gives an empty list.
    """
    if len(hists) == 0:
        return []

    # Normalize the median histogram once; it is the reference for every score.
    Q = normalize_histogram(medianHist)
    quality_scores = [calculate_kl_divergence(normalize_histogram(hist), Q) for hist in hists]

    # Population mean and standard deviation of the scores.
    mean_score = sum(quality_scores) / len(quality_scores)
    variance = sum((score - mean_score) ** 2 for score in quality_scores) / len(quality_scores)
    sigma = math.sqrt(variance)

    # Rate every histogram by its own score against the median.
    quality = []
    for score in quality_scores:
        deviation = abs(score)
        if deviation <= sigma:
            quality.append("good")
        elif deviation <= 2*sigma:
            quality.append("medium")
        else:
            quality.append("bad")

    return quality

def print_quality_scores(hists, medianHist, percent_diff=.20):
    """Print a quality label and the KL divergence for every histogram.

    Each histogram is normalised and compared with the normalised median via
    calculate_kl_divergence. With sigma the population standard deviation of
    the scores, a histogram is "good" if its score is <= sigma, "medium" if it
    is <= 2 * sigma and "bad" otherwise.

    Args:
        hists: list of histograms in the layout accepted by normalize_histogram.
        medianHist: the reference histogram in the same layout.
        percent_diff: accepted for backward compatibility; it does not
            influence the classification.

    Returns:
        None. One line per histogram is written to stdout; nothing is printed
        for empty input.
    """
    if len(hists) == 0:
        return

    # Normalize the median histogram once; it is the reference for every score.
    Q = normalize_histogram(medianHist)
    kl_divergences = [calculate_kl_divergence(normalize_histogram(hist), Q) for hist in hists]

    # Population mean and standard deviation of the scores.
    mean_kl_div = sum(kl_divergences) / len(kl_divergences)
    variance = sum((score - mean_kl_div) ** 2 for score in kl_divergences) / len(kl_divergences)
    sigma = math.sqrt(variance)

    # Categorize the histograms using the sigma-based thresholds.
    for i, kl_div in enumerate(kl_divergences):
        if kl_div <= sigma:
            quality = "good"
        elif kl_div <= 2*sigma:
            quality = "medium"
        else:
            quality = "bad"
        print(f"Histogram {i+1}: Quality is {quality} (KL Divergence: {kl_div})")


## Read files

In [None]:
# Notebook variables
# periodName, passName and path are combined into the output file name further down.
periodName = "LHC23o"
passName = "apass4"
# runNumber is not referenced by any of the visible cells.
runNumber = 123456
path = "/cave/alice-tpc-qc/data/2023/"

# NOTE(review): runList (printed below) and rootDataFile (used by the following
# cells) are not defined in any visible cell. On a fresh kernel this cell raises
# NameError unless the code that opens the input files is restored here — TODO confirm.

print(runList)

### Get object directory structure

In [None]:
# Collect the sub-directories (TDirectoryFile keys) of the first input file.
# The key *name* is used because these strings are later passed to Get() and
# mkdir(), which address directories by name; a key's title is free text that
# is only guaranteed to match the name when no explicit title was given.
directories = [key.GetName() for key in rootDataFile[0].GetListOfKeys() if key.GetClassName() == "TDirectoryFile"]
print(directories)

## Create output file

In [None]:
def _mkdirWithSubDirs(parent, name, subNames):
    """Create directory `name` under `parent` with one sub-directory per entry of `subNames`.

    Each directory is written right after creation. Returns the new directory
    and the list of its sub-directories (same order as `subNames`).
    """
    topDir = parent.mkdir(name)
    topDir.Write()
    subDirs = [topDir.mkdir(subName) for subName in subNames]
    for newDir in subDirs:
        newDir.Write()
    return topDir, subDirs


# The output file is named after the period and pass and is overwritten if it exists.
outputFileName = f"{path}/{periodName}/{passName}/{periodName}_{passName}.root"
outputfile = ROOT.TFile(outputFileName, "RECREATE")

# Three top-level directories, each mirroring the directory layout of the input files.
medianHists, medianHistsSubDirs = _mkdirWithSubDirs(outputfile, "medianHists", directories)
trendings, trendingsSubDirs = _mkdirWithSubDirs(outputfile, "trendings", directories)
kl_div, kl_divSubDirs = _mkdirWithSubDirs(outputfile, "kl_div_trendings", directories)

# Persist the directory structure in the file.
outputfile.SaveSelf(True)

### Make trending plots

In [None]:
# Quality table with one row per run; the trending and KL-divergence cells
# add one column of quality labels per checked object.
qualityDF = pd.DataFrame({'runNumber': runList})

In [None]:
trending = "mean"
error = "meanError"

for dirID, directory in enumerate(directories):
    # Only 1D histograms (class name containing "TH1") are trended here.
    objects = [
        key.GetName()
        for key in rootDataFile[0].Get(directory).GetListOfKeys()
        if "TH1" in key.GetClassName()
    ]
    for objectName in objects:
        # Objects whose name contains "hdEdxTotMIP_" are trended with a Gaussian fit, everything else with the mean.
        trending = "fit(gaus,Sq,N,40,60)" if "hdEdxTotMIP_" in objectName else "mean"

        # Trend the observable over all runs.
        trend, canvas = drawTrending(
            objectName, rootDataFile,
            names=runList, namesFromRunList=True,
            trend=trending, error=error, log="none", axis=1,
        )

        # Rate every run; the returned labels become a column of the quality table.
        qualities, canvas = checkTrending(
            trend, canvas=canvas,
            thresholds={"GOOD": 1.5, "MEDIUM": 3, "BAD": 6},
        )
        qualityDF[objectName+"_trend"] = qualities

        # Store the canvas in the output sub-directory matching the input directory.
        outputfile.cd(trendingsSubDirs[dirID].GetPath())
        canvas.Write()

### Create median histograms and KL divergence trending

In [None]:
for dirID, directory in enumerate(directories):
    # Every key whose class name contains "TH" is processed.
    objects = [
        key.GetName()
        for key in rootDataFile[0].Get(directory).GetListOfKeys()
        if "TH" in key.GetClassName()
    ]
    for objectName in objects:
        # Per-run histograms of this object; their median serves as the reference.
        hist, legend, canvas, pad1 = drawHistograms(objectName, rootDataFile, normalize=True)
        medianHist = getMedianHistogram(hist)

        # Nothing to store or compare against without a reference histogram.
        if medianHist is None:
            continue

        # Store the reference in the matching "medianHists" sub-directory.
        outputfile.cd(medianHistsSubDirs[dirID].GetPath())
        medianHist.Write()

        # Trend the KL divergence of every run with respect to the median.
        trend, canvas = drawTrending(
            objectName, rootDataFile,
            names=runList, namesFromRunList=True,
            trend="kl_divergence", error="", axis=1,
            meanHistogram=medianHist,
        )

        # Rate every run; the returned labels become a column of the quality table.
        qualities, canvas = checkTrending(
            trend, canvas=canvas,
            thresholds={"GOOD": 1.5, "MEDIUM": 3, "BAD": 6},
        )
        qualityDF[objectName+"_klDiv"] = qualities

        # Store the canvas in the matching "kl_div_trendings" sub-directory.
        outputfile.cd(kl_divSubDirs[dirID].GetPath())
        canvas.Write()

In [None]:
# Four-entry palette; with the z range set below, each integer quality code
# (0..3) falls into its own colour band.
myPalette = np.array([920, 414, 801, 633],dtype=np.int32)
ROOT.gStyle.SetPalette(4,myPalette)
ROOT.gStyle.SetGridStyle(1)
qualityLabels = {"NULL":0,"GOOD":1,"MEDIUM":2,"BAD":3}

canvas = ROOT.TCanvas("qualityMatrix","",1000,500)
canvas.SetLeftMargin(0.15)
canvas.SetBottomMargin(0.15)
canvas.SetRightMargin(0.15)
canvas.SetGrid()

# One x bin per run (row of qualityDF) and one y bin per check column.
qualityHist = ROOT.TH2I("qualityMatrix","Quality Matrix",
                        len(qualityDF.index),min(qualityDF.index),max(qualityDF.index)+1,
                        len(qualityDF.columns)-1,0,len(qualityDF.columns)-1)
qualityHist.SetCanExtend(ROOT.TH1.kAllAxes)
qualityHist.SetStats(0)
# Axis styling and the list of check columns do not depend on the run, so set them up once.
qualityHist.GetXaxis().SetTickLength( 0.03)
checkColumns = [column for column in qualityDF.columns if column != "runNumber"]
for runIndex,run in enumerate(qualityDF.runNumber):
    qualityHist.GetXaxis().SetBinLabel(runIndex+1,str(run))
    for check in checkColumns:
        label = qualityDF[check].iloc[runIndex]
        # Labels missing from qualityLabels are drawn as NULL instead of passing None to Fill.
        qualityHist.Fill(run,check,qualityLabels.get(label,qualityLabels["NULL"]))

qualityHist.LabelsOption("u")
qualityHist.Draw("COLZ")
qualityHist.GetZaxis().SetRangeUser(-0.5,3.5)
qualityHist.GetZaxis().SetTitle("Quality")
qualityHist.LabelsOption("v")

# Update the canvas
canvas.Update()

# Store the matrix canvas at the top level of the output file.
outputfile.cd()
canvas.Write()

In [None]:
# Close the output ROOT file once all directories and canvases have been written.
outputfile.Close()