# Most informative combination of two lines

In [1]:
import os
import sys
import yaml
import shutil
import itertools as itt
from typing import List, Dict, Union, Tuple

import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

sys.path.insert(0, os.path.join("..", ".."))
sys.path.insert(1, os.path.join(".."))

from infovar import DiscreteHandler
from infovar.stats.ranking import prob_higher

from infobs.plots import Plotter
from orion_util import latex_line, latex_param 

# Typeset every text element of the figures with LaTeX
plt.rc("text", usetex=True)

# Output folder for the figures, and folder handed to the handler as its save path
figures_dir = "2_lines_discrete"
data_dir = os.path.join(os.pardir, "data", "discrete")

## Configuration of Handler

In [2]:
# Handler used by the cells below to read stored results; its save path is `data_dir`
handler = DiscreteHandler()
handler.set_paths(save_path=data_dir)

## Configuration of Plotter

In [3]:
# Plotter configured with the LaTeX formatters for line and parameter names
plotter = Plotter(line_formatter=latex_line, param_formatter=latex_param)


def latex_comb_lines(ls):
    """Return ``plotter.lines_comb_formatter(ls, short=True)``."""
    return plotter.lines_comb_formatter(ls, short=True)


def latex_comb_params(ps):
    """Return ``plotter.params_comb_formatter(ps)``."""
    return plotter.params_comb_formatter(ps)

## Load data files

In [4]:
# Load the reference information (file located one level above `data_dir`)

reference_path = os.path.join(data_dir, os.pardir, "reference.yaml")
with open(reference_path, "r") as f:
    ref = yaml.safe_load(f)


# Define lines, parameters and physical regimes of interest

# Targets of the analysis: each parameter alone, then both together
params = ["av", "g0", ["av", "g0"]]

# Identifiers of the lines; every single line and every pair is read below
lines = [
    "13co10", "12cs21", "12co10", "hcn10",
    "n2hp10", "hcop10", "cch10", "32so21",
    "hnc10", "c18o10", "ch3oh21", "h2co",
    "12cn10", "h13cop10", "c3h2", "c17o10",
]

# Regime labels kept for each parameter (the lowest regime is ignored)
regimes = {name: ['1', '2', '3', '4'] for name in ('av', 'g0')}

# The best lines are only ranked when entry["samples"] exceeds this threshold
min_pixels = 200

## Plot figures (one or two parameters)

- Without regime restriction
- With restriction on a single parameter
- With restriction on two parameters

In [5]:
def param_str(params: Union[str, List[str]]):
    """Return a single identifier for one parameter or several parameters.

    A string is returned unchanged; anything else is treated as an iterable
    of strings and joined with underscores (``["av", "g0"]`` -> ``"av_g0"``).
    """
    return params if isinstance(params, str) else "_".join(params)

def regime_str(params: Union[str, Tuple[str, str]], reg: Dict[str, str]):
    """Return an identifier combining each parameter with its regime label.

    Every parameter name is followed by ``"_"`` and ``reg[name]``; when
    several parameters are given the pieces are joined with underscores
    (``["av", "g0"]`` with ``{"av": "1", "g0": "3"}`` -> ``"av_1_g0_3"``).
    """
    # Treat the single-parameter case as a one-element sequence
    names = [params] if isinstance(params, str) else params
    pieces = []
    for name in names:
        pieces.append(param_str(name) + "_" + reg[name])
    return "_".join(pieces)

In [6]:
# Start from a clean output folder for every parameter: remove the folder of
# a previous run if it exists, then recreate it empty.
for param in params:
    path = os.path.join(figures_dir, param_str(param))
    if os.path.isdir(path):
        shutil.rmtree(path)
    # makedirs rather than mkdir: on a fresh run `figures_dir` itself does not
    # exist yet, and os.mkdir would raise FileNotFoundError.
    os.makedirs(path, exist_ok=True)

### Without regime restriction

In [7]:
# For every target parameter, read the information carried by each single line
# and each pair of lines with all regimes set to 'all', then save the two
# matrices (mutual information and linear information) as figures.
for param in params:
    print("param:", param)

    # Every parameter listed in `regimes` is tagged 'all' (no restriction)
    reg = {key: 'all' for key in regimes}

    n_lines = len(lines)
    infos = {
        'mi': np.zeros((n_lines, n_lines)),
        'lin': np.zeros((n_lines, n_lines)),
    }

    # Enumerating gives the matrix indices directly (no lines.index lookups).
    # combinations_with_replacement yields i1 <= i2, so only the diagonal and
    # the upper triangle are filled; the rest stays at zero.
    # NOTE(review): the regime-restricted cells fill both halves of the
    # matrix - confirm that the one-sided fill is intended here.
    for (i1, line1), (i2, line2) in itt.combinations_with_replacement(enumerate(lines), r=2):
        if line1 == line2:
            entry = handler.read(param, line1, reg)
        else:
            entry = handler.read(param, (line1, line2), reg)

        infos["mi"][i1, i2] = entry["mi"]
        infos["lin"][i1, i2] = entry["linearinfo"]

    # makedirs also creates missing parent folders, so this cell does not
    # depend on the folders having been created beforehand.
    path = os.path.join(figures_dir, param_str(param), "regime_all")
    os.makedirs(path, exist_ok=True)

    plotter.plot_mi_matrix(lines, infos["mi"])
    plt.title(f"Mutual information between ${latex_comb_params(param)}$ and lines intensity")
    plt.savefig(os.path.join(path, f"{param_str(param)}_mi"), bbox_inches="tight")

    plotter.plot_mi_matrix(lines, infos["lin"])
    plt.title(f"Linear information between ${latex_comb_params(param)}$ and lines intensity")
    plt.savefig(os.path.join(path, f"{param_str(param)}_lin"), bbox_inches="tight")

    # Free the figures before moving on to the next parameter
    plt.close('all')

param: av
param: g0
param: ['av', 'g0']


### With restriction on one parameter

In [8]:
# For every (target parameter, restricted parameter) pair: loop over the
# regimes of the restricted parameter, save the information matrices of each
# regime, rank the pairs of lines, and finally save a 1D summary figure.
for param, param_regime in itt.product(params, params[:2]):
    print("param:", param)
    print("param regime:", param_regime)

    regime_keys = regimes[param_regime]
    n_lines = len(lines)

    # One slot per regime; each slot is overwritten in the loop below with a
    # list of results, or None when the regime has too few samples.
    best_lines = {stat: [None] * len(regime_keys) for stat in ("mi", "lin")}
    confidences = {stat: [None] * len(regime_keys) for stat in ("mi", "lin")}

    # The output folder does not depend on the regime: create it once, before
    # the loop, so that it is also defined for the summary figures below.
    path = os.path.join(figures_dir, param_str(param), f"regime_{param_str(param_regime)}")
    os.makedirs(path, exist_ok=True)

    # Strictly lower triangle (-1 ignores the diagonal): pairs of distinct lines
    tril_x, tril_y = np.tril_indices(n_lines, -1)

    for i, regime_key in enumerate(tqdm(regime_keys)):

        reg = {param_regime: regime_key}

        infos = {
            'mi': np.zeros((n_lines, n_lines)),
            'lin': np.zeros((n_lines, n_lines))
        }
        sigmas = {
            'mi': np.zeros((n_lines, n_lines)),
            'lin': np.zeros((n_lines, n_lines))
        }

        for (i1, line1), (i2, line2) in itt.combinations_with_replacement(enumerate(lines), r=2):
            if line1 == line2:
                entry = handler.read(param, line1, reg)
            else:
                entry = handler.read(param, (line1, line2), reg)

            # Fill both halves so that the matrices are symmetric
            infos["mi"][i1, i2] = infos["mi"][i2, i1] = entry["mi"]
            sigmas["mi"][i1, i2] = sigmas["mi"][i2, i1] = entry["mi-std"]

            infos["lin"][i1, i2] = infos["lin"][i2, i1] = entry["linearinfo"]
            sigmas["lin"][i1, i2] = sigmas["lin"][i2, i1] = entry["linearinfo-std"]

        # `entry` is the last combination read in the loop above.
        # NOTE(review): this assumes the sample count is the same for every
        # combination of lines within a regime - confirm.
        enough_samples = entry["samples"] > min_pixels

        for stat in ["mi", "lin"]:
            if not enough_samples:
                best_lines[stat][i] = None
                confidences[stat][i] = None
                continue

            probs = prob_higher(infos[stat][tril_x, tril_y], sigmas[stat][tril_x, tril_y], approx=True)
            probs[np.isnan(probs)] = 0.

            order = np.argsort(probs)[::-1]
            order = order[probs[order] > 0.10] # We take the probabilities higher than 10%
            order = order[:3] # We take only the 3 first probabilities for display reasons

            best_lines[stat][i] = [(lines[tril_x[k]], lines[tril_y[k]]) for k in order]
            confidences[stat][i] = [probs[k] for k in order]

        reg_latex = plotter.regime_formatter(param_regime, ref["ranges"][param_regime][reg[param_regime]])

        plotter.plot_mi_matrix(lines, infos["mi"])
        plt.title(f"Mutual information between ${latex_comb_params(param)}$ and lines intensity ({reg_latex})")
        plt.savefig(os.path.join(path, f"{param_str(param)}_regime_{regime_str(param_regime, reg)}_mi"), bbox_inches="tight")

        # This figure shows the linear information, so it is titled accordingly
        plotter.plot_mi_matrix(lines, infos["lin"])
        plt.title(f"Linear information between ${latex_comb_params(param)}$ and lines intensity ({reg_latex})")
        plt.savefig(os.path.join(path, f"{param_str(param)}_regime_{regime_str(param_regime, reg)}_lin"), bbox_inches="tight")

        plt.close('all')

    # Ranges of the regimes that were processed, taken from the reference file
    _regimes = {param_regime: {k: ref["ranges"][param_regime][k] for k in regime_keys}}

    plotter.plot_summary_1d(param, _regimes, best_lines["mi"], confidences["mi"])
    plt.title(f"Most informative line on ${latex_comb_params(param)}$ for each regime of ${latex_comb_params(param_regime)}$")
    plt.savefig(os.path.join(path, f"{param_str(param)}_regime_{param_str(param_regime)}_mi_summary"), bbox_inches="tight")

    plotter.plot_summary_1d(param, _regimes, best_lines["lin"], confidences["lin"])
    plt.title(f"Most informative line on ${latex_comb_params(param)}$ for each regime of ${latex_comb_params(param_regime)}$")
    plt.savefig(os.path.join(path, f"{param_str(param)}_regime_{param_str(param_regime)}_lin_summary"), bbox_inches="tight")

    plt.close('all')

    print()

param: av
param regime: av


100%|██████████| 4/4 [00:23<00:00,  5.96s/it]



param: av
param regime: g0


100%|██████████| 4/4 [00:39<00:00,  9.93s/it]



param: g0
param regime: av


100%|██████████| 4/4 [00:20<00:00,  5.21s/it]



param: g0
param regime: g0


100%|██████████| 4/4 [01:27<00:00, 21.75s/it]



param: ['av', 'g0']
param regime: av


100%|██████████| 4/4 [00:20<00:00,  5.15s/it]



param: ['av', 'g0']
param regime: g0


100%|██████████| 4/4 [00:36<00:00,  9.17s/it]





### With restriction on two parameters

In [10]:
# For every target parameter and every pair of restricted parameters: loop
# over all combinations of their regimes, save the information matrices of
# each combination, rank the pairs of lines, and save a 2D summary figure.
for param, params_regime in itt.product(params, params[2:]):
    print("param:", param)
    print("params regime:", params_regime)

    regime_keys_0 = regimes[params_regime[0]]
    regime_keys_1 = regimes[params_regime[1]]
    n_lines = len(lines)

    # One slot per (i, j) regime combination; each slot is overwritten in the
    # loop below with a list of results, or None when there are too few samples.
    best_lines = {
        stat: [[None] * len(regime_keys_1) for _ in regime_keys_0]
        for stat in ("mi", "lin")
    }
    confidences = {
        stat: [[None] * len(regime_keys_1) for _ in regime_keys_0]
        for stat in ("mi", "lin")
    }

    # The output folder does not depend on the regime: create it once, before
    # the loop, so that it is also defined for the summary figures below.
    path = os.path.join(figures_dir, param_str(param), f"regime_{param_str(params_regime)}")
    os.makedirs(path, exist_ok=True)

    # Strictly lower triangle (-1 ignores the diagonal): pairs of distinct lines
    tril_x, tril_y = np.tril_indices(n_lines, -1)

    # Materialised as a list so that tqdm knows the total number of iterations
    index_pairs = list(itt.product(range(len(regime_keys_0)), range(len(regime_keys_1))))

    for i, j in tqdm(index_pairs):

        reg = {
            params_regime[0]: regime_keys_0[i],
            params_regime[1]: regime_keys_1[j]
        }

        infos = {
            'mi': np.zeros((n_lines, n_lines)),
            'lin': np.zeros((n_lines, n_lines))
        }
        sigmas = {
            'mi': np.zeros((n_lines, n_lines)),
            'lin': np.zeros((n_lines, n_lines))
        }

        for (i1, line1), (i2, line2) in itt.combinations_with_replacement(enumerate(lines), r=2):
            if line1 == line2:
                entry = handler.read(param, line1, reg)
            else:
                entry = handler.read(param, (line1, line2), reg)

            # .get() is kept on purpose: a missing key yields None instead of
            # raising, unlike the single-parameter cell which indexes directly.
            infos["mi"][i1, i2] = infos["mi"][i2, i1] = entry.get("mi")
            sigmas["mi"][i1, i2] = sigmas["mi"][i2, i1] = entry.get("mi-std")

            infos["lin"][i1, i2] = infos["lin"][i2, i1] = entry.get("linearinfo")
            sigmas["lin"][i1, i2] = sigmas["lin"][i2, i1] = entry.get("linearinfo-std")

        # `entry` is the last combination read in the loop above.
        # NOTE(review): this assumes the sample count is the same for every
        # combination of lines within a regime - confirm.
        enough_samples = entry["samples"] > min_pixels

        for stat in ["mi", "lin"]:
            if not enough_samples:
                best_lines[stat][i][j] = None
                confidences[stat][i][j] = None
                continue

            probs = prob_higher(infos[stat][tril_x, tril_y], sigmas[stat][tril_x, tril_y], approx=True)
            probs[np.isnan(probs)] = 0.

            order = np.argsort(probs)[::-1]
            order = order[probs[order] > 0.10] # We take the probabilities higher than 10%
            order = order[:3] # We take only the 3 first probabilities for display reasons

            best_lines[stat][i][j] = [(lines[tril_x[k]], lines[tril_y[k]]) for k in order]
            confidences[stat][i][j] = [probs[k] for k in order]

        reg_latex = ', '.join([
            plotter.regime_formatter(param_regime, ref["ranges"][param_regime][reg[param_regime]])
            for param_regime in params_regime
        ])

        # Plotting stays best-effort (a failing regime must not abort the whole
        # run), but the failure is reported instead of being silently dropped,
        # and KeyboardInterrupt is no longer swallowed.
        try:
            plotter.plot_mi_matrix(lines, infos["mi"])
            plt.title(f"Mutual information between ${latex_comb_params(param)}$ and lines intensity ({reg_latex})")
            plt.savefig(os.path.join(path, f"{param_str(param)}_regime_{regime_str(params_regime, reg)}_mi"), bbox_inches="tight")

            plotter.plot_mi_matrix(lines, infos["lin"])
            plt.title(f"Linear information between ${latex_comb_params(param)}$ and lines intensity ({reg_latex})")
            plt.savefig(os.path.join(path, f"{param_str(param)}_regime_{regime_str(params_regime, reg)}_lin"), bbox_inches="tight")
        except Exception as exc:
            # tqdm.write prints without breaking the progress bar
            tqdm.write(f"Could not plot the information matrices for param {param}, regime {reg}: {exc!r}")

        plt.close('all')

    # Ranges of the regimes that were processed, taken from the reference file
    _regimes = {param_regime: {k: ref["ranges"][param_regime][k] for k in regimes[param_regime]} for param_regime in params_regime}

    plotter.plot_summary_2d(param, _regimes, best_lines["mi"], confidences["mi"])
    plt.title(f"Most informative line on ${latex_comb_params(param)}$ for different regimes of ${latex_comb_params(params_regime[0])}$ and ${latex_comb_params(params_regime[1])}$")
    plt.savefig(os.path.join(path, f"{param_str(param)}_regime_{param_str(params_regime)}_mi_summary"), bbox_inches="tight")

    plotter.plot_summary_2d(param, _regimes, best_lines["lin"], confidences["lin"])
    plt.title(f"Most informative line on ${latex_comb_params(param)}$ for different regimes of ${latex_comb_params(params_regime[0])}$ and ${latex_comb_params(params_regime[1])}$")
    plt.savefig(os.path.join(path, f"{param_str(param)}_regime_{param_str(params_regime)}_lin_summary"), bbox_inches="tight")

    plt.close('all')

    print()

param: av
params regime: ['av', 'g0']


  0%|          | 0/16 [00:00<?, ?it/s]

100%|██████████| 16/16 [03:35<00:00, 13.46s/it]



param: g0
params regime: ['av', 'g0']


100%|██████████| 16/16 [04:31<00:00, 17.00s/it]



param: ['av', 'g0']
params regime: ['av', 'g0']


100%|██████████| 16/16 [03:07<00:00, 11.71s/it]



