In [1]:
#import
import os
import sys

import tqdm
import math
import torch
import logging

import numpy as np
import pandas as pd
import torch.nn as nn
import lightning.pytorch as pl
import matplotlib.pyplot as plt

from scipy import spatial
from scipy.stats import chisquare, kstest
from scipy.optimize import curve_fit
from torchmetrics import MeanAbsoluteError
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import ReduceLROnPlateau
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import Callback, LearningRateMonitor, ModelCheckpoint

In [50]:
#model
hyperparams_dict = {
        'energy': 5.754,
        'scale_data': False,
        'augment': False,
        'add_abc': False,
        'abc_loss_factor': 1,
        'augment_factor': 20,
        'test_size': 0.001,
        'batch_size': 256,
        'net_architecture': [9,60,80,100,120,140,240,340,440,640,2000,1040,640,340,240,140,100,80,60,20,1],
        'activation_function': nn.ReLU(),
        'loss_func': 'RMSELoss()',
        'optim_func': torch.optim.Adam,
        'max_epochs': 2000,
        'es_min_delta': 0.00001,
        'es_patience': 50,
        'lr': 0.001,
        'lr_factor':0.5,
        'lr_patience': 5,
        'lr_cooldown': 20,
    }

if True:
    class RMSELoss(torch.nn.Module):
        def __init__(self, add_abc=False):
            super(RMSELoss,self).__init__()
            self.add_abc = add_abc

        @staticmethod
        def func_cos(x, a, b, c):
            return a + b*torch.cos(2*x) + c*torch.cos(x)

        def forward(self, x, y_hat, y, w, A, B, C):
            if self.add_abc:
                phi = x[:, 4]
                criterion = torch.sqrt(torch.mean(w*(y_hat - y)**2)/torch.sum(w)) + \
                            torch.mul(hyperparams_dict.get('abc_loss_factor'), torch.mean(torch.abs(w*y - self.func_cos(phi,A,B,C)))/torch.sum(w))
            else:
                criterion = torch.sqrt(torch.mean(w * (y_hat - y) ** 2) / torch.sum(w))
            return criterion

    global_losss_function = RMSELoss()

    #params
    project_name = "MSU_interpol_unified_notebooks"

    logger_path = './wandb_local_logs'
    data_path = './data/clasdb_pi_plus_n.txt'


    logger_full_path = os.path.join(logger_path, project_name, 'spring-feather-42')

    os.makedirs(logger_full_path, exist_ok=True)
    logging.basicConfig(encoding='utf-8',
                        level=logging.DEBUG,
                        format='%(asctime)s : %(levelname)s : %(message)s',
                        handlers=[logging.FileHandler(os.path.join(logger_full_path, 'logs.log'), mode='w'),
                                  logging.StreamHandler(sys.stdout)],
                        force=True)

        # define dataset and net
    class InterpolDataSet(Dataset):
        def __init__(self, features, labels, weights, A, B, C):
            self.features = features
            self.labels = labels
            self.weights = weights
            self.A = A
            self.B = B
            self.C = C
            self.len = len(labels)
    
        def __getitem__(self, index):
            feature = self.features[index]
            label = self.labels[index]
            weights = self.weights[index]
            A = self.A[index]
            B = self.B[index]
            C = self.C[index]
            return feature, label, weights, A, B, C
    
        def __len__(self):
            return self.len
    
    
    class InterpolDataModule(pl.LightningDataModule):
        def __init__(self, hyperparams):
            super().__init__()
            self.df = None
            self.hyperparams = hyperparams
            self.train_dataset = None
            self.val_dataset = None
    
        def augment(self, new_augm):
            augm = pd.Series({'Ebeam': np.random.normal(loc=new_augm.Ebeam, scale=new_augm.Ebeam / 30),
                              'W': np.random.normal(loc=new_augm.W, scale=new_augm.W / 30),
                              'Q2': np.random.normal(loc=new_augm.Q2, scale=new_augm.Q2 / 30),
                              'cos_theta': np.clip(
                                  np.random.normal(loc=new_augm.cos_theta, scale=abs(new_augm.cos_theta / 30)), -1, 1),
                              'phi': np.clip(np.random.normal(loc=new_augm.phi, scale=new_augm.phi / 30), 0, 2 * np.pi),
                              'dsigma_dOmega': np.random.normal(loc=new_augm.dsigma_dOmega, scale=new_augm.error / 3),
                              'error': new_augm.error,
                              'weight': new_augm.weight,
                              })
            if self.hyperparams.get('add_abc'):
                augm['A'] = new_augm.A
                augm['B'] = new_augm.B
                augm['C'] = new_augm.C
            else:
                pass
            return augm
    
        @staticmethod
        def func_cos(x, a, b, c):
            return a + b * np.cos(2 * x) + c * np.cos(x)
    
        def get_abc(self, df, E_beam, Q2, W, cos_theta):
            df_example_set = df[(df.Ebeam == E_beam) &
                                (df.W == W) &
                                (df.Q2 == Q2) &
                                (df.cos_theta == cos_theta)].sort_values('phi')
            # input data
            xdata = df_example_set.phi
            ydata = df_example_set.dsigma_dOmega
            ydata_error = df_example_set.error
            # fitting the data
            popt, pcov = curve_fit(self.func_cos, xdata, ydata, sigma=ydata_error, absolute_sigma=True)
            a, b, c = popt[0], popt[1], popt[2]
    
            return a, b, c
    
        def setup(self, stage):
            # data reading and preprocessing
            df = pd.read_csv(data_path, delimiter='\t', header=None)
            df.columns = ['Ebeam', 'W', 'Q2', 'cos_theta', 'phi', 'dsigma_dOmega', 'error', 'id']
            df.loc[8314:65671, 'Ebeam'] = 5.754  # peculiarity of this dataset.
            df = df[~((df.Ebeam == 5.754) & (~df.Q2.isin([1.715, 2.050, 2.445, 2.915, 3.480, 4.155])))] # peculiarity of this dataset #2
            df['phi'] = df.phi.apply(lambda x: math.radians(x))
            df['weight'] = df['error'].apply(lambda x: x and 1 / x or 100)  # x and 1 / x or 100  is just a reversed error but with validation 1/0 error in this case it will return 100
            df = df.drop('id', axis=1)
            df = df.drop_duplicates(subset=['Ebeam', 'W', 'Q2', 'cos_theta', 'phi'])
            df['cos_phi'] = df['phi'].apply(lambda x: np.cos(x))
            df['sin_phi'] = df['phi'].apply(lambda x: np.sin(x))
            df['theta'] = np.arccos(df['cos_theta'])
            df['sin_theta'] = np.sin(df.theta)
    
            df = df[df.Ebeam == hyperparams_dict.get('energy')]
    
            # #train test split
            feature_columns = ['Ebeam', 'W', 'Q2', 'theta', 'cos_theta', 'sin_theta', 'phi', 'cos_phi', 'sin_phi']
    
            df['A'] = None
            df['B'] = None
            df['C'] = None
            feature_columns_with_additional = ['Ebeam', 'W', 'Q2', 'theta', 'cos_theta', 'sin_theta', 'phi', 'cos_phi', 'sin_phi', 'weight', 'A', 'B', 'C']
    
            if self.hyperparams.get('add_abc'):
                for Ebeam in df.Ebeam.unique():
                    for Q2 in tqdm.tqdm(df[df.Ebeam == Ebeam].Q2.unique(), desc='ABC Q cycle'):
                        for W in df[(df.Ebeam == Ebeam) & (df.Q2 == Q2)].W.unique():
                            for cos_theta in df[(df.Ebeam == Ebeam) & (df.Q2 == Q2) & (df.W == W)].cos_theta.unique():
                                try:
                                    if df.loc[(df.Ebeam == Ebeam) & (df.Q2 == Q2) & (df.W == W) & (
                                            df.cos_theta == cos_theta), 'A'].iloc[0] is None:
                                        A, B, C = self.get_abc(df, Ebeam, Q2, W, cos_theta)
                                        df.loc[(df.Ebeam == Ebeam) & (df.Q2 == Q2) & (df.W == W) & (
                                                df.cos_theta == cos_theta), 'A'] = A
                                        df.loc[(df.Ebeam == Ebeam) & (df.Q2 == Q2) & (df.W == W) & (
                                                df.cos_theta == cos_theta), 'B'] = B
                                        df.loc[(df.Ebeam == Ebeam) & (df.Q2 == Q2) & (df.W == W) & (
                                                df.cos_theta == cos_theta), 'C'] = C
                                    else:
                                        pass
                                except Exception as e:
                                    df.loc[(df.Ebeam == Ebeam) & (df.Q2 == Q2) & (df.W == W) & (
                                            df.cos_theta == cos_theta), 'A'] = 0
                                    df.loc[(df.Ebeam == Ebeam) & (df.Q2 == Q2) & (df.W == W) & (
                                            df.cos_theta == cos_theta), 'B'] = 0
                                    df.loc[(df.Ebeam == Ebeam) & (df.Q2 == Q2) & (df.W == W) & (
                                            df.cos_theta == cos_theta), 'C'] = 0
            else:
                pass
    
            feature_data = df[feature_columns_with_additional]
            label_data = df['dsigma_dOmega']
    
            if self.hyperparams.get('scale_data'):
                scaler_feature = StandardScaler()
                scaler_target = StandardScaler()
                feature_data = scaler_feature.fit_transform(feature_data)
                label_data = scaler_target.fit_transform(label_data.values.reshape(-1, 1))
            else:
                pass
    
            if self.hyperparams.get('augment'):
                aug_series_list = []
                for i in tqdm.tqdm(df.itertuples()):
                    for _ in range(self.hyperparams.get('augment_factor')):
                        aug_series_list.append(self.augment(i))
    
                aug_df = pd.DataFrame(aug_series_list)
                df = pd.concat([df, aug_df])
            else:
                pass
    
            self.df = df
    
            train_feature_data, val_feature_data, train_label_data, val_label_data = train_test_split(feature_data,
                                                                                                      label_data,
                                                                                                      test_size=self.hyperparams.get(
                                                                                                          'test_size'),
                                                                                                      random_state=1438)
    
            self.train_dataset = InterpolDataSet(
                torch.tensor(train_feature_data[feature_columns].values, dtype=torch.float32),
                torch.tensor(train_label_data.values, dtype=torch.float32),
                torch.tensor(train_feature_data['weight'].values, dtype=torch.float32),
                torch.tensor(train_feature_data['A'].astype(float).values, dtype=torch.float32),
                torch.tensor(train_feature_data['B'].astype(float).values, dtype=torch.float32),
                torch.tensor(train_feature_data['C'].astype(float).values, dtype=torch.float32))
    
            self.val_dataset = InterpolDataSet(torch.tensor(val_feature_data[feature_columns].values, dtype=torch.float32),
                                               torch.tensor(val_label_data.values, dtype=torch.float32),
                                               torch.tensor(val_feature_data['weight'].values, dtype=torch.float32),
                                               torch.tensor(train_feature_data['A'].astype(float).values,
                                                            dtype=torch.float32),
                                               torch.tensor(train_feature_data['B'].astype(float).values,
                                                            dtype=torch.float32),
                                               torch.tensor(train_feature_data['C'].astype(float).values,
                                                            dtype=torch.float32))
    
        def train_dataloader(self):
            return DataLoader(dataset=self.train_dataset, batch_size=self.hyperparams.get('batch_size'), shuffle=True,
                              num_workers=0)
    
        def val_dataloader(self):
            return DataLoader(dataset=self.val_dataset, batch_size=self.hyperparams.get('batch_size'), shuffle=True,
                              num_workers=0)
    
    
    class PrintCallbacks(Callback):
        def on_train_start(self, trainer, pl_module):
            logging.info("Training is starting")
    
        def on_train_end(self, trainer, pl_module):
            logging.info("Training is ending")
    
        def on_train_epoch_end(self, trainer, pl_module):
            epoch_mean = torch.stack(pl_module.training_step_outputs).mean()
            logging.info(f"epoch: {pl_module.current_epoch}; train_loss: {epoch_mean}")
            pl_module.training_step_outputs.clear()
    
        def on_validation_epoch_end(self, trainer, pl_module):
            epoch_mean = torch.stack(pl_module.validation_step_outputs).mean()
            logging.info(f"epoch: {pl_module.current_epoch}; val_loss: {epoch_mean}")
            pl_module.validation_step_outputs.clear()
    
    
    class InterpolRegressor(pl.LightningModule):
        def __init__(self, hyperparams):
            super(InterpolRegressor, self).__init__()
    
            self.train_loss, self.train_mae, self.val_loss, self.val_mae = 0, 0, 0, 0
            self.hyperparams = hyperparams
            self.save_hyperparameters(self.hyperparams)
    
            self.mae = MeanAbsoluteError()
            self.loss_func = global_losss_function
    
            self.optim = self.hyperparams.get('optim_func')
    
            self.net_architecture = self.hyperparams.get('net_architecture')
            self.activation_function = self.hyperparams.get('activation_function')
    
            self.training_step_outputs = []
            self.validation_step_outputs = []
    
            self.net = nn.Sequential()
            for i in range(1, len(self.net_architecture)):
                self.net.append(nn.Linear(self.net_architecture[i - 1], self.net_architecture[i]))
                if i != len(self.net_architecture) - 1:
                    self.net.append(self.activation_function)
                else:
                    pass
    
        def forward(self, x):
            return self.net(x)
    
        def training_step(self, batch, batch_idx):
            x, y, w, A, B, C = batch
            y_hat = self.forward(x)
    
            loss = self.loss_func
            self.train_loss = loss.forward(x=x, y_hat=y_hat.reshape(-1), y=y, w=w, A=A, B=B, C=C)
            self.train_mae = self.mae(y_hat.reshape(-1), y)
    
            self.log('train_loss', self.train_loss, batch_size=self.hyperparams['batch_size'],
                     on_step=False, on_epoch=True, prog_bar=True, sync_dist=True, logger=True)
            self.log('train_mae', self.train_mae, batch_size=self.hyperparams['batch_size'],
                     on_step=False, on_epoch=True, prog_bar=True, sync_dist=True, logger=True)
    
            self.training_step_outputs.append(self.train_loss)
            return self.train_loss
    
        def validation_step(self, batch, batch_idx):
            x, y, w, A, B, C = batch
            y_hat = self.forward(x)
    
            loss = self.loss_func
            self.val_loss = loss.forward(x=x, y_hat=y_hat.reshape(-1), y=y, w=w, A=A, B=B, C=C)
            self.val_mae = self.mae(y_hat.reshape(-1), y)
    
            self.log('val_loss', self.val_loss, batch_size=self.hyperparams['batch_size'],
                     on_step=False, on_epoch=True, prog_bar=True, sync_dist=True, logger=True)
            self.log('val_mae', self.val_mae, batch_size=self.hyperparams['batch_size'],
                     on_step=False, on_epoch=True, prog_bar=True, sync_dist=True, logger=True)
    
            self.validation_step_outputs.append(self.val_loss)
            return self.val_loss
    
        def on_validation_epoch_end(self):
            sch = self.lr_schedulers()
            if isinstance(sch, torch.optim.lr_scheduler.ReduceLROnPlateau) and self.trainer.current_epoch != 0:
                sch.step(self.trainer.callback_metrics["val_loss"])
    
        def configure_callbacks(self):
            early_stop_callback = EarlyStopping(monitor="val_loss", mode="min",
                                                min_delta=self.hyperparams.get('es_min_delta'),
                                                patience=self.hyperparams.get('es_patience'),
                                                verbose=True)
    
            checkpoint_callback = ModelCheckpoint(save_top_k=3,
                                                  monitor="val_loss",
                                                  mode="min",
                                                  dirpath=f"{logger_full_path}/checkpoints",
                                                  filename="{exp_name}{val_loss:.5f}-{epoch:02d}")
    
            lr_monitor = LearningRateMonitor(logging_interval='epoch')
    
            print_callback = PrintCallbacks()
    
            return [early_stop_callback, checkpoint_callback, print_callback, lr_monitor]
    
        def configure_optimizers(self):
            optimizer = self.optim(self.parameters(), lr=self.hyperparams.get('lr'))
            lr_optim = ReduceLROnPlateau(optimizer=optimizer,
                                         mode='min',
                                         factor=self.hyperparams.get('lr_factor'),
                                         patience=self.hyperparams.get('lr_patience'),
                                         cooldown=self.hyperparams.get('lr_cooldown'),
                                         threshold=0.01,
                                         verbose=True)
            return {"optimizer": optimizer,
                    "lr_scheduler": {
                        "scheduler": lr_optim,
                        "interval": "epoch",
                        "monitor": "val_loss",
                        "frequency": 2,
                        "name": 'lr_scheduler_monitoring'}
                    }

    data = InterpolDataModule(hyperparams_dict)
    data.setup('test')
    df = data.df
    df_all = df[['Ebeam', 'W', 'Q2', 'theta', 'cos_theta', 'sin_theta', 'phi', 'cos_phi', 'sin_phi']]
    df_target = df[['dsigma_dOmega']]

In [51]:
df = df.drop(['A', 'B', 'C'], axis=1)
df

Unnamed: 0,Ebeam,W,Q2,cos_theta,phi,dsigma_dOmega,error,weight,cos_phi,sin_phi,theta,sin_theta
8314,5.754,1.11,1.715,-0.1,0.130900,0.24835,0.081150,12.322859,0.991445,0.130526,1.670964,0.994987
8315,5.754,1.11,1.715,-0.1,0.392699,0.31508,0.089290,11.199462,0.923880,0.382683,1.670964,0.994987
8316,5.754,1.11,1.715,-0.1,0.654498,0.33037,0.097020,10.307153,0.793353,0.608761,1.670964,0.994987
8317,5.754,1.11,1.715,-0.1,0.916298,0.22582,0.059600,16.778523,0.608761,0.793353,1.670964,0.994987
8318,5.754,1.11,1.715,0.1,0.130900,0.31506,0.095400,10.482180,0.991445,0.130526,1.470629,0.994987
...,...,...,...,...,...,...,...,...,...,...,...,...
65666,5.754,1.15,4.155,0.9,3.403392,0.24095,0.071110,14.062691,-0.965926,-0.258819,0.451027,0.435890
65667,5.754,1.15,4.155,0.9,3.926991,0.19967,0.078718,12.703601,-0.707107,-0.707107,0.451027,0.435890
65668,5.754,1.15,4.155,0.9,4.450590,0.10080,0.037233,26.857784,-0.258819,-0.965926,0.451027,0.435890
65669,5.754,1.15,4.155,0.9,4.974188,0.13921,0.065774,15.203686,0.258819,-0.965926,0.451027,0.435890


In [52]:
model = InterpolRegressor.load_from_checkpoint(f'/Users/andrey.golda/Documents/Study/MSU_interpol/wandb_local_logs/MSU_interpol_unified_notebooks_replication/dauntless-cherry-7/checkpoints/exp_name=0val_loss=0.08474-epoch=63.ckpt', hyperparams=hyperparams_dict)
model.eval()
type(model)

2024-12-04 00:16:37,228 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/wandb_local_logs/MSU_interpol_unified_notebooks_replication/dauntless-cherry-7/checkpoints/exp_name=0val_loss=0.08474-epoch=63.ckpt


/Users/andrey.golda/Library/Caches/pypoetry/virtualenvs/msu-interpol--lw2ADYE-py3.11/lib/python3.11/site-packages/lightning/pytorch/utilities/parsing.py:208: Attribute 'activation_function' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['activation_function'])`.


__main__.InterpolRegressor

In [53]:
if True:
    df_grid_parts = np.array_split(df, 
                                   100)
    df_grid_parts_preds = []
    for df_grid_part in tqdm.tqdm(df_grid_parts):
        df_grid_part_pred_for_pred = df_grid_part[['Ebeam', 'W', 'Q2', 'theta', 'cos_theta', 'sin_theta', 'phi', 'cos_phi', 'sin_phi']]
        dsigma_dOmega_predicted = model.to('cpu').forward(torch.tensor(df_grid_part_pred_for_pred.to_numpy(),dtype=torch.float32)).detach()

        df_grid_part['dsigma_dOmega_predicted_stage_1'] = dsigma_dOmega_predicted
        df_grid_part['dsigma_dOmega_predicted_stage_1'] = abs(df_grid_part.dsigma_dOmega_predicted_stage_1)
        df_grid_parts_preds.append(df_grid_part)

    df_grid = pd.concat(df_grid_parts_preds)

  return bound(*args, **kwds)
100%|██████████| 100/100 [00:00<00:00, 141.35it/s]


In [60]:
df_grid['dsigma_dOmega_replica_stage_2'] = df_grid.apply(lambda x: [np.random.normal(x.dsigma_dOmega_predicted_stage_1, x.error) for _ in range(100)], axis=1)

In [62]:
df_grid[[f'dsigma_dOmega_replica_{i}' for i in range(100)]] = pd.DataFrame(df_grid.dsigma_dOmega_replica_stage_2.tolist(), index= df_grid.index)

  df_grid[[f'dsigma_dOmega_replica_{i}' for i in range(100)]] = pd.DataFrame(df_grid.dsigma_dOmega_replica_stage_2.tolist(), index= df_grid.index)
  df_grid[[f'dsigma_dOmega_replica_{i}' for i in range(100)]] = pd.DataFrame(df_grid.dsigma_dOmega_replica_stage_2.tolist(), index= df_grid.index)
  df_grid[[f'dsigma_dOmega_replica_{i}' for i in range(100)]] = pd.DataFrame(df_grid.dsigma_dOmega_replica_stage_2.tolist(), index= df_grid.index)


In [70]:
df_grid.iloc[1][[f'dsigma_dOmega_replica_{i}' for i in range(100)]].std()

0.08985430786311903

In [72]:
df_grid = df_grid.drop(['dsigma_dOmega_replica_stage_2'], axis=1)

In [74]:
df_grid.to_csv('./data/df_replicas.csv', index=False)

In [75]:
df_grid.head()

Unnamed: 0,Ebeam,W,Q2,cos_theta,phi,dsigma_dOmega,error,weight,cos_phi,sin_phi,...,dsigma_dOmega_replica_90,dsigma_dOmega_replica_91,dsigma_dOmega_replica_92,dsigma_dOmega_replica_93,dsigma_dOmega_replica_94,dsigma_dOmega_replica_95,dsigma_dOmega_replica_96,dsigma_dOmega_replica_97,dsigma_dOmega_replica_98,dsigma_dOmega_replica_99
8314,5.754,1.11,1.715,-0.1,0.1309,0.24835,0.08115,12.322859,0.991445,0.130526,...,0.456779,0.242684,0.355436,0.13273,0.26377,0.229353,0.246379,0.186208,0.24892,0.368296
8315,5.754,1.11,1.715,-0.1,0.392699,0.31508,0.08929,11.199462,0.92388,0.382683,...,0.379562,0.188648,0.086467,0.278314,0.335859,0.334705,0.362535,0.322924,0.388838,0.407577
8316,5.754,1.11,1.715,-0.1,0.654498,0.33037,0.09702,10.307153,0.793353,0.608761,...,0.338091,0.323754,0.253096,0.254849,0.166222,0.17475,0.346885,0.449737,0.189549,0.257476
8317,5.754,1.11,1.715,-0.1,0.916298,0.22582,0.0596,16.778523,0.608761,0.793353,...,0.301245,0.164911,0.230826,0.369455,0.37457,0.356591,0.323423,0.298632,0.335021,0.196133
8318,5.754,1.11,1.715,0.1,0.1309,0.31506,0.0954,10.48218,0.991445,0.130526,...,0.15965,0.247945,0.207566,0.199702,0.245507,0.232958,0.120615,0.336341,0.288613,0.281205


# Analysis

In [80]:
!pip install matplotlib==3.7.3

Collecting matplotlib==3.7.3
  Downloading matplotlib-3.7.3-cp311-cp311-macosx_11_0_arm64.whl.metadata (5.7 kB)
Downloading matplotlib-3.7.3-cp311-cp311-macosx_11_0_arm64.whl (7.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.3/7.3 MB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: matplotlib
  Attempting uninstall: matplotlib
    Found existing installation: matplotlib 3.9.3
    Uninstalling matplotlib-3.9.3:
      Successfully uninstalled matplotlib-3.9.3
Successfully installed matplotlib-3.7.3

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [82]:
import seaborn as sns
sns.set_theme(style="ticks")

# Load the planets dataset and initialize the figure
planets = sns.load_dataset("planets")
g = sns.JointGrid(data=planets, x="year", y="distance", marginal_ticks=True)

# Set a log scaling on the y axis
g.ax_joint.set(yscale="log")

# Create an inset legend for the histogram colorbar
cax = g.figure.add_axes([.15, .55, .02, .2])

# Add the joint and marginal histogram plots
g.plot_joint(
    sns.histplot, discrete=(True, False),
    cmap="light:#03012d", pmax=.8, cbar=True, cbar_ax=cax
)
g.plot_marginals(sns.histplot, element="step", color="#03012d")

AttributeError: module 'matplotlib.cm' has no attribute 'register_cmap'

In [83]:
df_10 = pd.read_csv('./data/replicas/df_replicas_10.csv')

Unnamed: 0.1,Unnamed: 0,Ebeam,W,Q2,cos_theta,phi,dsigma_dOmega,error,weight,cos_phi,...,dsigma_dOmega_replica_prediction_1,dsigma_dOmega_replica_prediction_2,dsigma_dOmega_replica_prediction_3,dsigma_dOmega_replica_prediction_4,dsigma_dOmega_replica_prediction_5,dsigma_dOmega_replica_prediction_6,dsigma_dOmega_replica_prediction_7,dsigma_dOmega_replica_prediction_8,dsigma_dOmega_replica_prediction_9,dsigma_dOmega_replica_prediction_10
0,0,5.754,1.11,1.715,-0.1,0.130900,0.24835,0.081150,12.322859,0.991445,...,0.189774,0.234991,0.225459,0.208386,0.168591,0.228682,0.233257,0.237077,0.201193,0.229529
1,1,5.754,1.11,1.715,-0.1,0.392699,0.31508,0.089290,11.199462,0.923880,...,0.201751,0.270579,0.273867,0.225028,0.192843,0.219740,0.259167,0.258812,0.253236,0.256817
2,2,5.754,1.11,1.715,-0.1,0.654498,0.33037,0.097020,10.307153,0.793353,...,0.230827,0.341890,0.354447,0.275502,0.235233,0.256140,0.280812,0.290993,0.318898,0.300997
3,3,5.754,1.11,1.715,-0.1,0.916298,0.22582,0.059600,16.778523,0.608761,...,0.292635,0.460999,0.427862,0.331879,0.301013,0.329214,0.306513,0.371696,0.407954,0.369746
4,4,5.754,1.11,1.715,0.1,0.130900,0.31506,0.095400,10.482180,0.991445,...,0.229467,0.275368,0.244739,0.228003,0.183688,0.286869,0.268643,0.268183,0.232064,0.258227
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41294,41294,5.754,1.15,4.155,0.9,3.403392,0.24095,0.071110,14.062691,-0.965926,...,0.080917,0.098066,0.111499,0.097738,0.099581,0.108693,0.111772,0.085769,0.097388,0.115594
41295,41295,5.754,1.15,4.155,0.9,3.926991,0.19967,0.078718,12.703601,-0.707107,...,0.085034,0.085933,0.110537,0.108740,0.086474,0.115734,0.133853,0.084319,0.115107,0.099630
41296,41296,5.754,1.15,4.155,0.9,4.450590,0.10080,0.037233,26.857784,-0.258819,...,0.091768,0.081094,0.107655,0.110031,0.078640,0.126579,0.130909,0.091104,0.127442,0.100240
41297,41297,5.754,1.15,4.155,0.9,4.974188,0.13921,0.065774,15.203686,0.258819,...,0.101512,0.081279,0.093633,0.095544,0.089637,0.126812,0.113052,0.107118,0.123310,0.115086
