In [1]:
import locale
# Switch the numeric locale to German so that numbers are formatted with a
# comma as the decimal separator.
# NOTE: setlocale raises locale.Error if "de_DE.UTF-8" is not installed on the host.
locale.setlocale(locale.LC_NUMERIC, "de_DE.UTF-8")

import matplotlib
from matplotlib import pyplot as plt
import seaborn as sns
import scienceplots
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from matplotlib.backends.backend_pgf import FigureCanvasPgf

# Register the PGF canvas as the handler for the 'pdf' output format.
matplotlib.backend_bases.register_backend('pdf', FigureCanvasPgf)

# Let axis tick formatters follow the process locale (LC_NUMERIC is set at the
# top of this cell).
plt.rcParams['axes.formatter.use_locale'] = True

# Use the 'science' and 'russian-font' style sheets and render text with TeX.
# (Presumably both styles are provided by the scienceplots import - verify.)
plt.style.use(['science', 'russian-font'])
matplotlib.rc('text', usetex = True)
# Read the LaTeX preamble currently in rcParams so the PGF preamble below can
# extend it instead of replacing it.
preamble = plt.rcParams.get('text.latex.preamble')
params = {
  # Do not let the PGF backend set fonts from rcParams.
  'pgf.rcfonts' : False,
  'pgf.texsystem' : 'pdflatex',
  # Existing preamble plus the textcomp/mathcomp packages.
  'pgf.preamble' : "\n".join([preamble,r'\usepackage{textcomp,mathcomp}'])

}
plt.rcParams.update(params)
# Draw a grid on every axes using a custom dash pattern.
plt.rcParams.update({"axes.grid" : True, "grid.linestyle" : (0, (5, 10))})

# Seed NumPy's global random generator so random draws in this notebook repeat.
np.random.seed(0)

import os
# Lower-cased current working directory.
current_dir = os.getcwd().lower()

# Folder (relative to the working directory) that receives generated output.
OUTPUT_DIR = 'processor_output'
# Create the output folder if it does not exist. exist_ok=True replaces the
# earlier "check, then create" sequence, which could fail if the folder
# appeared between the check and the call, and which tested a differently
# spelled path ('./' + OUTPUT_DIR) than the one it created.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [None]:
import itertools
import math
def gen_bars(df, col, err):
  """Return a copy of ``df`` with symmetric bound columns added for ``col``.

  Two columns are written to the copy: ``col + '_hb'`` (value plus ``err``)
  and ``col + '_lb'`` (value minus ``err``). The input frame is not modified.
  """
  bounds = {
    col + '_hb': df[col] + err,
    col + '_lb': df[col] - err,
  }
  out = df.copy()
  for name, values in bounds.items():
    out[name] = values
  return out

def gen_col_with_bars(df, col, func, *params):
  """Return a copy of ``df`` with a derived column and its bounds.

  ``func`` is evaluated row by row with keyword arguments named after
  ``params``. The nominal value (from the plain ``params`` columns) is stored
  in ``col``. ``func`` is also evaluated for every combination of the
  ``<param>_hb`` / ``<param>_lb`` columns; the element-wise maximum of those
  results goes to ``col + '_hb'`` and the minimum to ``col + '_lb'``.
  """
  n_rows = len(df)
  lowest = pd.Series([math.inf] * n_rows, index=df.index)
  highest = pd.Series([-math.inf] * n_rows, index=df.index)

  # One [high, low] pair of column names per input parameter.
  bound_choices = [[name + '_hb', name + '_lb'] for name in params]
  for combo in itertools.product(*bound_choices):
    # Dropping the 3-character '_hb'/'_lb' suffix recovers the keyword name.
    outcome = df.apply(
      lambda row: func(**{bound[:-3]: row[bound] for bound in combo}),
      axis=1,
    )
    lowest = np.minimum(lowest, outcome)
    highest = np.maximum(highest, outcome)

  result = df.copy()
  result[col] = result.apply(
    lambda row: func(**{name: row[name] for name in params}),
    axis=1,
  )
  result[col + '_hb'] = highest
  result[col + '_lb'] = lowest
  return result

def to_rus_str(col):
  """Convert a Series to strings that use a comma as the decimal separator.

  Every value is cast to ``str`` and each literal ``'.'`` is replaced by ``','``.
  """
  # regex=False is passed explicitly: with a regex replace, '.' matches any
  # character, and older pandas versions used regex=True by default.
  return col.astype('str').str.replace('.', ',', regex=False)

def gen_latex_col(df, col, round, err=None, mult=1):
  """Build a Series of LaTeX strings of the form ``$value \\pm error$``.

  Parameters:
    df: frame holding ``col`` (and ``col + '_hb'`` / ``col + '_lb'`` when
      ``err`` is not given).
    col: name of the value column.
    round: number of decimals to keep; a value <= 0 produces integers.
    err: optional Series of errors; defaults to half the distance between
      the ``_hb`` and ``_lb`` columns.
    mult: factor applied to the value and the error after rounding.

  Returns:
    A Series of strings with a comma as decimal separator.
  """
  if round > 0:
    def fround(x):
      return x.round(round)
  else:
    def fround(x):
      # Round to the nearest integer before casting; a bare astype(int)
      # would truncate toward zero (2.7 -> 2).
      return x.round().astype(int)

  if err is None:
    err = (df[col+'_hb'] - df[col+'_lb']) / 2
  # Note: scaling by mult is applied to the already rounded numbers.
  err = to_rus_str(fround(err) * mult)
  newcol = to_rus_str(fround(df[col]) * mult)
  # Raw string: "\p" is not a valid escape sequence in a normal string literal.
  return "$" + newcol + r" \pm " + err + "$"

def get_err_from_bars(df, col):
  """Return half the distance between ``col + '_hb'`` and ``col + '_lb'``."""
  upper = df[col + '_hb']
  lower = df[col + '_lb']
  full_width = upper - lower
  return full_width / 2

def get_errs(df, col):
  """Return the distances from ``col`` to its bounds as a stacked 2-row array.

  Row 0 is ``col + '_hb'`` minus the value; row 1 is the value minus
  ``col + '_lb'``.

  NOTE(review): if this array is passed as xerr/yerr to a plotting call,
  confirm that the callee expects the upward distance in the first row -
  callers are not visible here.
  """
  center = df[col]
  above = df[col + '_hb'] - center
  below = center - df[col + '_lb']
  return np.vstack([above, below])

def get_random_sample(df, column, err=None):
  """Draw one normally distributed value per row of ``df[column]``.

  The mean of each draw is the value in ``column``. The standard deviation is
  ``err`` when given, otherwise half the distance between the
  ``column + '_hb'`` and ``column + '_lb'`` columns. Draws come from NumPy's
  global random generator.
  """
  sigma = (df[column + '_hb'] - df[column + '_lb']) / 2 if err is None else err
  return np.random.normal(df[column], sigma)

def simulate(df, xcol, ycol, num=1000, xerr=None, yerr=None, seed=0):
  """Fit straight lines to randomly perturbed copies of the data.

  For each of ``num`` iterations, ``xcol`` and ``ycol`` are resampled with
  ``get_random_sample`` and a first-degree polynomial is fitted to the result.

  Parameters:
    df: frame holding the data columns (and their ``_hb``/``_lb`` bounds
      when the corresponding error is not given).
    xcol, ycol: names of the x and y columns.
    num: number of resampled fits.
    xerr, yerr: optional standard deviations forwarded to
      ``get_random_sample``.
    seed: value used to reseed NumPy's global random generator before
      sampling. The default ``0`` keeps the historical behaviour; pass
      ``None`` to leave the global generator state untouched.

  Returns:
    A tuple ``(slopes, intercepts)`` of NumPy arrays, one entry per fit.
  """
  if seed is not None:
    # Reseeding makes repeated calls return identical results, at the cost of
    # resetting the global random state for the caller as well.
    np.random.seed(seed)
  slopes = []
  intercepts = []
  for _ in range(num):
    x = get_random_sample(df, xcol, err=xerr)
    y = get_random_sample(df, ycol, err=yerr)

    # polyfit returns coefficients highest degree first: [slope, intercept].
    slope, intercept = np.polyfit(x, y, 1)
    slopes.append(slope)
    intercepts.append(intercept)

  return np.array(slopes), np.array(intercepts)

def get_slopes_stat(df, xcol, ycol):
  """Return the mean and standard deviation of the simulated slopes.

  Runs ``simulate`` with its default settings and summarises only the slopes
  as ``{'mean': ..., 'std': ...}``.
  """
  slopes = simulate(df, xcol, ycol)[0]
  summary = {}
  summary['mean'] = np.mean(slopes)
  summary['std'] = np.std(slopes)
  return summary

def get_sim_stat(df, xcol, ycol, xerr=None, yerr=None):
  """Return mean and standard deviation of simulated slopes and intercepts.

  ``xerr`` and ``yerr`` are forwarded to ``simulate``. The result is a nested
  dict with the keys ``'slopes'`` and ``'intercepts'``, each mapping to
  ``{'mean': ..., 'std': ...}``.
  """
  fitted = simulate(df, xcol, ycol, xerr=xerr, yerr=yerr)
  labelled = zip(('slopes', 'intercepts'), fitted)
  return {
    label: {'mean': np.mean(values), 'std': np.std(values)}
    for label, values in labelled
  }

In [None]:
#CONSTS