In [None]:
import pandas as pd
from typing import List
from scipy.linalg import lstsq
import numpy as np
import matplotlib.pyplot as plt
from pandas import (
    Timestamp,
)
import math
# Read the whole workbook at once. With sheet_name=None pandas returns a
# dict that maps each sheet name to its own DataFrame, so `df` is a dict of
# frames rather than a single frame. The first column becomes the index.
df = pd.read_excel(
    'files/smspecs_yearly.xlsx',
    sheet_name=None,
    index_col=0,
)
class Variant:
    """Yearly data of one scenario together with piecewise cubic trend fits.

    A variant is identified by a key such as ``'100-2020-xxxx-xxxx'``. The
    first dash-separated field is a string of binary flags; every following
    field is the year paired with the flag at the same position (``'xxxx'``
    meaning "no explicit year"). Each (flag, year) pair becomes one event
    date, and the cumulative ``total`` reached at each event date is used as
    the upper border of one trend segment.

    The wrapped frame is expected to be indexed by date and to carry the
    columns ``total`` and ``yearly``; all values are divided by 1e9 on load.
    """

    # Placeholder used inside a key when an event has no explicit year.
    _NO_DATE = 'xxxx'
    # Years substituted for the placeholder, selected by the event's flag.
    # NOTE(review): presumably "before the data starts" / "after the data
    # ends" -- confirm against the workbook's date range.
    _YEAR_WHEN_FLAG_SET = '2013'
    _YEAR_WHEN_FLAG_CLEAR = '2031'
    # Degree of the polynomial fitted on every segment.
    _TREND_DEGREE = 3

    # Scaled source frame (columns `total` and `yearly`, indexed by date).
    _df:pd.DataFrame=None
    # One event date per flag of the key, in key order.
    dates:List[Timestamp]=None
    # Reference variant used by `get_dq`; None for the reference itself.
    base:'Variant'=None

    @classmethod
    def from_xls_smspecs_yearly(cls, df, key, base=None):
        """Build a variant from one workbook sheet.

        Args:
            df: Sheet contents; stored divided by 10**9.
            key: Sheet name of the form ``'<flags>-<year>-<year>-...'``.
            base: Optional reference variant for `get_dq`.

        Returns:
            The populated Variant instance.
        """
        obj: Variant = cls()
        obj.base = base
        obj.full_name = key

        flags, *date_parts = key.split('-')
        obj.dates = []
        for bin_var, date_line in zip(flags, date_parts):
            if date_line == cls._NO_DATE:
                # No explicit year: the flag decides which sentinel year is used.
                year = cls._YEAR_WHEN_FLAG_SET if bin_var == '1' else cls._YEAR_WHEN_FLAG_CLEAR
            else:
                year = date_line
            obj.dates.append(pd.to_datetime(f'{year}-01-01', format='%Y-%m-%d'))

        obj._df = df / 10**9
        obj._trendC00 = None  # lazily filled by `_get_trends`
        obj._RGI = None
        return obj

    @property
    def _get_totals_by_event(self)->pd.DataFrame:
        """Frame indexed by the event dates with a zero-filled `total` column.

        It only serves as the right-hand side of the merge in `get_borders`.
        """
        return pd.DataFrame(index=self.dates, data={'total': [0] * len(self.dates)})

    @property
    def get_borders(self)->pd.DataFrame:
        """Value of `total` at every event date, one row per event.

        Event dates that are absent from the data get ``math.inf`` so that the
        corresponding segment is unbounded from above.
        """
        df_res = pd.merge(self._df, self._get_totals_by_event, left_index=True, right_index=True, how='right')
        # The merge suffixes the clashing `total` columns; keep the data one.
        df_res = df_res.rename(columns={'total_x': 'total'})
        df_res = df_res.drop(['total_y', 'yearly'], axis=1)
        return df_res.fillna(math.inf)

    @property
    def name(self) -> int:
        """Flag field of the key interpreted as a binary number."""
        return int(self.full_name.split('-')[0], base=2)

    def __repr__(self) -> str:
        return '\n'.join([
            repr(self._df),
            f'name={self.name:03b} dates={self.dates}',
            self.full_name
        ])

    @property
    def _get_trends(self)->List[np.ndarray]:
        """Least-squares cubic coefficients of `yearly` vs `total`, per segment.

        Segment i covers the rows whose `total` lies between the previous
        border and border i (both inclusive). A segment without rows reuses the
        coefficients of the preceding segment. Coefficients are ordered from
        the constant term upwards. The result is computed once and cached.

        Raises:
            IndexError: If the very first segment contains no rows.
        """
        if self._trendC00 is None:
            df_c = self._df
            powers = range(self._TREND_DEGREE + 1)
            # Fit into a local list so that a failure half-way through does
            # not leave a partially filled cache behind.
            trends = []
            min_val = -math.inf
            for cur_total in self.get_borders.total:
                segment = df_c[(min_val <= df_c.total) & (df_c.total <= cur_total)]
                x_ar = segment.total.to_numpy()
                y_ar = segment.yearly.to_numpy()
                if len(x_ar) == 0:
                    if not trends:
                        raise IndexError('no data at or below the first border; cannot fit a trend')
                    trends.append(trends[-1])
                else:
                    a_matrix = np.vstack([[v ** p for p in powers] for v in x_ar])
                    trends.append(lstsq(a_matrix, y_ar)[0])
                min_val = cur_total
            self._trendC00 = trends
        return self._trendC00

    def _eval_trend(self, val, trends, borders):
        """Evaluate the polynomial of the first segment whose border exceeds `val`."""
        for c00, total in zip(trends, borders):
            if val < total:
                return sum(coef * (val ** power) for power, coef in enumerate(c00))
        # `val` is at or beyond every border, so no segment covers it.
        raise ValueError(f'Чо то с трендами, {self}')

    def get_trend_val(self, val:float)->float:
        """Trend value of `yearly` at the cumulative total `val`.

        Raises:
            ValueError: If `val` is not strictly below any segment border.
        """
        return self._eval_trend(val, self._get_trends, self.get_borders.total.tolist())

    def show(self):
        """Plot the data with its fitted trend and, if a base is set, the dq curve."""
        fig, axs = plt.subplots(ncols=2)
        axs[0].plot(self._df.total, self._df.yearly, '.')
        x_arr = np.linspace(self._df.total.min(), self._df.total.max(), 500)
        # Fetch trends and borders once instead of once per plotted point.
        trends = self._get_trends
        borders = self.get_borders.total.tolist()
        axs[0].plot(x_arr, [self._eval_trend(x, trends, borders) for x in x_arr])
        axs[0].set_xlim(0)
        if self.base is not None:
            dq = self.get_dq
            axs[1].plot(dq.x, dq.dq)
        plt.show()

    @property
    def get_dq(self)->pd.DataFrame:
        """Difference between this variant's trend and the base variant's trend.

        The difference is evaluated at every `total` of the base data that is
        not below this variant's first border. Requires `base` to be set.

        Returns:
            Frame with columns `x` (the base totals) and `dq`.
        """
        borders = self.get_borders.total
        # Positional access: the border Series is indexed by dates, not by 0..n.
        first_border = borders.iloc[0]
        base_totals = self.base._df.total
        x_arr = base_totals[base_totals >= first_border]

        own_trends, own_borders = self._get_trends, borders.tolist()
        base_trends = self.base._get_trends
        base_borders = self.base.get_borders.total.tolist()
        return pd.DataFrame({
            'x': x_arr,
            'dq': [
                self._eval_trend(total, own_trends, own_borders)
                - self.base._eval_trend(total, base_trends, base_borders)
                for total in x_arr
            ],
        })
# The sheet whose flags are all zero is the reference every other variant is
# compared against, so it is built first and handed to the rest as `base`.
base_var = Variant.from_xls_smspecs_yearly(
    df['000-xxxx-xxxx-xxxx'],
    '000-xxxx-xxxx-xxxx',
)
arr_vars = [
    Variant.from_xls_smspecs_yearly(sheet, key, base_var)
    for key, sheet in df.items()
    if key != '000-xxxx-xxxx-xxxx'
]

# base_var

In [None]:


class Proxy:
    """Thin container around a list of variants with lookup and plotting helpers."""

    def __init__(self, arr_Variant:List['Variant']) -> None:
        """Store the variants; the list is kept by reference, not copied."""
        self.arr_Variant = arr_Variant

    def get_var_by_name(self,name:int)->'Variant':
        """Return the first variant whose numeric `name` equals `name`.

        Raises:
            StopIteration: If no stored variant has that name.
        """
        return next(var for var in self.arr_Variant if var.name == name)

    def show(self):
        """Plot the dq curves of the variants named 1, 2 and 4, one panel each."""
        fig, axs = plt.subplots(nrows=3, figsize=(10, 20))
        for ax, num in zip(axs, [1, 2, 4]):
            for var in self.arr_Variant:
                if var.name != num:
                    continue
                dq = var.get_dq
                ax.plot(dq.x, dq.dq, 'o-', label=var.full_name)
            # Pin the lower limit only after plotting: set_ylim() switches
            # autoscaling off, so calling it on an empty axes would freeze the
            # range at the default (0, 1) and clip the curves.
            ax.set_ylim(0)
            # Skip the legend on empty panels to avoid matplotlib's warning.
            if ax.get_legend_handles_labels()[0]:
                ax.legend()
        plt.show()
        
# Wrap the non-baseline variants so they can be looked up and plotted together.
p = Proxy(arr_vars)

# Draw the dq comparison panels for the wrapped variants.
p.show()

In [None]:
# Inspect a single variant, selected by its full sheet key.
name = '001-xxxx-xxxx-2023'
var = next(v for v in p.arr_Variant if v.full_name == name)
var.show()
# Last expression of the cell: display the segment borders of that variant.
var.get_borders

In [None]:
from scipy.interpolate import RegularGridInterpolator as RGI

# Scratch check of 1-D linear interpolation on y = x**2 sampled at 1, 2, 3.
x = np.array([1, 2, 3])
interp = RGI(
    (x,),
    x ** 2,
    method='linear',
    bounds_error=False,  # do not raise for points outside the grid
    fill_value=None,     # None -> extrapolate instead of filling with a constant
)
# The last query point (4) lies outside the grid and is therefore extrapolated.
interp(np.array([1, 2, 3, 4]))