In [3]:
import pandas as pd
import numpy as np
import scipy.optimize as sc
import matplotlib.pyplot as plt
import json
from tqdm import tqdm

# Обработка данных из таблиц ресурсов и использования товаров и услуг

In [4]:
def read_and_formalize_tri_data(file_name: str = '../data/raw_data/TRI_2019.xlsx',
                                sheet_name: str = 'М-отеч',
                                header: int = 3):

    df = pd.read_excel(file_name, sheet_name=sheet_name, header=header)
    df = df.drop(['Unnamed: 1', 'Unnamed: 2'], axis=1).set_index('№№')
    df.columns = df.columns.astype(int)
    
    idx = list(np.arange(1, 66)) + [72]
    col = list(np.arange(1, 62)) + [71]
    
    df = (
        df
        .loc[idx, col]
        .append(pd.DataFrame(df.loc[62:65, col].sum(),
                             columns=['rest_production']).transpose())
        .rename(columns={71: 'utilisation'},
                index={72: 'VDS'})
        .drop([62, 63, 64, 65], axis=0)
    )
    return df

def agregate_array_by_14_branches(array_inp: np.array,
                                  idx_by_line: list):

    n_lines = len(idx_by_line)
    array_agg = np.zeros(n_lines)
    for i in range(0, n_lines):
        idx_line = idx_by_line[i]
        for idx in idx_line:
            if idx['begin'] != idx['end']:
                array_agg[i] += array_inp[idx['begin'] : idx['end']].sum()
            else:
                array_agg[i] += array_inp[idx['begin']]
    return np.array([array_agg]) 

def set_transition_matrix(idx_by_line: list,
                          n_cols: int = 61):

    n_lines = len(idx_by_line)
    T = np.zeros((n_lines, n_cols))
    for i in range(0, n_lines):
        idx_line = idx_by_line[i]
        for idx in idx_line:
            if idx['begin'] != idx['end']:
                T[i, idx['begin']:idx['end']] = np.ones(idx['end'] - idx['begin'])
            else:
                T[i, idx['begin']] = 1
    return T

def preprocess_tri_data(file_raw: str = '../data/raw_data/TRI_2019.xlsx',
                        file_algo: str = '../data/algo_data/transition_matrix_idx_by_line.json'):

    df = read_and_formalize_tri_data(file_raw)
    
    f = open(file_algo)
    idx_by_line = json.load(f)
    f.close()

    T = set_transition_matrix(idx_by_line)

    vds = np.array(df.loc['VDS'])[:-1]
    vds_short = agregate_array_by_14_branches(vds, idx_by_line)

    rest_production = np.array(df.loc['rest_production'])[:-1]
    rest_production_short = agregate_array_by_14_branches(rest_production, idx_by_line)

    short_mat = T @ np.array(df)[:61, :61] @ T.T

    utilisation_short = short_mat.sum(0) + vds_short + \
                        rest_production_short - short_mat.sum(1)
    utilisation_short = np.array([np.append(utilisation_short, [0, 0])])

    short_mat = np.concatenate([short_mat, vds_short, rest_production_short], axis=0)
    short_mat = np.concatenate([short_mat.T, utilisation_short]).T

    df_short = pd.DataFrame(short_mat)
    df_short.columns = list(np.arange(1, 15)) + ['utilisation']
    df_short.index = list(np.arange(1, 15)) + ['vds', 'rest_production']
    
    return df_short

In [5]:
df_2020 = preprocess_tri_data('../data/raw_data/TRI_2020.xlsx')
df_2019 = preprocess_tri_data('../data/raw_data/TRI_2019.xlsx')

  df
  df


# Обработка данных по занятости

In [6]:
def preprocess_labour_data(base_year: str = '2019',
                           file_raw: str = '../data/raw_data/non_tri_data.xlsx',
                           file_names: str = '../data/algo_data/branch_names_eng.json'):
    df_labour_by_region = pd.read_excel(file_raw)
    df_labour_by_region = (
        df_labour_by_region
        [['Unnamed: 0',
          'Численность рабочей силы, '+base_year+' (тыс. человек)']]
        .rename(columns = {
            'Unnamed: 0': 'Region',
            'Численность рабочей силы, '+base_year+' (тыс. человек)': 'Workforce'
        })
        .assign(Workforce_int = lambda df: df.Workforce.astype(int))
        .drop('Workforce', axis=1)
        .rename(columns={'Workforce_int': 'Workforce'})
    )
    
    f = open(file_names)
    industry_names = json.load(f)
    n_industry = len(industry_names)
    f.close()
    
    df_labour_by_industry = (
        pd.read_excel(file_raw, sheet_name='Labour_by_industry_'+base_year)
    )
    df_labour_by_industry.columns = ['Region'] + industry_names
    
    n_labour = np.array(df_labour_by_region['Workforce'])
    full_labour = np.array(df_labour_by_industry)[:, 1:] * \
                  np.tile(n_labour.T, (n_industry, 1)).T
    
    df_full_labour = pd.DataFrame(full_labour,
                              columns=industry_names,
                              index=df_labour_by_industry.index)
    df_full_labour.insert(0, 'Region', df_labour_by_industry['Region'])
    
    return df_full_labour, df_labour_by_region, df_labour_by_industry

In [7]:
df_full_labour_2019, _, _ = preprocess_labour_data('2019')
df_full_labour_2020, _, _ = preprocess_labour_data('2020')

# Обработка данных по ВДС

In [8]:
def preprocess_vds_data(base_year: str = '2019',
                        file_name: str = '../data/raw_data/non_tri_data.xlsx',
                        file_names: str = '../data/algo_data/branch_names_vds.json'):
    f = open(file_names)
    names_vds = json.load(f)
    n_industry = len(names_vds)
    f.close()
    df_vds = pd.read_excel(file_name,
                           sheet_name='VDS_by_industry_'+base_year).drop(['Всего'], axis=1)
    
    df_vds_by_region = pd.read_excel('../data/raw_data/non_tri_data.xlsx',
                           sheet_name='VRP', header=1)
    
    n_vds = np.array(df_vds_by_region['Y'+base_year])
#     print(np.array(df_vds)[:, 1:])
#     print(np.tile(n_vds.T, (n_industry-1, 1)).T)
    full_vds = np.array(df_vds)[:, 1:] * \
               np.tile(n_vds.T, (n_industry-1, 1)).T / 100
#     print(df_vds)
    
    df_full_vds = pd.DataFrame(full_vds,
                              columns=names_vds[1:],
                              index=df_vds.index)
    df_full_vds.insert(0, 'Region', df_vds['Unnamed: 0'])
    
    df_vds = (
        df_vds
        .rename(columns=dict(zip(df_vds.columns, names_vds)))
        .assign(Other = lambda df: df.eval('Financial + Science + Administration + Government + Culture + Households'))
        .drop(['Financial', 'Science', 'Administration', 'Government', 'Culture', 'Households', 'Other activities'], axis=1)
    )
    
    df_full_vds = (
        df_full_vds
        .rename(columns=dict(zip(df_full_vds.columns, names_vds)))
        .assign(Other = lambda df: df.eval('Financial + Science + Administration + Government + Culture + Households',engine='python'))
        .drop(['Financial', 'Science', 'Administration', 'Government', 'Culture', 'Households', 'Other activities'], axis=1)
    )
    
    return df_vds, df_full_vds

In [9]:
df_vds_2019, df_full_vds_2019 = preprocess_vds_data('2019')
df_vds_2020, df_full_vds_2020 = preprocess_vds_data('2020')

In [10]:
# df_vds_2019.to_excel('../data/preprocessed_data/df_vds_2019.xlsx')
# df_vds_2020.to_excel('../data/preprocessed_data/df_vds_2020.xlsx')
# df_full_vds_2019.to_excel('../data/preprocessed_data/df_full_vds_2019.xlsx')
# df_full_vds_2020.to_excel('../data/preprocessed_data/df_full_vds_2020.xlsx')

# Обработка данных по ВВП

In [11]:
def preprocess_vvp_data(file_name: str = '../data/raw_data/vvp_2019_2020.xlsx',
                        file_names: str = '../data/algo_data/branch_names_vvp.json'):
    f = open('../data/algo_data/branch_names_vvp.json')
    names_vds = json.load(f)
    f.close()
    df_vvp = pd.read_excel('../data/raw_data/vvp_2019_2020.xlsx')
    df_vvp.insert(0, 'Branches', names_vds)
    df_vvp = (
        df_vvp
        .set_index('Branches')
        .drop('Unnamed: 0', axis=1)
        .transpose()
        .assign(Other = lambda df: df.eval('Financial + Science + Administration + Government + Culture + Households'))
        .drop(['Financial', 'Science', 'Administration', 'Government', 'Culture', 'Households', 'Other activities'], axis=1)
        .transpose()
        .assign(Proc = lambda df: df.eval('Y2020 / Y2019 * 100'))
    )
    return df_vvp

In [12]:
df_vvp = preprocess_vvp_data()

# Построение коэффициентов локализации

In [128]:
def get_SLQ(labour, idx_region, i):
    try:
        return (labour[idx_region, i] / labour[idx_region, :].sum() *\
                                      labour[0, :].sum() / labour[0, i])
    except Exception as e:
        return 0

def get_CILQ(labour, idx_region, i, j):
    try:
        return (labour[idx_region, i] / labour[0, i] *\
                                      labour[0, j] / labour[idx_region, j])
    except Exception as e:
        return 0 
    

def get_FLQ(labour, idx_region, i, j, delta):
    lam = np.log2(1 + labour[idx_region, :].sum() / labour[0, :].sum()) ** delta
    if i == j:
        return np.minimum(get_SLQ(labour, idx_region, i) * lam, 1)
    else:
        return np.minimum(get_CILQ(labour, idx_region, i, j), 1)

def get_t(mob, labour, idx_region, delta):
    t_ij = np.zeros(mob.shape)
    for i in range(0, 14):
        for j in range(0, 14):
            try:
                if i == j:
                    t_ij[i, j] = get_SLQ(labour, idx_region, i)
                else:
                    t_ij[i, j] = get_CILQ(labour, idx_region, i, j)
            except Exception as e:
                    t_ij[i, j] = 0
    lam = np.log2(1 + labour[idx_region, :].sum() / labour[0, :].sum()) ** delta
    t_ij = np.minimum(t_ij * lam, np.ones(mob.shape))
    return t_ij

def get_t_AFLQ(mob, labour, idx_region, delta):
    t_ij = np.zeros(mob.shape)
    for i in range(0, 14):
        for j in range(0, 14):
            SLQ = get_SLQ(labour, idx_region, j)
            FLQ = get_FLQ(labour, idx_region, i, j, delta)
            try:
                if SLQ > 1:
                    t_ij[i, j] = FLQ * np.log2(1 + SLQ)
                else:
                    t_ij[i, j] = FLQ
            except Exception as e:
                    t_ij[i, j] = 0
    t_ij = np.minimum(t_ij, np.ones(mob.shape))
    return t_ij

def get_regional_df(idx_region, df_mob, df_labour, df_vds):
    n_branch = 14
    mob_full = np.array(df_mob)
    mob = mob_full[:n_branch, :n_branch]
    labour = np.array(df_labour)[:, 1:]
    
    X_n = mob_full.sum(0)[:n_branch]
    X_r = labour[idx_region, :] /labour[0, :] * X_n
    vds = np.array(df_vds.loc[idx_region])[1:] / \
          np.array(df_vds.loc[0])[1:] * mob_full[-2, :-1]
    a_ij_n = mob / np.tile(X_n, (n_branch, 1))
    
    t_ij = get_t_AFLQ(mob, labour, idx_region, 0.01)
    a_ij_r = a_ij_n * t_ij
    Z_r = a_ij_r * np.tile(X_r, (14, 1))
    use_r = (np.eye(mob.shape[0]) - a_ij_r) @ X_r

    mob_new = Z_r
    use_r = (np.eye(mob.shape[0]) - a_ij_r) @ X_r
    mob_new = np.concatenate([mob_new,
                              np.array([vds]),
#                               np.array([X_r - mob_new.sum(0)-vds])], axis=0)
                              np.array([mob_new.sum(0)+vds])], axis=0)
    use_r = np.array([np.append(use_r, [0, 0])])
    mob_new = np.concatenate([mob_new.T, use_r]).T
    
    df_mob_reg = pd.DataFrame(mob_new,
                              columns=list(df_labour.columns[1:]) + ['Usage'],
                              index=list(df_labour.columns[1:]) + ['VDS', 'Production'])
    return df_mob_reg

In [101]:
idx_region = 76 # novosib
idx_region = 77 # omsk
idx_region = 10 # lipezk
idx_region = 27 # murmansk
# idx_region = 38 # rostov

base_year = '2019'

df_mob = pd.read_excel('../data/preprocessed_data/df_tri_'+base_year+'.xlsx').set_index('Unnamed: 0')
df_labour = pd.read_excel('../data/preprocessed_data/df_labour_'+base_year+'.xlsx')
df_vds = pd.read_excel('../data/preprocessed_data/df_full_vds_'+base_year+'.xlsx').set_index('Unnamed: 0')

df_reg = get_regional_df(idx_region, df_mob, df_labour, df_vds)
df_reg

Unnamed: 0,"Agriculture, forestry",Mining,Processing industries,Electric power supply and other,"Water supply, sanitation and other",Construction,Wholesale and retail trade,Transportation and storage,Operation of hotels and other,Information and communication,Operations with real estate,Education,Healthcare,Other activities,Usage
"Agriculture, forestry",1371.970764,0.219184,6708.954964,8.872205,0.142946,35.803304,20.157822,17.732499,116.251145,0.135799,1.022067,18.840333,20.062525,111.660184,10444.447214
Mining,8.495562,15519.244973,26145.49776,11736.674048,26.568932,669.303651,1061.246135,932.500356,0.691039,0.335022,7.612448,3.60766,8.371951,97.180264,193724.36716
Processing industries,2853.62866,3829.379347,44011.924863,1958.806747,1138.300661,12762.143283,2580.081568,8056.013827,1504.824401,928.120119,1435.536874,484.445115,1560.251823,4846.656465,145547.489969
Electric power supply and other,348.003007,5230.780278,6885.271113,44983.311318,815.862689,300.073259,788.879711,3062.058361,247.641672,270.315014,2309.047101,1050.747011,730.434742,2050.463696,53656.850873
"Water supply, sanitation and other",14.660969,128.696947,2029.959984,546.952978,1650.406212,62.037168,127.815005,119.167966,38.088445,22.891585,542.214736,120.067824,125.752185,590.058858,6741.009662
Construction,56.441231,1047.344621,837.172667,428.913676,98.336092,929.19728,218.250556,688.964768,62.301273,45.209384,1449.731597,598.699948,414.16033,2300.272079,39113.331383
Wholesale and retail trade,730.925823,736.363184,11258.637715,2792.46121,305.292581,2906.82397,1652.296941,1681.702209,354.124534,240.609851,544.960541,137.148253,770.939194,1402.41481,62533.932421
Transportation and storage,380.86768,7936.464429,12226.65487,541.494584,330.126519,1278.597266,12042.916708,18588.850884,125.548434,240.521973,142.973804,126.261619,301.100253,4485.513526,53032.460571
Operation of hotels and other,2.890191,46.277608,140.098366,30.512344,2.705937,65.299043,87.864202,117.345803,27.055803,41.978275,7.160496,181.574453,230.541396,860.936155,9762.639503
Information and communication,17.523659,133.01651,992.223613,229.898828,18.922221,147.557528,980.052871,476.065568,44.680887,4318.899841,204.468531,255.01444,145.271031,4116.834975,12792.872577


In [130]:
base_year = '2020'
df_mob = pd.read_excel('../data/preprocessed_data/df_tri_'+base_year+'.xlsx').set_index('Unnamed: 0')
df_labour = pd.read_excel('../data/preprocessed_data/df_labour_'+base_year+'.xlsx')
df_vds = pd.read_excel('../data/preprocessed_data/df_full_vds_'+base_year+'.xlsx').set_index('Unnamed: 0')

for i in tqdm(range(1, 90)):
    df_reg = get_regional_df(i, df_mob, df_labour, df_vds)
    df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')

  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/local

  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/local

  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_'+base_year+'.xlsx')
  df_reg.to_excel('../data/data_for_analysis/local

# Оценка силы шоков, влияющих на экономику региона
## Оценка шоков через численность трудящихся

In [16]:
def get_labour(year, df_labour_region, df_labour_branch):
    branch = np.array(df_labour_branch)[:, 1:]
    region = np.tile(df_labour_region.loc[:, year], (14, 1)).T
    return branch * region / 100

def get_shock_labour(idx_region: int, 
                     file_regions: str = '../data/raw_data/full_labour_regions.xlsx',
                     file_branches: str = '../data/raw_data/full_labour_branches.xlsx'):
    n_branches = 14
    n_years = 6
    
    start_year = 2015
    end_year = 2020
    
    df_labour_region = (
        pd.read_excel(file_regions)
        .rename({'Unnamed: 0': 'region'}, axis=1)
    )

    labour_branch = []
    for i in range(start_year, end_year+1):
        labour_branch.append(
            (
                pd.read_excel(file_branches, sheet_name=str(i))
                .rename({'Unnamed: 0': 'region'}, axis=1)
            )
        )

    L = []
    for i in np.arange(start_year, end_year+1):
        idx = i - start_year
        labour = get_labour(i, df_labour_region, labour_branch[idx])
        L.append(labour)
    
    L_time = []
    for i in range(0, n_branches):
        res_i = []
        for j in range(0, n_years):
            res_i.append(L[j][idx_region, i])
        L_time.append(res_i)
    L_time = np.array(L_time)
    
    t = np.arange(start_year, end_year)
    alpha = 0.3
    weights = np.exp(alpha*t) / np.exp(alpha*t).sum()
    norm_past = np.array(L_time[:, :-1] * np.tile(weights, (n_branches, 1))).sum(1)
    
    return L[-1][idx_region, :] / norm_past

## Оценка шоков через изменения в ВВП

In [17]:
def get_shock_vvp(idx_region, df_vvp, df_labour):
    coef_reg = list(df_labour.loc[idx_region])[1:] / \
               np.array(list(df_labour.loc[idx_region])[1:]).sum()
    coef_rf = list(df_labour.loc[0])[1:] / \
              np.array(list(df_labour.loc[0])[1:]).sum()
    return ((df_vvp.Proc - 100)[:14] * coef_reg / coef_rf + 100) / 100

In [18]:
f = open('../data/algo_data/branch_names_eng.json')
names = json.load(f)
f.close()

df_shocks = pd.DataFrame({})
df_shocks.insert(0, 'Branches', names)
df_shocks = df_shocks.set_index('Branches')
# df_shocks

In [19]:
# idx_region = 76 # novosib
# idx_region = 77 # omsk
# idx_region = 10 # lipezk
# idx_region = 27 # murmansk
# idx_region = 38 # rostov

for i in tqdm(range(1, 90)):
    shock_vvp = get_shock_vvp(i, df_vvp, df_labour)
    shock_labour = get_shock_labour(i)
    df_shocks.insert(0, 'labour_'+str(i), shock_labour)
    df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))

  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labou

  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labou

  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labou

  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
  df_shocks.insert(0, 'labour_'+str(i), shock_labour)
  df_shocks.insert(1, 'vvp_'+str(i), list(shock_vvp))
100%|███████████████████████

In [20]:
# df_shocks.to_excel('../data/data_for_analysis/df_shock.xlsx')

# Реализация модели межобраслевого баланса

In [21]:
def get_coef(MOB):
    Z0 = MOB[:, -1][:-2]
    A0 = Z0.sum()
    AJ = MOB.sum(0)[:-1]
    
    a = MOB[:-2, :-1] / np.tile(AJ, (MOB.shape[0] - 2, 1))
    b = MOB[-2:, :-1] / np.tile(AJ, (2, 1))
    a0 = Z0 / A0
    
    X = MOB[:-2, :-1]
    l = MOB[-2:, :-1]
    return Z0, A0, AJ, a, b, a0, X, l

def get_alpha(Z, l, a, b, AJ):
    return AJ * (1 / Z ** a).prod(0) * (1 / l ** b).prod(0)

def get_FJ(X, l, a, b, alpha):
    return (alpha * (X ** a).prod(0) * (l ** b).prod(0)).astype(float)

def get_F0(A0, X0, Z0, a0):
    return A0 * (1 / Z0 ** a0).prod() * (X0 ** a0).prod()

def get_GDP(X, FJ, F0, a, A0):
    d = np.log(FJ.astype(float))
    mu = -np.linalg.inv(np.eye(X.shape[0]) - X.T) @ d
    lam = np.exp((mu * a).sum()) / F0
    return A0 / lam

def add_shock_to_production(MOB, a, shock_mask):
    Y = MOB.sum(0)[:-1]
    X0 = (np.eye(a.shape[0]) - a) @ (Y * shock_mask)
    X = a * np.tile(Y, (a.shape[0], 1))
    return X.astype(float), X0.astype(float)

def add_shock_to_demand(MOB, a, shock_mask):
    Z = MOB[:-2, -1]
    Y = np.linalg.inv(np.eye(a.shape[0]) - a) @ (Z * shock_mask)
    X = a * np.tile(Y, (a.shape[0], 1))
    return X.astype(float), Y.astype(float)

def get_GDP_from_df(df, shock):
    MOB = np.array(df)[:, :]
    Z0, A0, AJ, a, b, a0, Z, l = get_coef(MOB)
    alpha = get_alpha(Z, l, a, b, AJ)
    FJ = lambda X: get_FJ(X, l, a, b, alpha*shock)
    F0 = lambda X0: get_F0(A0, X0, Z0, a0)
    X, X0 = add_shock_to_production(MOB, a, shock)
    return get_GDP(X, FJ(X), F0(X0), a, A0)

In [146]:
# idx_region = 38 # rostov
# str_region = 'rostov'

idx_region = 76 # rostov
str_region = 'novosib'

df_shock = pd.read_excel('../data/data_for_analysis/df_shocks_2019.xlsx')
df_2019 = pd.read_excel('../data/data_for_analysis/df_'+str_region+'_2019.xlsx').set_index('Unnamed: 0')
df_2020 = pd.read_excel('../data/data_for_analysis/df_'+str_region+'_2020.xlsx').set_index('Unnamed: 0')

shock = np.ones(14)
shock_vvp = df_shock['vvp_'+str(idx_region)]
shock_labour = df_shock['labour_'+str(idx_region)]
res_2019 = get_GDP_from_df(df_2019, shock)
res_2019_vvp = get_GDP_from_df(df_2019, shock_vvp)
res_2019_labour = get_GDP_from_df(df_2019, shock_labour)
res_2020 = get_GDP_from_df(df_2020, shock)

In [154]:
shock = np.ones(14)
shock[2] = 0.9
shock[6] = 0.9
shock[7] = 0.9
res_2019_s = get_GDP_from_df(df_2019, shock)
res_2020_s = get_GDP_from_df(df_2020, shock)


In [155]:
res_2019_s / res_2019

0.9528292531034926

In [156]:
res_2020_s / res_2020

0.9540948624564091

In [132]:
res = []
for i in tqdm(range(1, 90)):
    res_dict = {}
    res_dict['id'] = i
    df_2019 = pd.read_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_2019.xlsx').set_index('Unnamed: 0')
    df_2020 = pd.read_excel('../data/data_for_analysis/localized_data/df_'+str(i)+'_2020.xlsx').set_index('Unnamed: 0')
    shock = np.ones(14)
    shock_vvp = df_shock['vvp_'+str(i)]
    shock_labour = df_shock['labour_'+str(i)]
    res_dict['res_2019'] = get_GDP_from_df(df_2019, shock)
    res_dict['res_2019_vvp'] = get_GDP_from_df(df_2019, shock_vvp)
    res_dict['res_2019_labour'] = get_GDP_from_df(df_2019, shock_labour)
    res_dict['res_2020'] = get_GDP_from_df(df_2020, shock)
    res.append(res_dict)

  return A0 * (1 / Z0 ** a0).prod() * (X0 ** a0).prod()
  result = getattr(ufunc, method)(*inputs, **kwargs)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  a = MOB[:-2, :-1] / np.tile(AJ, (MOB.shape[0] - 2, 1))
  b = MOB[-2:, :-1] / np.tile(AJ, (2, 1))
  return A0 * (1 / Z0 ** a0).prod() * (X0 ** a0).prod()
  return A0 * (1 / Z0 ** a0).prod() * (X0 ** a0).prod()
  return A0 * (1 / Z0 ** a0).prod() * (X0 ** a0).prod()
  return A0 * (1 / Z0 ** a0).prod() * (X0 ** a0).prod()
  return A0 * (1 / Z0 ** a0).prod() * (X0 ** a0).prod()
  result = getattr(ufunc, method)(*inputs, **kwargs)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  return A0 * (1 / Z0 ** a0).prod() * (X0 ** a0).prod()
  return A0 * (1 / Z0 ** a0).prod() * (X0 ** a0).prod()
  result = getattr(ufunc, method)(*inputs, **kwargs)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  a = MOB[:-2, :-1] / np.tile(AJ, (MOB.shape[0] - 2, 1))
  b = MOB[-2:, :-1] / np.tile(AJ, (2, 1))
  r

In [133]:
df_results_for_regions1 = pd.json_normalize(res)
df_results_for_regions1

Unnamed: 0,id,res_2019,res_2019_vvp,res_2019_labour,res_2020
0,1,2.869544e+15,2.903257e+15,2.862458e+15,2.957856e+15
1,2,4.168776e+12,3.306391e+12,4.183066e+12,3.288481e+12
2,3,,,,
3,4,1.208990e+12,,1.187349e+12,2.139598e+12
4,5,5.631875e+12,5.668689e+12,5.714419e+12,5.334947e+12
...,...,...,...,...,...
84,85,2.610169e+12,2.518217e+12,2.599092e+12,2.611148e+12
85,86,9.339974e+11,6.582967e+11,9.323040e+11,8.588582e+11
86,87,2.017917e+09,,2.201566e+09,2.362027e+10
87,88,1.141355e+12,9.001574e+11,1.142669e+12,8.982473e+11


# Анализ результатов

In [134]:
df_vrp = (
    pd.read_excel('../data/raw_data/non_tri_data.xlsx',
              sheet_name='VRP',
              header=1)
    .assign(proc_2020 = lambda df: df.eval('Y2020 / Y2019 * 100'))
)
df_results_for_regions1.insert(0, 'rosstat_proc', df_vrp.loc[1:89, 'proc_2020'])

In [135]:
df_results_for_regions1 = (
    df_results_for_regions1
    .assign(vvp_proc = lambda df: df.eval('res_2019_vvp / res_2019 * 100'))
    .assign(labour_proc = lambda df: df.eval('res_2019_labour / res_2019 * 100'))
)

df_results_for_regions1

Unnamed: 0,rosstat_proc,id,res_2019,res_2019_vvp,res_2019_labour,res_2020,vvp_proc,labour_proc
0,,1,2.869544e+15,2.903257e+15,2.862458e+15,2.957856e+15,101.174860,99.753077
1,102.122476,2,4.168776e+12,3.306391e+12,4.183066e+12,3.288481e+12,79.313238,100.342788
2,104.511735,3,,,,,,
3,103.676307,4,1.208990e+12,,1.187349e+12,2.139598e+12,,98.210027
4,103.120324,5,5.631875e+12,5.668689e+12,5.714419e+12,5.334947e+12,100.653677,101.465652
...,...,...,...,...,...,...,...,...
84,103.114140,85,2.610169e+12,2.518217e+12,2.599092e+12,2.611148e+12,96.477160,99.575631
85,107.255345,86,9.339974e+11,6.582967e+11,9.323040e+11,8.588582e+11,70.481641,99.818702
86,108.867824,87,2.017917e+09,,2.201566e+09,2.362027e+10,,109.100922
87,133.004479,88,1.141355e+12,9.001574e+11,1.142669e+12,8.982473e+11,78.867432,100.115155


In [137]:
print('Среднее значение отклонения через МОБ 2020:')
(np.abs(df_results_for_regions1.eval('res_2020 / res_2019 * 100') \
 - df_results_for_regions1.rosstat_proc) / df_results_for_regions1.rosstat_proc * 100).median()

Среднее значение отклонения через МОБ 2020:


21.350188573628834

In [138]:
print('Среднее ВВП')
np.mean(np.abs(df_results_for_regions1.rosstat_proc - df_results_for_regions1.vvp_proc))

Среднее ВВП


10.318821803519615

In [139]:
print('Максимум ВВП')
np.max(np.abs(df_results_for_regions1.rosstat_proc - df_results_for_regions1.vvp_proc))

Максимум ВВП


54.13704665637782

In [140]:
print('Минимум ВВП')
np.min(np.abs(df_results_for_regions1.rosstat_proc - df_results_for_regions1.vvp_proc))

Минимум ВВП


0.3199416643746247

In [141]:
print('Среднее труд')
np.mean(np.abs(df_results_for_regions1.rosstat_proc - df_results_for_regions1.labour_proc))

Среднее труд


6.053916181221196

In [142]:
print('Максимум труд')
np.max(np.abs(df_results_for_regions1.rosstat_proc - df_results_for_regions1.labour_proc))

Максимум труд


32.889323862428654

In [143]:
print('Минимум труд')
np.min(np.abs(df_results_for_regions1.rosstat_proc - df_results_for_regions1.labour_proc))

Минимум труд


0.2330972914075886

In [144]:
print('Направление ВВП')
np.sign((df_results_for_regions1.rosstat_proc - 100) * (df_results_for_regions1.vvp_proc - 100)).value_counts()

Направление ВВП


-1.0    43
 1.0    27
dtype: int64

In [145]:
print('Направление труд')
np.sign((df_results_for_regions1.rosstat_proc - 100) * (df_results_for_regions1.labour_proc - 100)).value_counts()

Направление труд


-1.0    39
 1.0    38
dtype: int64