# Data Structure

- $p$: number of industries
- $k$: number of regions
- $i$: time-period index, $i = 1, \dots, n$, where $n$ is the number of time periods
- Your variable $x_i$ is observed in period $i$ for all industries across all regions and is a $p \times k$ cross-sectional matrix (rows index industries, columns index regions).

# KPST Statistic

In [99]:
# `import importlib` alone does not guarantee that the `importlib.util`
# submodule is loaded, so import the submodule explicitly.
import importlib.util
import subprocess
import sys

# Install any missing dependency with the interpreter that is running this
# notebook (`sys.executable -m pip`), so the package lands in the environment
# the kernel actually imports from. This is plain Python, so the cell also
# works when the notebook is exported and run as a script.
for _package in ("numpy", "scipy"):
    if importlib.util.find_spec(_package) is None:
        subprocess.check_call([sys.executable, "-m", "pip", "install", _package])
        # Make the import system notice the freshly installed package.
        importlib.invalidate_caches()

import numpy as np
from scipy.stats import chi2

In [100]:
## Function that calculates KPST

def kpst(x=[np.random.rand(5, 7), np.random.rand(5, 7)]):
    """Compute the KPST statistic from one or more p x k cross-sections.

    Parameters
    ----------
    x : list or tuple of 2-D arrays, or a single 2-D array
        The cross-sections x_i. All of them must share the same (p, k)
        shape. A single array is treated as one cross-section (n = 1).
        The default is a pair of random 5 x 7 matrices that is created
        once, when the function is defined; it is never mutated here.

    Returns
    -------
    p : int
        Number of rows of each cross-section.
    k : int
        Number of columns of each cross-section.
    KPST : float
        The value of the statistic. It is 0.0 when p == 1 or k == 1,
        because nothing is left after dropping the leading singular triplet.

    Raises
    ------
    ValueError
        If `x` contains no cross-section, or the cross-sections are not
        non-empty 2-D arrays of one common shape.
    numpy.linalg.LinAlgError
        If the SVD does not converge or the matrix to invert is singular.
    """
    if isinstance(x, (list, tuple)):  ## many cross-sections
        sections = [np.asarray(xi) for xi in x]
    else:  ## a single cross-section
        sections = [np.asarray(x)]

    if not sections:
        raise ValueError("x must contain at least one cross-section")

    # Read (p, k) from the FIRST cross-section (index 0), so that a
    # one-element list works, and require every other one to agree.
    shape = sections[0].shape
    if len(shape) != 2 or 0 in shape:
        raise ValueError("each cross-section must be a non-empty 2-D (p, k) array")
    if any(xi.shape != shape for xi in sections):
        raise ValueError("all cross-sections must have the same (p, k) shape")
    p, k = shape
    n = len(sections)

    # With p == 1 or k == 1 the trimmed matrices below are empty, so the
    # quadratic form is an empty sum.
    if p == 1 or k == 1:
        return p, k, 0.0

    ## equation (1): R is the pk x pk average of f_i f_i'
    # NOTE(review): order='F' stacks the k columns of x_i (each of length p),
    # whereas the rearrangement below slices R into a p x p grid of k x k
    # blocks. For p != k these two conventions look inconsistent -- confirm
    # the intended vec ordering against the paper.
    vectors = [xi.flatten(order='F') for xi in sections]  ## each of length pk
    R = sum(np.outer(fi, fi) for fi in vectors) / n

    ## equation (5): row (j*p + i) of R_cal is the column-major vec of block (i, j)
    R_cal = np.empty([p ** 2, k ** 2])
    for i in range(p):
        for j in range(p):
            block_ij = R[i * k:(i + 1) * k, j * k:(j + 1) * k]  ## k x k
            R_cal[j * p + i, :] = block_ij.flatten(order='F')  ## length k^2

    ## equation (9): full SVD, L is p^2 x p^2 and N is k^2 x k^2
    L, Sigma_v, Nt = np.linalg.svd(R_cal)
    N = Nt.T

    ## Components of the KPST: drop the leading singular triplet
    L2 = L[:, 1:]
    N2 = N[:, 1:]
    # Rectangular (p^2 - 1) x (k^2 - 1) matrix carrying the remaining
    # singular values on its main diagonal and zeros elsewhere.
    remaining = Sigma_v[1:]
    Sigma2 = np.zeros([p ** 2 - 1, k ** 2 - 1])
    Sigma2[:remaining.size, :remaining.size] = np.diag(remaining)

    # NOTE(review): V is the outer product of a single vector and therefore
    # has rank one, which makes K2 singular whenever (p^2-1)(k^2-1) > 1; the
    # inverse below is then numerically unreliable or raises LinAlgError.
    # Equation (22) presumably needs a covariance estimate of vec(R_cal)
    # built from the individual cross-sections -- confirm against the paper.
    vec_R_cal = R_cal.flatten(order='F')
    V = np.outer(vec_R_cal, vec_R_cal)

    ## equation (22)
    K1 = Sigma2.flatten(order='F')
    N2_kron_L2 = np.kron(N2, L2)  ## computed once, used on both sides of V
    K2 = N2_kron_L2.T @ V @ N2_kron_L2
    KPST = n * K1 @ np.linalg.inv(K2) @ K1
    return p, k, KPST

In [101]:
## Function that returns the degree of freedom
def dof(p,k):
    """Return the degrees of freedom used for the KPST test, equation (25)."""
    k_term = 0.5 * k * (k + 1) - 1
    p_term = 0.5 * p * (p + 1) - 1
    return k_term * p_term

# Test

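We compute the KPST statistic and compare it with the $(1-\alpha)$ quantile of the $\chi^2$ distribution with $df$ degrees of freedom. The null hypothesis that $R$ has KPS is rejected when the statistic is not smaller than this critical value.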

In [102]:
x = [np.random.rand(5, 7), np.random.rand(5, 7), np.random.rand(5, 7)] ## input data here

## Random data can produce a non-invertible matrix or a non-converging SVD; just rerun the cell on error.
## The statistic gets its own name: assigning it to `kpst` would overwrite the
## function and make every later run of this cell fail with "object is not callable".
p, k, kpst_stat = kpst(x)

## Test: compare the statistic with the chi-square critical value at nominal size 0.05
critical_value = chi2.ppf(1-.05, dof(p,k))
if kpst_stat < critical_value:
    print("We can't reject the null that R has KPS at nominal size 0.05. We conclude that R has KPS.")
else:
    print("We reject the null that R has KPS at nominal size 0.05. We conclude that R doesn't has KPS.")

We can't reject the null that R has KPS at nominal size 0.05. We conclude that R has KPS.
