# Мультиколлинеарность

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from statsmodels.formula.api import ols
from statsmodels.graphics.api import plot_corr # visualization of correlations
from statsmodels.iolib.summary2 import summary_params # output & reports
from statsmodels.stats.outliers_influence import variance_inflation_factor as VIF # VIF

from scipy.stats import f # f-distribution & critical values

# Do not show warnings: `Warning` is the base class of every warning
# category, so this filter silences all of them for the rest of the session.
# NOTE(review): this presumably also hides numerical warnings raised by later
# cells — confirm that is intended.
import warnings
warnings.simplefilter(action='ignore', category=Warning)

In [None]:
# Load the data set into a DataFrame (replace the placeholder file name).
df = pd.read_csv(filepath_or_buffer='Filename.csv')

Consider a regression

$$
	\text{Type the specification}
$$

## Specification & Fitting

In [None]:
# Describe the model with a formula string (replace the placeholder),
# taking the variables from df.
mod = ols('your specification', data=df)
# Estimate it; the covariance estimator is spelled out explicitly and is the
# same non-robust one that fit() uses by default.
res = mod.fit(cov_type='nonrobust')

## VIF

In [None]:
# Names of the model's exogenous variables (the same names are used below
# as column labels of the design matrix).
mod.exog_names

In [None]:
# Design matrix of the regression, wrapped in a pandas DataFrame whose
# columns carry the names of the exogenous variables (easier to read and to
# select from than the raw array).
X = pd.DataFrame(data=mod.exog, columns=mod.exog_names)
X

In [None]:
# Print the VIF of every regressor in the design matrix X.
# The constant is skipped by name instead of assuming it sits at position 0,
# so no regressor is dropped when the model is fitted without an intercept.
for idx, name in enumerate(mod.exog_names):
    if name == 'Intercept':
        continue
    print(f'VIF({name})={round(VIF(exog=X, exog_idx=idx), 3)}')

In [None]:
# VIFs of all regressors, collected in one Series labelled with the column
# names of X. Iterating over X.columns replaces the hard-coded indices 1..4,
# which failed with an IndexError for fewer than four regressors and ignored
# any regressor beyond the fourth. The constant is excluded by name.
pd.Series(
    {
        name: VIF(exog=X, exog_idx=idx)
        for idx, name in enumerate(X.columns)
        if name != 'Intercept'
    },
    name='VIF',
)

## Correlations for regressors

In [None]:
# Correlation matrix of the regressors only: the constant column is removed
# before the correlations are computed.
# errors='ignore' keeps the cell working for a model fitted without an
# intercept, where there is no 'Intercept' column to drop.
corr_matrix = X.drop(columns='Intercept', errors='ignore').corr()
corr_matrix.round(3)

## Visualization of correlation matrix

In [None]:
# Visualization with statsmodels.
# The axis labels are taken from corr_matrix itself, so they always match the
# plotted matrix; slicing mod.exog_names[1:] is only correct when the
# intercept happens to be the first design-matrix column.
plot_corr(corr_matrix, xnames=list(corr_matrix.columns), normcolor=True)
plt.show()

In [None]:
# Heatmap of the correlation matrix drawn with seaborn: the color scale is
# pinned to [-1, 1] and each cell is annotated with its value.
sns.heatmap(data=corr_matrix, vmin=-1, vmax=1, annot=True)

In [None]:
# Correlation matrix rendered as a colored table with the pandas Styler:
# 'coolwarm' gradient on the fixed scale [-1, 1], values shown with 2 decimals.
(
    corr_matrix.style
    .background_gradient(cmap='coolwarm', vmin=-1, vmax=1)
    .format(precision=2)
)

## Non-robust t-test for coefficients

In [None]:
# Significance level used for the t-tests below.
# 0.05 is only a conventional default so that the cell is valid Python;
# replace it with the level required by your task.
sign_level = 0.05

In [None]:
# Coefficient table of the fitted model (built with alpha=sign_level),
# rounded to 4 decimals; use the p-values to judge significance.
summary_params(res, alpha=sign_level).round(decimals=4)

### Conclusion

your conclusion

## Non-robust F-test

In [None]:
# State the linear restriction(s) to test as a string (replace the placeholder)
F_test = res.f_test(r_matrix='your hypothesis')
# Show the F-statistic together with its p-value
(F_test.statistic, F_test.pvalue)

In [None]:
# Significance level used for the F-test.
# 0.05 is only a conventional default so that the cell is valid Python;
# replace it with the level required by your task.
sign_level = 0.05
# Critical value of the F-distribution: the inverse survival function at
# sign_level, with the degrees of freedom taken from the F-test above.
f.isf(q=sign_level, dfn=F_test.df_num, dfd=F_test.df_denom)

### Conclusion

your conclusion