# Project 8 - Working with OLS

## Linear Algebra in *numpy*

In [5]:
import numpy as np
x1 = np.array([1,2,1])
x2 = np.array([4,1,5])
x3 = np.array([6,8,6])
x1,x2,x3

(array([1, 2, 1]), array([4, 1, 5]), array([6, 8, 6]))

In [6]:
type(x1), type(x2), type(x3)

(numpy.ndarray, numpy.ndarray, numpy.ndarray)

In [7]:
x1 = np.matrix(x1)
x2 = np.matrix(x2)
x3 = np.matrix(x3)
x1, x2, x3

(matrix([[1, 2, 1]]), matrix([[4, 1, 5]]), matrix([[6, 8, 6]]))

In [8]:
type(x1), type(x2), type(x3)

(numpy.matrix, numpy.matrix, numpy.matrix)

In [9]:
X = np.concatenate((x1,x2,x3), axis = 0)
X

matrix([[1, 2, 1],
        [4, 1, 5],
        [6, 8, 6]])

In [14]:
X_inverse = np.round(X.I,2)
X_inverse

array([[-8.5 , -1.  ,  2.25],
       [ 1.5 , -0.  , -0.25],
       [ 6.5 ,  1.  , -1.75]])

In [10]:
# transpose matrix
X.T

matrix([[1, 4, 6],
        [2, 1, 8],
        [1, 5, 6]])

## Inverting a Matrix
$XX^{-1}=I$



$A = [XI]$

### Set up the problem such that 

$AX^{-1} = [XI]X^{-1}$

$AX^{-1} = [IX^{-1}]$


Let us solve for $AX^{-1}$ using the following vectors for $X$. 

$\begin{equation*}
X = \begin{bmatrix}
1 & 2 & 1 \\
4 & 1 & 5 \\
6 & 8 & 6
\end{bmatrix}
\end{equation*}$

Concatenate a 3 X 3 identity matrix on the left of $X$:

$\begin{equation*}
I = \begin{bmatrix}
1 & 0 & 0 \\
0 & 1 & 0 \\
0 & 0 & 1
\end{bmatrix}
\end{equation*}$

$\begin{equation*}
[XI] = \begin{bmatrix}
1 & 2 & 1 & 1 & 0 & 0 \\
4 & 1 & 5 & 0 & 1 & 0 \\
6 & 8 & 6 & 0 & 0 & 1
\end{bmatrix}
\end{equation*}$

If we perform row operations on $A$ to transform $X$ in $[XI]$ into $I$, then we $I$ will be transformed into $X^{-1}$:

$\begin{equation*}
[XI] = \begin{bmatrix}
1 & 2 & 1 & 1 & 0 & 0 \\
4 & 1 & 5 & 0 & 1 & 0 \\
6 & 8 & 6 & 0 & 0 & 1
\end{bmatrix}
\end{equation*}$




$\begin{equation*}
r_2 - 4r_1:\begin{bmatrix}
1 & 2 & 1 & 1 & 0 & 0 \\
0 & -7 & 1 & -4 & 1 & 0 \\
6 & 8 & 6 & 0 & 0 & 1
\end{bmatrix}
\end{equation*}$


$\begin{equation*}
r_3 - 6r_1:\begin{bmatrix}
1 & 2 & 1 & 1 & 0 & 0 \\
0 & -7 & 1 & -4 & 1 & 0 \\
0 & -4 & 0 & -6 & 0 & 1
\end{bmatrix}
\end{equation*}$


$\begin{equation*}
r_2 \leftrightarrow r_3:\begin{bmatrix}
1 & 2 & 1 & 1 & 0 & 0 \\
0 & -4 & 0 & -6 & 0 & 1\\
0 & -7 & 1 & -4 & 1 & 0 
\end{bmatrix}
\end{equation*}$

$\begin{equation*}
r_2/{-4}:\begin{bmatrix}
1 & 2 & 1 & 1 & 0 & 0 \\
0 & 1 & 0 & 3/2 & 0 & -1/4\\
0 & -7 & 1 & -4 & 1 & 0 
\end{bmatrix}
\end{equation*}$

$\begin{equation*}
r_3 + 7r_2:\begin{bmatrix}
1 & 2 & 1 & 1 & 0 & 0 \\
0 & 1 & 0 & 3/2 & 0 & -1/4\\
0 & 0 & 1 & 13/2 & 1 & -7/4 
\end{bmatrix}
\end{equation*}$

$\begin{equation*}
r_1 + -2r_2 - r_3:\begin{bmatrix}
1 & 0 & 0 & -17/2 & -1 & 9/4 \\
0 & 1 & 0 & 3/2 & 0 & -1/4\\
0 & 0 & 1 & 13/2 & 1 & -7/4 
\end{bmatrix}
\end{equation*}$

$\begin{equation*}
IX^{-1}=\begin{bmatrix}
1 & 0 & 0 & -8.5 & -1 & 2.25 \\
0 & 1 & 0 & 1.5 & 0 & -0.25\\
0 & 0 & 1 & 6.5 & 1 & -1.75 
\end{bmatrix}
\end{equation*}$

$\begin{equation*}
X^{-1}=\begin{bmatrix}
-8.5 & -1 & 2.25 \\
1.5 & 0 & -0.25\\
6.5 & 1 & -1.75 
\end{bmatrix}
\end{equation*}$

## Running an OLS Regression

In [15]:
import pandas as pd
url = "https://www.rug.nl/ggdc/historicaldevelopment/maddison/data/mpd2020.xlsx"
mgdp = pd.read_excel(url, 
                   index_col = [0,2],
                   parse_dates = True, 
                    sheet_name = "Full data")

  mgdp = pd.read_excel(url,


In [17]:
mgdp

Unnamed: 0_level_0,Unnamed: 1_level_0,country,gdppc,pop
countrycode,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AFG,1820,Afghanistan,,3280.00000
AFG,1870,Afghanistan,,4207.00000
AFG,1913,Afghanistan,,5730.00000
AFG,1950,Afghanistan,1156.0000,8150.00000
AFG,1951,Afghanistan,1170.0000,8284.00000
...,...,...,...,...
ZWE,2014,Zimbabwe,1594.0000,13313.99205
ZWE,2015,Zimbabwe,1560.0000,13479.13812
ZWE,2016,Zimbabwe,1534.0000,13664.79457
ZWE,2017,Zimbabwe,1582.3662,13870.26413


In [18]:
filename = "efotw-2022-master-index-data-for-researchers-iso.xlsx"
data = pd.read_excel(filename,
                     index_col = [2,0],
                     header = [0],
                     sheet_name = "EFW Panel Data 2022 Report")
rename = {"Panel Data Summary Index": "Summary",
         "Area 1":"Size of Government",
         "Area 2":"Legal System and Property Rights",
         "Area 3":"Sound Money",
         "Area 4":"Freedom to Trade Internationally",
         "Area 5":"Regulation"}

In [19]:
data = data.dropna(how="all", axis = 1).rename(columns = rename)
data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,ISO_Code_2,World Bank Region,"World Bank Current Income Classification, 1990-present (L=Low income, LM=Lower middle income, UM=Upper middle income, H=High income)",Countries,Summary,Size of Government,Legal System and Property Rights,Sound Money,Freedom to Trade Internationally,Regulation,Standard Deviation of the 5 EFW Areas
ISO_Code_3,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
ALB,2020,AL,Europe & Central Asia,UM,Albania,7.64,7.817077,5.260351,9.788269,8.222499,7.112958,1.652742
DZA,2020,DZ,Middle East & North Africa,LM,Algeria,5.12,4.409943,4.13176,7.630287,3.639507,5.778953,1.613103
AGO,2020,AO,Sub-Saharan Africa,LM,Angola,5.91,8.133385,3.705161,6.087996,5.37319,6.227545,1.598854
ARG,2020,AR,Latin America & the Caribbean,UM,Argentina,4.87,6.483768,4.796454,4.516018,3.086907,5.490538,1.254924
ARM,2020,AM,Europe & Central Asia,UM,Armenia,7.84,7.975292,6.236215,9.553009,7.692708,7.756333,1.178292


In [20]:
data["RGDP Per Capita"] = mgdp["gdppc"]
data

Unnamed: 0_level_0,Unnamed: 1_level_0,ISO_Code_2,World Bank Region,"World Bank Current Income Classification, 1990-present (L=Low income, LM=Lower middle income, UM=Upper middle income, H=High income)",Countries,Summary,Size of Government,Legal System and Property Rights,Sound Money,Freedom to Trade Internationally,Regulation,Standard Deviation of the 5 EFW Areas,RGDP Per Capita
ISO_Code_3,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
ALB,2020,AL,Europe & Central Asia,UM,Albania,7.640000,7.817077,5.260351,9.788269,8.222499,7.112958,1.652742,
DZA,2020,DZ,Middle East & North Africa,LM,Algeria,5.120000,4.409943,4.131760,7.630287,3.639507,5.778953,1.613103,
AGO,2020,AO,Sub-Saharan Africa,LM,Angola,5.910000,8.133385,3.705161,6.087996,5.373190,6.227545,1.598854,
ARG,2020,AR,Latin America & the Caribbean,UM,Argentina,4.870000,6.483768,4.796454,4.516018,3.086907,5.490538,1.254924,
ARM,2020,AM,Europe & Central Asia,UM,Armenia,7.840000,7.975292,6.236215,9.553009,7.692708,7.756333,1.178292,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
VEN,1970,VE,Latin America & the Caribbean,,"Venezuela, RB",7.242943,8.349529,5.003088,9.621851,7.895993,5.209592,2.028426,15289.0
VNM,1970,VN,East Asia & Pacific,,Vietnam,,,,,,,,1172.0
YEM,1970,YE,Middle East & North Africa,,"Yemen, Rep.",,,,,,,,1961.0
ZMB,1970,ZM,Sub-Saharan Africa,,Zambia,4.498763,5.374545,4.472812,5.137395,,5.307952,0.412514,1710.0


In [21]:
# data = data.sort_index()
data.sort_index(inplace = True)
data.to_excel("EFWAndRGDP.xlsx")
data.to_csv("fraserDataWithRGDPPC.csv")
data.loc["USA"]

Unnamed: 0_level_0,ISO_Code_2,World Bank Region,"World Bank Current Income Classification, 1990-present (L=Low income, LM=Lower middle income, UM=Upper middle income, H=High income)",Countries,Summary,Size of Government,Legal System and Property Rights,Sound Money,Freedom to Trade Internationally,Regulation,Standard Deviation of the 5 EFW Areas,RGDP Per Capita
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1970,US,North America,,United States,7.615181,5.341231,8.286741,9.588817,8.232334,6.631262,1.648185,23958.0
1975,US,North America,,United States,7.907324,5.336851,8.67028,9.067438,8.412407,8.042706,1.483975,25956.0
1980,US,North America,,United States,8.101368,5.788734,8.684269,9.122306,8.544966,8.359863,1.322304,29611.0
1985,US,North America,,United States,8.295413,6.582616,8.726823,9.33966,8.505198,8.335644,1.031484,33023.0
1990,US,North America,H,United States,8.461126,7.262163,8.350956,9.669842,8.537828,8.477998,0.853932,36982.0
1995,US,North America,H,United States,8.588435,7.446569,8.58787,9.787931,8.594211,8.53884,0.828418,39390.6254
2000,US,North America,H,United States,8.648077,7.704726,8.77317,9.607665,8.466601,8.739485,0.683215,45886.4705
2001,US,North America,H,United States,8.509072,7.521319,8.615158,9.612028,8.397002,8.464019,0.744511,45878.0092
2002,US,North America,H,United States,8.568646,7.673536,8.370127,9.72431,8.298912,8.81988,0.760212,46266.2959
2003,US,North America,H,United States,8.558717,7.866283,8.257379,9.743489,8.338465,8.662287,0.712756,47157.9953


In [22]:
reg_vars = ['Size of Government',
 'Legal System and Property Rights',
 'Sound Money',
 'Freedom to Trade Internationally',
 'Regulation',
 'RGDP Per Capita']
reg_data = data[reg_vars]
reg_data

Unnamed: 0_level_0,Unnamed: 1_level_0,Size of Government,Legal System and Property Rights,Sound Money,Freedom to Trade Internationally,Regulation,RGDP Per Capita
ISO_Code_3,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AGO,1970,,,,,,2818.0000
AGO,1975,,,,,,1710.0000
AGO,1980,,,,,,1532.0000
AGO,1985,,,,,,1242.0000
AGO,1990,,,,,,1384.0000
...,...,...,...,...,...,...,...
ZWE,2016,5.332597,4.056407,8.086016,6.404937,6.520805,1534.0000
ZWE,2017,4.699843,4.071445,7.983888,4.503965,6.399757,1582.3662
ZWE,2018,5.170946,4.041897,7.312324,6.396649,6.303135,1611.4052
ZWE,2019,5.628359,4.026568,1.413372,6.397045,6.132583,


In [23]:
x_vars = reg_vars[:-1]
y_var = [reg_vars[-1]]
x_vars, y_var

(['Size of Government',
  'Legal System and Property Rights',
  'Sound Money',
  'Freedom to Trade Internationally',
  'Regulation'],
 ['RGDP Per Capita'])

In [25]:
reg_data.corr().round(2)

Unnamed: 0,Size of Government,Legal System and Property Rights,Sound Money,Freedom to Trade Internationally,Regulation,RGDP Per Capita
Size of Government,1.0,-0.1,0.16,0.15,0.2,-0.16
Legal System and Property Rights,-0.1,1.0,0.52,0.63,0.64,0.66
Sound Money,0.16,0.52,1.0,0.68,0.6,0.46
Freedom to Trade Internationally,0.15,0.63,0.68,1.0,0.64,0.51
Regulation,0.2,0.64,0.6,0.64,1.0,0.53
RGDP Per Capita,-0.16,0.66,0.46,0.51,0.53,1.0


In [31]:
reg_data.dropna(inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  reg_data.dropna(inplace = True)


In [37]:
X = reg_data[x_vars]
X["Constant"] = 1
Y = reg_data[y_var]

In [38]:
X

Unnamed: 0_level_0,Unnamed: 1_level_0,Size of Government,Legal System and Property Rights,Sound Money,Freedom to Trade Internationally,Regulation,Constant
ISO_Code_3,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AGO,2005,6.886311,3.129619,1.270081,5.356979,4.511067,1
AGO,2006,5.162277,3.238314,3.807267,5.302944,5.118114,1
AGO,2007,4.963676,3.224507,4.015297,5.139768,5.348260,1
AGO,2008,4.715589,3.382642,4.653201,5.181950,5.185843,1
AGO,2009,7.455501,3.394515,4.901540,5.503538,5.007256,1
...,...,...,...,...,...,...,...
ZWE,2014,6.771807,3.930143,7.664303,6.398692,5.039824,1
ZWE,2015,6.964753,4.108142,7.859669,6.509231,6.555970,1
ZWE,2016,5.332597,4.056407,8.086016,6.404937,6.520805,1
ZWE,2017,4.699843,4.071445,7.983888,4.503965,6.399757,1


In [39]:
Y

Unnamed: 0_level_0,Unnamed: 1_level_0,RGDP Per Capita
ISO_Code_3,Year,Unnamed: 2_level_1
AGO,2005,3708.7706
AGO,2006,4592.3373
AGO,2007,5773.5483
AGO,2008,6743.7482
AGO,2009,7087.6041
...,...,...
ZWE,2014,1594.0000
ZWE,2015,1560.0000
ZWE,2016,1534.0000
ZWE,2017,1582.3662


In [42]:
import statsmodels.api as sm
results = sm.OLS(Y, X).fit()

In [51]:
dir(results)

['HC0_se',
 'HC1_se',
 'HC2_se',
 'HC3_se',
 '_HCCM',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abat_diagonal',
 '_cache',
 '_data_attr',
 '_data_in_cache',
 '_get_robustcov_results',
 '_get_wald_nonlinear',
 '_is_nested',
 '_transform_predict_exog',
 '_use_t',
 '_wexog_singular_values',
 'aic',
 'bic',
 'bse',
 'centered_tss',
 'compare_f_test',
 'compare_lm_test',
 'compare_lr_test',
 'condition_number',
 'conf_int',
 'conf_int_el',
 'cov_HC0',
 'cov_HC1',
 'cov_HC2',
 'cov_HC3',
 'cov_kwds',
 'cov_params',
 'cov_type',
 'df_model',
 'df_resid',
 'eigenvals',
 'el_test',
 'ess',
 'f_pvalue',
 'f_test',
 'fittedvalues',
 'fvalue',
 'get_influe

In [52]:
results.summary()

0,1,2,3
Dep. Variable:,RGDP Per Capita,R-squared:,0.486
Model:,OLS,Adj. R-squared:,0.485
Method:,Least Squares,F-statistic:,593.5
Date:,"Tue, 19 Mar 2024",Prob (F-statistic):,0.0
Time:,13:58:46,Log-Likelihood:,-34081.0
No. Observations:,3145,AIC:,68170.0
Df Residuals:,3139,BIC:,68210.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Size of Government,-2752.2138,202.274,-13.606,0.000,-3148.817,-2355.611
Legal System and Property Rights,3966.0733,196.152,20.219,0.000,3581.474,4350.672
Sound Money,902.3584,177.099,5.095,0.000,555.117,1249.599
Freedom to Trade Internationally,1279.8725,211.796,6.043,0.000,864.601,1695.144
Regulation,2141.0305,281.044,7.618,0.000,1589.982,2692.079
Constant,-1.66e+04,1627.397,-10.197,0.000,-1.98e+04,-1.34e+04

0,1,2,3
Omnibus:,2952.722,Durbin-Watson:,0.174
Prob(Omnibus):,0.0,Jarque-Bera (JB):,189244.77
Skew:,4.324,Prob(JB):,0.0
Kurtosis:,40.005,Cond. No.,113.0


In [53]:
results.summary2()

0,1,2,3
Model:,OLS,Adj. R-squared:,0.485
Dependent Variable:,RGDP Per Capita,AIC:,68174.0586
Date:,2024-03-19 13:58,BIC:,68210.38
No. Observations:,3145,Log-Likelihood:,-34081.0
Df Model:,5,F-statistic:,593.5
Df Residuals:,3139,Prob (F-statistic):,0.0
R-squared:,0.486,Scale:,151660000.0

0,1,2,3,4,5,6
,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
Size of Government,-2752.2138,202.2741,-13.6064,0.0000,-3148.8166,-2355.6110
Legal System and Property Rights,3966.0733,196.1519,20.2194,0.0000,3581.4743,4350.6723
Sound Money,902.3584,177.0987,5.0952,0.0000,555.1174,1249.5995
Freedom to Trade Internationally,1279.8725,211.7956,6.0430,0.0000,864.6007,1695.1443
Regulation,2141.0305,281.0437,7.6181,0.0000,1589.9824,2692.0786
Constant,-16595.2515,1627.3971,-10.1974,0.0000,-19786.1215,-13404.3814

0,1,2,3
Omnibus:,2952.722,Durbin-Watson:,0.174
Prob(Omnibus):,0.0,Jarque-Bera (JB):,189244.77
Skew:,4.324,Prob(JB):,0.0
Kurtosis:,40.005,Condition No.:,113.0


In [54]:
# OLS regression is predicting the values of the explained variable, RGDP Per Capita
predictor = results.predict()
reg_data[y_var[0] + " Predictor"] = predictor
reg_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  reg_data[y_var[0] + " Predictor"] = predictor


Unnamed: 0_level_0,Unnamed: 1_level_0,Size of Government,Legal System and Property Rights,Sound Money,Freedom to Trade Internationally,Regulation,RGDP Per Capita,RGDP Per Capita Predictor
ISO_Code_3,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AGO,2005,6.886311,3.129619,1.270081,5.356979,4.511067,3708.7706,-5474.902171
AGO,2006,5.162277,3.238314,3.807267,5.302944,5.118114,4592.3373,3221.099672
AGO,2007,4.963676,3.224507,4.015297,5.139768,5.348260,5773.5483,4184.555105
AGO,2008,4.715589,3.382642,4.653201,5.181950,5.185843,6743.7482,5776.385317
AGO,2009,7.455501,3.394515,4.901540,5.503538,5.007256,7087.6041,-1464.025089
...,...,...,...,...,...,...,...,...
ZWE,2014,6.771807,3.930143,7.664303,6.398692,5.039824,1594.0000,6250.400915
ZWE,2015,6.964753,4.108142,7.859669,6.509231,6.555970,1560.0000,9989.206335
ZWE,2016,5.332597,4.056407,8.086016,6.404937,6.520805,1534.0000,14271.539452
ZWE,2017,4.699843,4.071445,7.983888,4.503965,6.399757,1582.3662,13288.328954


In [57]:
y_hat = reg_data[y_var[0] + " Predictor"]
y_mean = reg_data[y_var[0]].mean()
y = reg_data[y_var[0]]
reg_data["Errors"] = y.sub(y_hat)
reg_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  reg_data["Errors"] = y.sub(y_hat)


Unnamed: 0_level_0,Unnamed: 1_level_0,Size of Government,Legal System and Property Rights,Sound Money,Freedom to Trade Internationally,Regulation,RGDP Per Capita,RGDP Per Capita Predictor,Residuals,Errors
ISO_Code_3,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AGO,2005,6.886311,3.129619,1.270081,5.356979,4.511067,3708.7706,-5474.902171,9183.672771,9183.672771
AGO,2006,5.162277,3.238314,3.807267,5.302944,5.118114,4592.3373,3221.099672,1371.237628,1371.237628
AGO,2007,4.963676,3.224507,4.015297,5.139768,5.348260,5773.5483,4184.555105,1588.993195,1588.993195
AGO,2008,4.715589,3.382642,4.653201,5.181950,5.185843,6743.7482,5776.385317,967.362883,967.362883
AGO,2009,7.455501,3.394515,4.901540,5.503538,5.007256,7087.6041,-1464.025089,8551.629189,8551.629189
...,...,...,...,...,...,...,...,...,...,...
ZWE,2014,6.771807,3.930143,7.664303,6.398692,5.039824,1594.0000,6250.400915,-4656.400915,-4656.400915
ZWE,2015,6.964753,4.108142,7.859669,6.509231,6.555970,1560.0000,9989.206335,-8429.206335,-8429.206335
ZWE,2016,5.332597,4.056407,8.086016,6.404937,6.520805,1534.0000,14271.539452,-12737.539452,-12737.539452
ZWE,2017,4.699843,4.071445,7.983888,4.503965,6.399757,1582.3662,13288.328954,-11705.962754,-11705.962754


In [60]:
reg_data["SR"] = y_hat.sub(y_mean) ** 2
reg_data["SE"] = (y.sub(y_hat)) ** 2
reg_data["ST"] = (y.sub(y_mean)) ** 2
reg_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  reg_data["SR"] = y_hat.sub(y_mean) ** 2
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  reg_data["SE"] = (y.sub(y_hat)) ** 2
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  reg_data["ST"] = (y.sub(y_mean)) ** 2


Unnamed: 0_level_0,Unnamed: 1_level_0,Size of Government,Legal System and Property Rights,Sound Money,Freedom to Trade Internationally,Regulation,RGDP Per Capita,RGDP Per Capita Predictor,Residuals,Errors,SE,SR,ST
ISO_Code_3,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
AGO,2005,6.886311,3.129619,1.270081,5.356979,4.511067,3708.7706,-5474.902171,9183.672771,9183.672771,8.433985e+07,4.693919e+08,1.557949e+08
AGO,2006,5.162277,3.238314,3.807267,5.302944,5.118114,4592.3373,3221.099672,1371.237628,1371.237628,1.880293e+06,1.682067e+08,1.345186e+08
AGO,2007,4.963676,3.224507,4.015297,5.139768,5.348260,5773.5483,4184.555105,1588.993195,1588.993195,2.524899e+06,1.441440e+08,1.085140e+08
AGO,2008,4.715589,3.382642,4.653201,5.181950,5.185843,6743.7482,5776.385317,967.362883,967.362883,9.357909e+05,1.084549e+08,8.924211e+07
AGO,2009,7.455501,3.394515,4.901540,5.503538,5.007256,7087.6041,-1464.025089,8551.629189,8551.629189,7.313036e+07,3.116841e+08,8.286367e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZWE,2014,6.771807,3.930143,7.664303,6.398692,5.039824,1594.0000,6250.400915,-4656.400915,-4656.400915,2.168207e+07,9.880662e+07,2.130593e+08
ZWE,2015,6.964753,4.108142,7.859669,6.509231,6.555970,1560.0000,9989.206335,-8429.206335,-8429.206335,7.105152e+07,3.845670e+07,2.140531e+08
ZWE,2016,5.332597,4.056407,8.086016,6.404937,6.520805,1534.0000,14271.539452,-12737.539452,-12737.539452,1.622449e+08,3.682612e+06,2.148145e+08
ZWE,2017,4.699843,4.071445,7.983888,4.503965,6.399757,1582.3662,13288.328954,-11705.962754,-11705.962754,1.370296e+08,8.422902e+06,2.133991e+08


In [61]:
SSR = reg_data["SR"].sum()
SSE = reg_data["SE"].sum()
SST = reg_data["ST"].sum()
SSR,SSE,SST

(450042843462.0842, 476075689815.21045, 926118533277.295)