In [31]:
import pandas as pd
import seaborn as sns
import statsmodels.formula.api as smf

# Location of the emissions CSV, relative to the notebook's working directory.
EMISSIONS_CSV = 'data/new-zealands-greenhouse-gas-emissions-19902016.csv'

# Long source labels mapped to identifier-style names so the columns can be
# referenced directly inside a model formula string. The net total across all
# sources becomes "Units".
SOURCE_COLUMN_NAMES = {
    "All sources, Net (with LULUCF)": "Units",
    "Manufacturing Industries and Construction": "ManufacturingAndConstruction",
    "Land Use, Land-Use Change and Forestry": "LULUCF",
    "Road Transportation": "RoadTransportation",
}

# Reshape the raw table into one row per year with one column per source.
dat = (
    pd.read_csv(EMISSIONS_CSV)
    # wide -> long: every column other than Gas/Source becomes a (Year, Units)
    # pair (presumably one column per year -- confirm against the CSV header)
    .melt(id_vars=['Gas', 'Source'], var_name='Year', value_name='Units')
    # keep only the rows for the "All gases" aggregate
    .query('Gas == "All gases"')
    # Year arrives as a column label, so cast it to an integer; Gas and
    # Source are stored as categoricals
    .astype({'Year': 'int32', 'Gas': 'category', 'Source': 'category'})
    # long -> wide: index by Year, one column per Source, cells hold Units
    .pivot(index='Year', columns='Source', values='Units')
    .rename(columns=SOURCE_COLUMN_NAMES)
    # expose Year as an ordinary column instead of the index
    .reset_index()
)

In [32]:
# Ordinary least squares fit: the "Units" column is regressed on the
# manufacturing/construction, LULUCF and road-transportation columns of `dat`
# (an intercept is included, as shown by the Intercept row of the summary).
reg = smf.ols(
    formula='Units ~ ManufacturingAndConstruction + LULUCF + RoadTransportation',
    data=dat,
).fit()

In [33]:
# Display the fitted model's summary tables (fit statistics, coefficient
# estimates with confidence intervals, and residual diagnostics). This must
# stay the last expression in the cell so the notebook renders it.
# NOTE(review): the saved output reports Durbin-Watson = 0.544 with a
# nonrobust covariance type and Cond. No. = 7e+05. On yearly data this looks
# like serially correlated residuals and poorly scaled/collinear regressors,
# so the reported standard errors may be optimistic -- confirm, and consider
# HAC-robust standard errors before interpreting the p-values.
reg.summary()

0,1,2,3
Dep. Variable:,Units,R-squared:,0.927
Model:,OLS,Adj. R-squared:,0.917
Method:,Least Squares,F-statistic:,97.27
Date:,"Mon, 15 Feb 2021",Prob (F-statistic):,3.26e-13
Time:,17:10:11,Log-Likelihood:,-245.06
No. Observations:,27,AIC:,498.1
Df Residuals:,23,BIC:,503.3
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,4.13e+04,9688.144,4.262,0.000,2.13e+04,6.13e+04
ManufacturingAndConstruction,0.0661,0.740,0.089,0.930,-1.466,1.598
LULUCF,0.8892,0.201,4.426,0.000,0.474,1.305
RoadTransportation,2.8519,0.351,8.122,0.000,2.126,3.578

0,1,2,3
Omnibus:,0.696,Durbin-Watson:,0.544
Prob(Omnibus):,0.706,Jarque-Bera (JB):,0.74
Skew:,0.323,Prob(JB):,0.691
Kurtosis:,2.51,Cond. No.,700000.0
