In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from sklearn.linear_model import LinearRegression # linear regression
import statsmodels.formula.api as smf # regression modeling
from statsmodels.stats.outliers_influence import variance_inflation_factor # VIF

In [8]:
df = pd.read_csv('/workspaces/CourseWork/ZooVisitSpending.csv') #read the data in to a pandas dataframe

In [9]:
df.corr().round(2) # display a correlation matrix

Unnamed: 0,VisitSpending,PartySize,MilesFromZoo,Member
VisitSpending,1.0,0.32,0.77,-0.09
PartySize,0.32,1.0,-0.08,0.1
MilesFromZoo,0.77,-0.08,1.0,0.02
Member,-0.09,0.1,0.02,1.0


In [10]:
#make a linear regression model called zooSpendingModel
zooSpendingModel = smf.ols(formula='VisitSpending ~ PartySize + MilesFromZoo + Member', data=df).fit()
zooSpendingModel.summary()

0,1,2,3
Dep. Variable:,VisitSpending,R-squared:,0.765
Model:,OLS,Adj. R-squared:,0.759
Method:,Least Squares,F-statistic:,131.3
Date:,"Mon, 25 Sep 2023",Prob (F-statistic):,6.909999999999999e-38
Time:,20:35:24,Log-Likelihood:,-574.96
No. Observations:,125,AIC:,1158.0
Df Residuals:,121,BIC:,1169.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.2214,6.491,0.034,0.973,-12.628,13.071
PartySize,9.1362,1.018,8.979,0.000,7.122,11.151
MilesFromZoo,0.8889,0.049,18.272,0.000,0.793,0.985
Member,-14.9073,4.583,-3.253,0.001,-23.981,-5.834

0,1,2,3
Omnibus:,0.85,Durbin-Watson:,1.913
Prob(Omnibus):,0.654,Jarque-Bera (JB):,0.872
Skew:,0.006,Prob(JB):,0.647
Kurtosis:,2.591,Cond. No.,257.0


In [11]:
formula_vars = zooSpendingModel.model.exog[:, 1:]  # Exclude the intercept
vif_data = pd.DataFrame() # set a new empty dataframe called vif_data
vif_data["Variable"] = zooSpendingModel.model.exog_names[1:] 
vif_data["VIF"] = [variance_inflation_factor(formula_vars, i) for i in range(formula_vars.shape[1])]
vif_data['Multicolinearity'] = vif_data['VIF'] > 5 #check for values where VIF > 5, signalling multicolinearity
print(vif_data)

       Variable       VIF  Multicolinearity
0     PartySize  2.763759             False
1  MilesFromZoo  2.300421             False
2        Member  2.426559             False
