# Assumption: independence of errors
This assumes the residuals show no patterns or relationships among themselves (no autocorrelation).


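# Diagnosis: Durbin-Watson test
The Durbin-Watson statistic ranges from 0 to 4, and a value close to 2 indicates no first-order autocorrelation in the residuals. As a rule of thumb, values between 1.5 and 2.5 are considered acceptable.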

In [1]:
import pandas as pd
import seaborn as sns
import statsmodels.api as sm


In [2]:
# Read the diamonds price dataset from the course materials folder.
csv_path = '../Course Materials/Data/Diamonds Prices2022.csv'
diamonds = pd.read_csv(csv_path)

In [3]:
# Preview the first five rows of the raw data.
diamonds.head(n=5)

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [4]:
# Add a squared carat term so the model can include a quadratic carat effect.
diamonds['carat_sq'] = diamonds['carat'].pow(2)

In [5]:
# Preview the first five rows again to confirm the carat_sq column is present.
diamonds.head(n=5)

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z,carat_sq
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43,0.0529
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31,0.0441
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31,0.0529
3,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63,0.0841
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75,0.0961


In [6]:
# Predictor columns used for the price regression.
features = ['carat', 'carat_sq', 'depth', 'table', 'x']

# Design matrix: the selected predictors plus an explicit intercept column.
X = sm.add_constant(diamonds[features])
# Target: diamond price.
y = diamonds['price']

# Fit ordinary least squares of price on the predictors.
model = sm.OLS(endog=y, exog=X).fit()

# Display the regression summary (includes the Durbin-Watson statistic).
model.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.862
Model:,OLS,Adj. R-squared:,0.862
Method:,Least Squares,F-statistic:,67450.0
Date:,"Wed, 17 Dec 2025",Prob (F-statistic):,0.0
Time:,15:26:08,Log-Likelihood:,-470360.0
No. Observations:,53943,AIC:,940700.0
Df Residuals:,53937,BIC:,940800.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,3.057e+04,505.825,60.443,0.000,2.96e+04,3.16e+04
carat,1.708e+04,198.650,85.956,0.000,1.67e+04,1.75e+04
carat_sq,-1342.4139,39.659,-33.849,0.000,-1420.146,-1264.682
depth,-271.2693,5.228,-51.890,0.000,-281.516,-261.023
table,-114.8939,3.070,-37.424,0.000,-120.911,-108.877
x,-2749.3688,52.155,-52.716,0.000,-2851.592,-2647.145

0,1,2,3
Omnibus:,13855.832,Durbin-Watson:,1.2
Prob(Omnibus):,0.0,Jarque-Bera (JB):,293840.413
Skew:,0.723,Prob(JB):,0.0
Kurtosis:,14.342,Cond. No.,6970.0


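# Important!
If the data is sorted by the target or by a predictor variable, neighbouring rows have similar residuals, which can cause this assumption to appear violated.


The solution is to shuffle the rows (appropriate for cross-sectional data like this, where row order carries no meaning; time-series data must keep its order).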


In [7]:
# Order the rows by the target (price), ascending.
diamonds = diamonds.sort_values(by='price')


In [8]:
# Rebuild the design matrix and target from the current row order of diamonds.
X = sm.add_constant(diamonds[features])
y = diamonds['price']

# Refit the same OLS specification and display its summary.
model = sm.OLS(endog=y, exog=X).fit()
model.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.862
Model:,OLS,Adj. R-squared:,0.862
Method:,Least Squares,F-statistic:,67450.0
Date:,"Wed, 17 Dec 2025",Prob (F-statistic):,0.0
Time:,15:34:31,Log-Likelihood:,-470360.0
No. Observations:,53943,AIC:,940700.0
Df Residuals:,53937,BIC:,940800.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,3.057e+04,505.825,60.443,0.000,2.96e+04,3.16e+04
carat,1.708e+04,198.650,85.956,0.000,1.67e+04,1.75e+04
carat_sq,-1342.4139,39.659,-33.849,0.000,-1420.146,-1264.682
depth,-271.2693,5.228,-51.890,0.000,-281.516,-261.023
table,-114.8939,3.070,-37.424,0.000,-120.911,-108.877
x,-2749.3688,52.155,-52.716,0.000,-2851.592,-2647.145

0,1,2,3
Omnibus:,13855.832,Durbin-Watson:,1.244
Prob(Omnibus):,0.0,Jarque-Bera (JB):,293840.413
Skew:,0.723,Prob(JB):,0.0
Kurtosis:,14.342,Cond. No.,6970.0


In [10]:
# Shuffle all rows so that their order is random.
# A fixed random_state keeps the shuffled order -- and therefore the
# Durbin-Watson value computed from it -- reproducible across kernel restarts.
diamonds = diamonds.sample(frac=1, random_state=42)

# Rebuild the design matrix (predictors plus intercept) and the target
# from the shuffled rows.
X = sm.add_constant(diamonds.loc[:, features])
y = diamonds['price']

# Refit the same OLS specification and display its summary.
model = sm.OLS(y, X).fit()
model.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.862
Model:,OLS,Adj. R-squared:,0.862
Method:,Least Squares,F-statistic:,67450.0
Date:,"Wed, 17 Dec 2025",Prob (F-statistic):,0.0
Time:,15:37:07,Log-Likelihood:,-470360.0
No. Observations:,53943,AIC:,940700.0
Df Residuals:,53937,BIC:,940800.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,3.057e+04,505.825,60.443,0.000,2.96e+04,3.16e+04
carat,1.708e+04,198.650,85.956,0.000,1.67e+04,1.75e+04
carat_sq,-1342.4139,39.659,-33.849,0.000,-1420.146,-1264.682
depth,-271.2693,5.228,-51.890,0.000,-281.516,-261.023
table,-114.8939,3.070,-37.424,0.000,-120.911,-108.877
x,-2749.3688,52.155,-52.716,0.000,-2851.592,-2647.145

0,1,2,3
Omnibus:,13855.832,Durbin-Watson:,2.003
Prob(Omnibus):,0.0,Jarque-Bera (JB):,293840.413
Skew:,0.723,Prob(JB):,0.0
Kurtosis:,14.342,Cond. No.,6970.0


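See the `Durbin-Watson` statistic: it has increased from 1.244 to 2.003, which is within the acceptable range. The coefficients and the other fit statistics are unchanged, because reordering the rows does not change the OLS fit.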