In [1]:
import statsmodels.formula.api as smf
import pandas as pd
import numpy as np

from utils.load_data_for_eda import load_data
df = load_data('data/commodity_prices.csv')

# import warnings
# warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)



In [2]:
df.head()

Unnamed: 0,Product_Type,Commodity,Arrival_Date,Market,Is_VFPCK,Season,Year,Modal_Price,Max_Price,Min_Price
0,Alsandikai|Alsandikai|FAQ,Alsandikai,2023-12-13,North Paravur,False,Winter,2023,5200.0,6000.0,5000.0
1,Alsandikai|Alsandikai|FAQ,Alsandikai,2023-12-14,North Paravur,False,Winter,2023,6200.0,6500.0,6000.0
2,Alsandikai|Alsandikai|FAQ,Alsandikai,2023-12-16,North Paravur,False,Winter,2023,4800.0,5600.0,4600.0
3,Alsandikai|Alsandikai|FAQ,Alsandikai,2023-12-18,North Paravur,False,Winter,2023,3500.0,4500.0,3000.0
4,Alsandikai|Alsandikai|FAQ,Alsandikai,2023-12-19,North Paravur,False,Winter,2023,5500.0,6000.0,5500.0


In [24]:
df[df['Product_Type'] == 'Banana|Nendra Bale|Large']['Market'].nunique()    

17

In [21]:
def has_yearly_variance(group, tol=0.05):
    yearly_means = group.groupby(['Market', 'Year'])['log_Modal_Price'].mean()
    return yearly_means.max() - yearly_means.min() > tol

In [22]:
def has_seasonal_variance(group, tol=0.05):
    seasonal_means = group.groupby(['Market', 'Season'])['log_Modal_Price'].mean()
    # Check if the difference between max and min seasonal mean exceeds tol
    return seasonal_means.max() - seasonal_means.min() > tol

In [23]:
using_mixedlm = True
for prod, group in df.groupby('Product_Type'):
     year_counts = group['Year'].nunique()
     group['log_Modal_Price'] = group['Modal_Price'].apply(lambda x: np.log(x) if x>0 else np.nan)
     if year_counts > 2 and group['Market'].nunique() > 2:
     #if year_counts > 1:
          if has_yearly_variance(group) and has_seasonal_variance(group):
                    if group['Season'].value_counts().shape[0] >= 2:
                         formula = smf.mixedlm('log_Modal_Price ~ C(Season) + Year',
                                             data=group,
                                             groups=group['Market'],
                                             re_formula='Year')
                         print("rich formula") 
                    else:
                         formula = smf.mixedlm('log_Modal_Price ~ Year',
                                             data=group,
                                             groups=group['Market'],
                                             re_formula='Year')
                         print("formula with no season")
          elif has_yearly_variance(group) and not has_seasonal_variance(group):
               formula = smf.mixedlm('log_Modal_Price ~ Year',
                                        data=group,
                                        groups=group['Market'],
                                        re_formula='Year')
               print("formula with no season")
          elif not has_yearly_variance(group) and has_seasonal_variance(group):
               formula = smf.mixedlm('log_Modal_Price ~ C(Season)',
                                        data=group,
                                        groups=group['Market'])
               print("formula with no year")
          else:
               formula = smf.mixedlm('log_Modal_Price ~ 1',
                                        data=group,
                                        groups=group['Market'])
               print("formula with no year and no season")
     elif year_counts == 2 and group['Market'].nunique() > 2:
          if has_seasonal_variance(group):
               if group['Season'].value_counts().shape[0] >= 2:
                    formula = smf.mixedlm('log_Modal_Price ~ C(Season)',
                                    data=group,
                                    groups=group['Market'])
                    print("formula with season only")
               else:
                    formula = smf.mixedlm('log_Modal_Price ~ 1',
                                    data=group,
                                    groups=group['Market'])
                    print("formula with no season and no year")
          else:
               formula = smf.mixedlm('log_Modal_Price ~ 1',
                                    data=group,
                                    groups=group['Market'])    
               print("formula with no season and no year")
     else:
          if has_yearly_variance(group) and has_seasonal_variance(group):
               if group['Season'].value_counts().shape[0] >= 2:
                    formula = smf.ols('log_Modal_Price ~ C(Season)', data=group)
                    using_mixedlm = False
                    print("OLS: only season effect")
               else:
                    formula = smf.ols('log_Modal_Price ~ 1', data=group)
                    using_mixedlm = False
                    print("OLS: constant mean only")
          elif has_seasonal_variance(group):
               formula = smf.ols('log_Modal_Price ~ C(Season)', data=group)
               using_mixedlm = False
               print("OLS: only season effect")
          else:
               formula = smf.ols('log_Modal_Price ~ 1', data=group)
               using_mixedlm = False
               print("OLS: constant mean only")
               
     if using_mixedlm:
          result = formula.fit(method="lbfgs", reml=False)
     else:  # OLS
          result = formula.fit()
     print(f'Product: {prod}')
     print(result.summary())         

       

OLS: only season effect
Product: Alsandikai|Alsandikai|FAQ
                            OLS Regression Results                            
Dep. Variable:        log_Modal_Price   R-squared:                       0.386
Model:                            OLS   Adj. R-squared:                  0.382
Method:                 Least Squares   F-statistic:                     97.89
Date:                Sat, 23 Aug 2025   Prob (F-statistic):           3.60e-49
Time:                        16:02:06   Log-Likelihood:                 193.01
No. Observations:                 471   AIC:                            -378.0
Df Residuals:                     467   BIC:                            -361.4
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------



Product: Amaranthus|Other|FAQ
                     Mixed Linear Model Regression Results
Model:                   MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:        1160          Method:                   REML           
No. Groups:              3             Scale:                    0.0237         
Min. group size:         2             Log-Likelihood:           489.0055       
Max. group size:         589           Converged:                Yes            
Mean group size:         386.7                                                  
--------------------------------------------------------------------------------
                                Coef.   Std.Err.    z    P>|z|  [0.025   0.975] 
--------------------------------------------------------------------------------
Intercept                      -185.923   13.948 -13.330 0.000 -213.260 -158.586
C(Season)[T.Southwest Monsoon]    0.025    0.013   1.948 0.051   -0.000    0.050
C(Season)[T.Summer] 



Product: Amphophalus|Amphophalus|FAQ
                   Mixed Linear Model Regression Results
Model:                 MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:      1895         Method:                  REML           
No. Groups:            8            Scale:                   0.0160         
Min. group size:       2            Log-Likelihood:          1192.8236      
Max. group size:       609          Converged:               Yes            
Mean group size:       236.9                                                
----------------------------------------------------------------------------
                                Coef.  Std.Err.    z    P>|z|  [0.025 0.975]
----------------------------------------------------------------------------
Intercept                      -11.364   10.627  -1.069 0.285 -32.192  9.464
C(Season)[T.Southwest Monsoon]   0.052    0.008   6.277 0.000   0.036  0.069
C(Season)[T.Summer]             -0.063    0.010  -6.418 0.0



Product: Ashgourd|Ashgourd|FAQ
                  Mixed Linear Model Regression Results
Model:                MixedLM     Dependent Variable:     log_Modal_Price
No. Observations:     5204        Method:                 REML           
No. Groups:           15          Scale:                  0.0650         
Min. group size:      3           Log-Likelihood:         -322.8520      
Max. group size:      661         Converged:              Yes            
Mean group size:      346.9                                              
-------------------------------------------------------------------------
                               Coef.  Std.Err.   z    P>|z| [0.025 0.975]
-------------------------------------------------------------------------
Intercept                      36.772   12.107  3.037 0.002 13.043 60.501
C(Season)[T.Southwest Monsoon]  0.433    0.011 41.194 0.000  0.413  0.454
C(Season)[T.Summer]             0.054    0.012  4.586 0.000  0.031  0.077
C(Season)[T.Winter]      



Product: Banana - Green|Banana - Green|FAQ
                     Mixed Linear Model Regression Results
Model:                   MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:        4495          Method:                   REML           
No. Groups:              12            Scale:                    0.0534         
Min. group size:         3             Log-Likelihood:           70.9591        
Max. group size:         662           Converged:                Yes            
Mean group size:         374.6                                                  
--------------------------------------------------------------------------------
                                Coef.   Std.Err.    z    P>|z|  [0.025   0.975] 
--------------------------------------------------------------------------------
Intercept                      -350.063   11.661 -30.019 0.000 -372.918 -327.207
C(Season)[T.Southwest Monsoon]    0.085    0.010   8.312 0.000    0.065    0.105
C(Seaso

  sdf[0:self.k_fe, 1] = np.sqrt(np.diag(self.cov_params()[0:self.k_fe]))


Product: Banana|Nendra Bale|Large
                     Mixed Linear Model Regression Results
Model:                   MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:        5000          Method:                   REML           
No. Groups:              17            Scale:                    0.0322         
Min. group size:         1             Log-Likelihood:           1401.9032      
Max. group size:         593           Converged:                No             
Mean group size:         294.1                                                  
--------------------------------------------------------------------------------
                                Coef.   Std.Err.    z    P>|z|  [0.025   0.975] 
--------------------------------------------------------------------------------
Intercept                      -374.656   11.058 -33.880 0.000 -396.330 -352.982
C(Season)[T.Southwest Monsoon]   -0.010    0.008  -1.177 0.239   -0.026    0.007
C(Season)[T.Summ



Product: Banana|Nendra Bale|Small
               Mixed Linear Model Regression Results
Model:               MixedLM   Dependent Variable:   log_Modal_Price
No. Observations:    33        Method:               REML           
No. Groups:          6         Scale:                0.0238         
Min. group size:     1         Log-Likelihood:       -1.6607        
Max. group size:     13        Converged:            No             
Mean group size:     5.5                                            
--------------------------------------------------------------------
                     Coef.  Std.Err.   z    P>|z|   [0.025   0.975] 
--------------------------------------------------------------------
Intercept           127.353 1711.479  0.074 0.941 -3227.085 3481.790
C(Season)[T.Summer]  -0.161    0.114 -1.418 0.156    -0.383    0.062
C(Season)[T.Winter]  -0.106    0.125 -0.847 0.397    -0.351    0.139
Year                 -0.059    0.846 -0.070 0.944    -1.716    1.598
Group Var       



Product: Banana|Palayamthodan|Large
                     Mixed Linear Model Regression Results
Model:                   MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:        3527          Method:                   REML           
No. Groups:              19            Scale:                    0.0463         
Min. group size:         2             Log-Likelihood:           355.7139       
Max. group size:         591           Converged:                Yes            
Mean group size:         185.6                                                  
--------------------------------------------------------------------------------
                                Coef.   Std.Err.    z    P>|z|  [0.025   0.975] 
--------------------------------------------------------------------------------
Intercept                      -131.568   14.619  -9.000 0.000 -160.220 -102.916
C(Season)[T.Southwest Monsoon]    0.211    0.012  18.140 0.000    0.188    0.234
C(Season)[T.Su

  sdf[0:self.k_fe, 1] = np.sqrt(np.diag(self.cov_params()[0:self.k_fe]))


Product: Banana|Palayamthodan|Medium
                     Mixed Linear Model Regression Results
Model:                   MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:        2617          Method:                   REML           
No. Groups:              16            Scale:                    0.0247         
Min. group size:         1             Log-Likelihood:           1082.5201      
Max. group size:         659           Converged:                Yes            
Mean group size:         163.6                                                  
--------------------------------------------------------------------------------
                                Coef.   Std.Err.    z    P>|z|  [0.025   0.975] 
--------------------------------------------------------------------------------
Intercept                      -149.399   13.045 -11.453 0.000 -174.967 -123.831
C(Season)[T.Southwest Monsoon]   -0.017    0.009  -1.844 0.065   -0.035    0.001
C(Season)[T.S



Product: Banana|Poovan|Large
                     Mixed Linear Model Regression Results
Model:                   MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:        2816          Method:                   REML           
No. Groups:              16            Scale:                    0.0312         
Min. group size:         1             Log-Likelihood:           833.9016       
Max. group size:         590           Converged:                Yes            
Mean group size:         176.0                                                  
--------------------------------------------------------------------------------
                                Coef.   Std.Err.    z    P>|z|  [0.025   0.975] 
--------------------------------------------------------------------------------
Intercept                      -259.026   13.759 -18.826 0.000 -285.993 -232.059
C(Season)[T.Southwest Monsoon]    0.067    0.011   6.089 0.000    0.045    0.088
C(Season)[T.Summer]  



Product: Banana|Poovan|Medium
                      Mixed Linear Model Regression Results
Model:                    MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:         1070          Method:                   REML           
No. Groups:               15            Scale:                    0.0467         
Min. group size:          1             Log-Likelihood:           38.8487        
Max. group size:          590           Converged:                No             
Mean group size:          71.3                                                   
---------------------------------------------------------------------------------
                                Coef.    Std.Err.    z    P>|z|  [0.025   0.975] 
---------------------------------------------------------------------------------
Intercept                      -257.918    45.199  -5.706 0.000 -346.506 -169.329
C(Season)[T.Southwest Monsoon]   -0.030     0.019  -1.584 0.113   -0.068    0.007
C(Season



Product: Banana|Robusta|Large
                    Mixed Linear Model Regression Results
Model:                   MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:        2239         Method:                  REML           
No. Groups:              12           Scale:                   0.0541         
Min. group size:         3            Log-Likelihood:          25.6311        
Max. group size:         587          Converged:               No             
Mean group size:         186.6                                                
------------------------------------------------------------------------------
                                Coef.   Std.Err.   z    P>|z|  [0.025   0.975]
------------------------------------------------------------------------------
Intercept                      -102.272   21.856 -4.679 0.000 -145.110 -59.434
C(Season)[T.Southwest Monsoon]    0.006    0.017  0.334 0.738   -0.027   0.038
C(Season)[T.Summer]               0.010    



Product: Banana|Robusta|Small
                Mixed Linear Model Regression Results
Model:                MixedLM   Dependent Variable:   log_Modal_Price
No. Observations:     16        Method:               REML           
No. Groups:           4         Scale:                0.0476         
Min. group size:      1         Log-Likelihood:       -2.4889        
Max. group size:      13        Converged:            No             
Mean group size:      4.0                                            
---------------------------------------------------------------------
                     Coef.   Std.Err.   z    P>|z|   [0.025   0.975] 
---------------------------------------------------------------------
Intercept           -510.467 1598.182 -0.319 0.749 -3642.845 2621.912
C(Season)[T.Summer]    0.104    1.319  0.079 0.937    -2.481    2.689
Year                   0.256    0.790  0.324 0.746    -1.292    1.803
Group Var              0.048                                         
Group 



Product: Bhindi(Ladies Finger)|Bhindi|FAQ
                   Mixed Linear Model Regression Results
Model:                 MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:      3724         Method:                  REML           
No. Groups:            11           Scale:                   0.0406         
Min. group size:       1            Log-Likelihood:          622.9610       
Max. group size:       661          Converged:               No             
Mean group size:       338.5                                                
----------------------------------------------------------------------------
                                Coef.  Std.Err.   z    P>|z|  [0.025  0.975]
----------------------------------------------------------------------------
Intercept                      -34.950   11.222 -3.114 0.002 -56.944 -12.955
C(Season)[T.Southwest Monsoon]   0.062    0.010  6.469 0.000   0.043   0.080
C(Season)[T.Summer]              0.049    0.011  4.521



Product: Bitter gourd|Bitter Gourd|FAQ
                   Mixed Linear Model Regression Results
Model:                  MixedLM     Dependent Variable:     log_Modal_Price
No. Observations:       4101        Method:                 REML           
No. Groups:             12          Scale:                  0.0375         
Min. group size:        2           Log-Likelihood:         838.9261       
Max. group size:        663         Converged:              No             
Mean group size:        341.8                                              
---------------------------------------------------------------------------
                                Coef.  Std.Err.   z    P>|z| [0.025  0.975]
---------------------------------------------------------------------------
Intercept                      108.014   10.125 10.668 0.000 88.169 127.859
C(Season)[T.Southwest Monsoon]   0.242    0.009 27.808 0.000  0.225   0.259
C(Season)[T.Summer]              0.303    0.010 30.185 0.000  0.283 



Product: Bottle gourd|Bottle Gourd|FAQ
                   Mixed Linear Model Regression Results
Model:                  MixedLM     Dependent Variable:     log_Modal_Price
No. Observations:       2689        Method:                 REML           
No. Groups:             7           Scale:                  0.0383         
Min. group size:        1           Log-Likelihood:         489.4384       
Max. group size:        606         Converged:              No             
Mean group size:        384.1                                              
---------------------------------------------------------------------------
                               Coef.  Std.Err.    z    P>|z| [0.025  0.975]
---------------------------------------------------------------------------
Intercept                      86.255   13.311   6.480 0.000 60.166 112.343
C(Season)[T.Southwest Monsoon]  0.015    0.012   1.290 0.197 -0.008   0.039
C(Season)[T.Summer]            -0.144    0.013 -11.480 0.000 -0.169 



rich formula
Product: Brinjal|Brinjal|FAQ
                   Mixed Linear Model Regression Results
Model:                 MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:      2879         Method:                  REML           
No. Groups:            7            Scale:                   0.0282         
Min. group size:       68           Log-Likelihood:          969.2133       
Max. group size:       659          Converged:               Yes            
Mean group size:       411.3                                                
----------------------------------------------------------------------------
                                Coef.  Std.Err.   z    P>|z|  [0.025  0.975]
----------------------------------------------------------------------------
Intercept                      -51.895   10.557 -4.916 0.000 -72.586 -31.203
C(Season)[T.Southwest Monsoon]   0.066    0.009  7.618 0.000   0.049   0.083
C(Season)[T.Summer]             -0.074    0.010 -7.093



rich formula




Product: Cabbage|Cabbage|FAQ
                  Mixed Linear Model Regression Results
Model:                MixedLM     Dependent Variable:     log_Modal_Price
No. Observations:     3422        Method:                 REML           
No. Groups:           8           Scale:                  0.0482         
Min. group size:      39          Log-Likelihood:         242.9041       
Max. group size:      662         Converged:              Yes            
Mean group size:      427.8                                              
-------------------------------------------------------------------------
                               Coef.  Std.Err.   z    P>|z| [0.025 0.975]
-------------------------------------------------------------------------
Intercept                      33.674   12.878  2.615 0.009  8.433 58.914
C(Season)[T.Southwest Monsoon]  0.109    0.011  9.870 0.000  0.088  0.131
C(Season)[T.Summer]            -0.020    0.013 -1.559 0.119 -0.044  0.005
C(Season)[T.Winter]        



Product: Carrot|Carrot|FAQ
                   Mixed Linear Model Regression Results
Model:                 MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:      3418         Method:                  REML           
No. Groups:            8            Scale:                   0.0612         
Min. group size:       52           Log-Likelihood:          -165.6109      
Max. group size:       662          Converged:               Yes            
Mean group size:       427.2                                                
----------------------------------------------------------------------------
                                Coef.  Std.Err.   z    P>|z|  [0.025  0.975]
----------------------------------------------------------------------------
Intercept                      -72.413   13.977 -5.181 0.000 -99.807 -45.019
C(Season)[T.Southwest Monsoon]   0.199    0.012 16.052 0.000   0.175   0.223
C(Season)[T.Summer]             -0.117    0.014 -8.302 0.000  -0.145 



Product: Cauliflower|Other|FAQ
                  Mixed Linear Model Regression Results
Model:                 MixedLM     Dependent Variable:     log_Modal_Price
No. Observations:      1713        Method:                 REML           
No. Groups:            3           Scale:                  0.0352         
Min. group size:       553         Log-Likelihood:         400.2442       
Max. group size:       606         Converged:              No             
Mean group size:       571.0                                              
--------------------------------------------------------------------------
                               Coef.  Std.Err.   z    P>|z|  [0.025 0.975]
--------------------------------------------------------------------------
Intercept                       5.049   14.165  0.356 0.722 -22.714 32.811
C(Season)[T.Southwest Monsoon]  0.104    0.013  7.986 0.000   0.078  0.129
C(Season)[T.Summer]            -0.073    0.015 -4.862 0.000  -0.102 -0.044
C(Season)[T.W



rich formula




Product: Colacasia|Other|FAQ
                   Mixed Linear Model Regression Results
Model:                 MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:      3330         Method:                  REML           
No. Groups:            7            Scale:                   0.0189         
Min. group size:       63           Log-Likelihood:          1804.2972      
Max. group size:       653          Converged:               No             
Mean group size:       475.7                                                
----------------------------------------------------------------------------
                                Coef.  Std.Err.   z    P>|z|  [0.025  0.975]
----------------------------------------------------------------------------
Intercept                      -69.510    7.614 -9.129 0.000 -84.434 -54.586
C(Season)[T.Southwest Monsoon]  -0.065    0.007 -9.413 0.000  -0.078  -0.051
C(Season)[T.Summer]             -0.048    0.008 -6.034 0.000  -0.06



Product: Cowpea(Veg)|Cowpea (Veg)|FAQ
                  Mixed Linear Model Regression Results
Model:                MixedLM     Dependent Variable:     log_Modal_Price
No. Observations:     3000        Method:                 REML           
No. Groups:           14          Scale:                  0.0665         
Min. group size:      2           Log-Likelihood:         -259.2877      
Max. group size:      608         Converged:              No             
Mean group size:      214.3                                              
-------------------------------------------------------------------------
                               Coef.  Std.Err.   z    P>|z| [0.025 0.975]
-------------------------------------------------------------------------
Intercept                      48.415   17.042  2.841 0.004 15.014 81.816
C(Season)[T.Southwest Monsoon]  0.054    0.015  3.662 0.000  0.025  0.083
C(Season)[T.Summer]            -0.103    0.016 -6.336 0.000 -0.135 -0.071
C(Season)[T.Winter



Product: Cucumbar(Kheera)|Cucumbar|FAQ
                   Mixed Linear Model Regression Results
Model:                 MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:      4028         Method:                  REML           
No. Groups:            12           Scale:                   0.0765         
Min. group size:       6            Log-Likelihood:          -584.9362      
Max. group size:       659          Converged:               Yes            
Mean group size:       335.7                                                
----------------------------------------------------------------------------
                                Coef.  Std.Err.   z    P>|z|  [0.025  0.975]
----------------------------------------------------------------------------
Intercept                      -67.905   15.171 -4.476 0.000 -97.639 -38.170
C(Season)[T.Southwest Monsoon]   0.121    0.013  9.303 0.000   0.095   0.146
C(Season)[T.Summer]             -0.086    0.014 -6.037 0.

  skew = stats.skew(resids, axis=axis)
  kurtosis = 3 + stats.kurtosis(resids, axis=axis)
  b2 = skew(a, axis, _no_deco=True)
  b2 = kurtosis(a, axis, fisher=False, _no_deco=True)
  return 1 - self.ssr/self.centered_tss


Product: Drumstick|Drumstick|FAQ
                     Mixed Linear Model Regression Results
Model:                   MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:        3354          Method:                   REML           
No. Groups:              8             Scale:                    0.1611         
Min. group size:         37            Log-Likelihood:           -1785.9817     
Max. group size:         651           Converged:                No             
Mean group size:         419.2                                                  
--------------------------------------------------------------------------------
                                Coef.   Std.Err.    z    P>|z|  [0.025   0.975] 
--------------------------------------------------------------------------------
Intercept                      -198.962   23.045  -8.634 0.000 -244.129 -153.795
C(Season)[T.Southwest Monsoon]   -0.070    0.020  -3.424 0.001   -0.110   -0.030
C(Season)[T.Summe



Product: Elephant Yam (Suran)|Other|FAQ
                     Mixed Linear Model Regression Results
Model:                  MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:       1889          Method:                   REML           
No. Groups:             4             Scale:                    0.0197         
Min. group size:        63            Log-Likelihood:           980.3444       
Max. group size:        652           Converged:                No             
Mean group size:        472.2                                                  
-------------------------------------------------------------------------------
                                Coef.   Std.Err.    z    P>|z|  [0.025   0.975]
-------------------------------------------------------------------------------
Intercept                      -119.475   10.195 -11.719 0.000 -139.457 -99.493
C(Season)[T.Southwest Monsoon]   -0.024    0.009  -2.575 0.010   -0.042  -0.006
C(Season)[T.Summer]  



Product: Ginger(Green)|Green Ginger|FAQ
                    Mixed Linear Model Regression Results
Model:                  MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:       3060         Method:                  REML           
No. Groups:             10           Scale:                   0.0918         
Min. group size:        3            Log-Likelihood:          -747.5385      
Max. group size:        660          Converged:               No             
Mean group size:        306.0                                                
-----------------------------------------------------------------------------
                                Coef.  Std.Err.    z    P>|z|  [0.025  0.975]
-----------------------------------------------------------------------------
Intercept                      935.130   18.476  50.614 0.000 898.919 971.342
C(Season)[T.Southwest Monsoon]   0.196    0.016  12.031 0.000   0.164   0.228
C(Season)[T.Summer]              0.130    0.



Product: Green Chilli|Green Chilly|FAQ
                   Mixed Linear Model Regression Results
Model:                 MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:      3418         Method:                  REML           
No. Groups:            9            Scale:                   0.0435         
Min. group size:       2            Log-Likelihood:          404.4242       
Max. group size:       661          Converged:               No             
Mean group size:       379.8                                                
----------------------------------------------------------------------------
                                Coef.  Std.Err.   z    P>|z|  [0.025  0.975]
----------------------------------------------------------------------------
Intercept                      161.077   11.674 13.798 0.000 138.197 183.956
C(Season)[T.Southwest Monsoon]   0.392    0.010 37.373 0.000   0.372   0.413
C(Season)[T.Summer]              0.054    0.012  4.526 0.



                    Mixed Linear Model Regression Results
Model:                  MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:       1362         Method:                  REML           
No. Groups:             4            Scale:                   0.0629         
Min. group size:        7            Log-Likelihood:          -93.5528       
Max. group size:        538          Converged:               Yes            
Mean group size:        340.5                                                
-----------------------------------------------------------------------------
                                Coef.  Std.Err.    z    P>|z|  [0.025  0.975]
-----------------------------------------------------------------------------
Intercept                      288.592   27.249  10.591 0.000 235.185 341.998
C(Season)[T.Southwest Monsoon]  -0.106    0.022  -4.718 0.000  -0.150  -0.062
C(Season)[T.Summer]              0.139    0.022   6.270 0.000   0.095   0.182
C(Seas



Product: Little gourd (Kundru)|Other|FAQ
                   Mixed Linear Model Regression Results
Model:                 MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:      3509         Method:                  REML           
No. Groups:            6            Scale:                   0.0383         
Min. group size:       539          Log-Likelihood:          709.2108       
Max. group size:       661          Converged:               Yes            
Mean group size:       584.8                                                
----------------------------------------------------------------------------
                                Coef.  Std.Err.   z    P>|z|  [0.025  0.975]
----------------------------------------------------------------------------
Intercept                      -43.662   10.224 -4.271 0.000 -63.700 -23.624
C(Season)[T.Southwest Monsoon]   0.145    0.009 15.444 0.000   0.126   0.163
C(Season)[T.Summer]             -0.056    0.011 -5.086 



Product: Mango (Raw-Ripe)|Other|FAQ
                     Mixed Linear Model Regression Results
Model:                  MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:       734           Method:                   REML           
No. Groups:             3             Scale:                    0.0685         
Min. group size:        28            Log-Likelihood:           -75.6690       
Max. group size:        586           Converged:                Yes            
Mean group size:        244.7                                                  
-------------------------------------------------------------------------------
                               Coef.   Std.Err.     z    P>|z|  [0.025   0.975]
-------------------------------------------------------------------------------
Intercept                      71.088     95.103   0.747 0.455 -115.311 257.487
C(Season)[T.Southwest Monsoon] -0.559      0.029 -19.055 0.000   -0.617  -0.502
C(Season)[T.Summer]      



Product: Papaya (Raw)|Other|FAQ
                 Mixed Linear Model Regression Results
Model:                 MixedLM    Dependent Variable:    log_Modal_Price
No. Observations:      142        Method:                REML           
No. Groups:            5          Scale:                 0.1106         
Min. group size:       1          Log-Likelihood:        -51.3254       
Max. group size:       110        Converged:             No             
Mean group size:       28.4                                             
------------------------------------------------------------------------
                               Coef. Std.Err.   z    P>|z| [0.025 0.975]
------------------------------------------------------------------------
Intercept                      7.084    0.185 38.351 0.000  6.722  7.446
C(Season)[T.Southwest Monsoon] 0.813    0.212  3.839 0.000  0.398  1.228
C(Season)[T.Summer]            0.789    0.214  3.688 0.000  0.370  1.209
C(Season)[T.Winter]            1.000 



Product: Pineapple|Other|Medium
                  Mixed Linear Model Regression Results
Model:                 MixedLM     Dependent Variable:     log_Modal_Price
No. Observations:      1401        Method:                 REML           
No. Groups:            8           Scale:                  0.0986         
Min. group size:       1           Log-Likelihood:         -407.9434      
Max. group size:       559         Converged:              No             
Mean group size:       175.1                                              
--------------------------------------------------------------------------
                               Coef.  Std.Err.   z    P>|z|  [0.025 0.975]
--------------------------------------------------------------------------
Intercept                      32.110   34.594  0.928 0.353 -35.694 99.914
C(Season)[T.Southwest Monsoon] -0.157    0.027 -5.747 0.000  -0.210 -0.103
C(Season)[T.Summer]            -0.128    0.028 -4.588 0.000  -0.183 -0.073
C(Season)[T.

  sdf[0:self.k_fe, 1] = np.sqrt(np.diag(self.cov_params()[0:self.k_fe]))


Product: Pineapple|Pine Apple|Large
                  Mixed Linear Model Regression Results
Model:                 MixedLM     Dependent Variable:     log_Modal_Price
No. Observations:      1160        Method:                 REML           
No. Groups:            8           Scale:                  0.0758         
Min. group size:       2           Log-Likelihood:         -176.4156      
Max. group size:       594         Converged:              No             
Mean group size:       145.0                                              
--------------------------------------------------------------------------
                               Coef.  Std.Err.   z    P>|z|  [0.025 0.975]
--------------------------------------------------------------------------
Intercept                      -8.965   28.181 -0.318 0.750 -64.198 46.268
C(Season)[T.Southwest Monsoon] -0.129    0.029 -4.380 0.000  -0.187 -0.071
C(Season)[T.Summer]            -0.177    0.029 -6.009 0.000  -0.235 -0.119
C(Season



Product: Pineapple|Pine Apple|Medium
                    Mixed Linear Model Regression Results
Model:                   MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:        2506         Method:                  REML           
No. Groups:              7            Scale:                   0.0626         
Min. group size:         2            Log-Likelihood:          -162.8024      
Max. group size:         644          Converged:               No             
Mean group size:         358.0                                                
------------------------------------------------------------------------------
                                Coef.  Std.Err.    z    P>|z|  [0.025   0.975]
------------------------------------------------------------------------------
Intercept                      -67.912   16.911  -4.016 0.000 -101.057 -34.767
C(Season)[T.Southwest Monsoon]  -0.132    0.014  -9.332 0.000   -0.160  -0.105
C(Season)[T.Summer]             -0.0



Product: Potato|Potato|FAQ
                     Mixed Linear Model Regression Results
Model:                   MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:        2454          Method:                   REML           
No. Groups:              5             Scale:                    0.0283         
Min. group size:         74            Log-Likelihood:           864.5230       
Max. group size:         661           Converged:                Yes            
Mean group size:         490.8                                                  
--------------------------------------------------------------------------------
                                Coef.   Std.Err.    z    P>|z|  [0.025   0.975] 
--------------------------------------------------------------------------------
Intercept                      -145.597   10.567 -13.778 0.000 -166.308 -124.886
C(Season)[T.Southwest Monsoon]   -0.143    0.010 -14.837 0.000   -0.162   -0.124
C(Season)[T.Summer]    



Product: Pumpkin|Pumpkin|FAQ
                  Mixed Linear Model Regression Results
Model:                MixedLM     Dependent Variable:     log_Modal_Price
No. Observations:     2546        Method:                 REML           
No. Groups:           6           Scale:                  0.0301         
Min. group size:      71          Log-Likelihood:         777.3345       
Max. group size:      651         Converged:              No             
Mean group size:      424.3                                              
-------------------------------------------------------------------------
                               Coef.  Std.Err.   z    P>|z| [0.025 0.975]
-------------------------------------------------------------------------
Intercept                      68.345   11.427  5.981 0.000 45.948 90.741
C(Season)[T.Southwest Monsoon]  0.157    0.010 16.305 0.000  0.138  0.176
C(Season)[T.Summer]             0.053    0.012  4.497 0.000  0.030  0.076
C(Season)[T.Winter]        



Product: Snakeguard|Other|FAQ
                   Mixed Linear Model Regression Results
Model:                  MixedLM     Dependent Variable:     log_Modal_Price
No. Observations:       2859        Method:                 REML           
No. Groups:             8           Scale:                  0.0698         
Min. group size:        9           Log-Likelihood:         -341.1123      
Max. group size:        590         Converged:              Yes            
Mean group size:        357.4                                              
---------------------------------------------------------------------------
                                Coef.  Std.Err.   z    P>|z|  [0.025 0.975]
---------------------------------------------------------------------------
Intercept                      -13.642   18.580 -0.734 0.463 -50.058 22.773
C(Season)[T.Southwest Monsoon]   0.118    0.015  7.698 0.000   0.088  0.148
C(Season)[T.Summer]             -0.075    0.016 -4.700 0.000  -0.107 -0.044
C



Product: Snakeguard|Snakeguard|FAQ
                     Mixed Linear Model Regression Results
Model:                   MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:        2655          Method:                   REML           
No. Groups:              9             Scale:                    0.0359         
Min. group size:         2             Log-Likelihood:           609.8639       
Max. group size:         652           Converged:                Yes            
Mean group size:         295.0                                                  
--------------------------------------------------------------------------------
                                Coef.   Std.Err.    z    P>|z|  [0.025   0.975] 
--------------------------------------------------------------------------------
Intercept                      -139.082   11.747 -11.840 0.000 -162.105 -116.058
C(Season)[T.Southwest Monsoon]    0.030    0.010   2.828 0.005    0.009    0.050
C(Season)[T.Sum



Product: Tapioca|Tapioca|FAQ
                    Mixed Linear Model Regression Results
Model:                  MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:       4285         Method:                  REML           
No. Groups:             10           Scale:                   0.0149         
Min. group size:        11           Log-Likelihood:          2821.0996      
Max. group size:        634          Converged:               Yes            
Mean group size:        428.5                                                
-----------------------------------------------------------------------------
                                Coef.  Std.Err.    z    P>|z|  [0.025  0.975]
-----------------------------------------------------------------------------
Intercept                      -83.903    5.902 -14.217 0.000 -95.470 -72.337
C(Season)[T.Southwest Monsoon]  -0.120    0.005 -22.426 0.000  -0.131  -0.110
C(Season)[T.Summer]             -0.087    0.006 -13.586

  return hypotest_fun_in(*args, **kwds)


                            OLS Regression Results                            
Dep. Variable:        log_Modal_Price   R-squared:                       0.387
Model:                            OLS   Adj. R-squared:                  0.305
Method:                 Least Squares   F-statistic:                     4.729
Date:                Sat, 23 Aug 2025   Prob (F-statistic):             0.0256
Time:                        16:02:20   Log-Likelihood:                 1.8175
No. Observations:                  18   AIC:                             2.365
Df Residuals:                      15   BIC:                             5.036
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                          coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------
Intercept               8.3112    



In [None]:
using_mixedlm = True
for prod, group in df.groupby('Product_Type'):
     year_counts = group['Year'].nunique()
     group['log_Modal_Price'] = group['Modal_Price'].apply(lambda x: np.log(x) if x>0 else np.nan)
     #if year_counts > 1 and group['Market'].nunique() > 1:
     if year_counts > 1:
          if has_yearly_variance(group) and has_seasonal_variance(group):
                    if group['Season'].value_counts().shape[0] >= 2:
                         formula = smf.mixedlm('log_Modal_Price ~ C(Season) + Year',
                                             data=group,
                                             groups=group['Market'],
                                             re_formula='Year')
                         print("rich formula") 
                    else:
                         formula = smf.mixedlm('log_Modal_Price ~ Year',
                                             data=group,
                                             groups=group['Market'],
                                             re_formula='Year')
                         print("formula with no season")
          elif has_yearly_variance(group) and not has_seasonal_variance(group):
               formula = smf.mixedlm('log_Modal_Price ~ Year',
                                        data=group,
                                        groups=group['Market'],
                                        re_formula='Year')
               print("formula with no season")
          elif not has_yearly_variance(group) and has_seasonal_variance(group):
               formula = smf.mixedlm('log_Modal_Price ~ C(Season)',
                                        data=group,
                                        groups=group['Market'])
               print("formula with no year")
          else:
               formula = smf.mixedlm('log_Modal_Price ~ 1',
                                        data=group,
                                        groups=group['Market'])
               print("formula with no year and no season")
     
     else:
          if has_seasonal_variance(group):
               if group['Season'].value_counts().shape[0] >= 2:
                    formula = smf.mixedlm('log_Modal_Price ~ C(Season)',
                                    data=group,
                                    groups=group['Market'])
                    print("formula with season only")
               else:
                    formula = smf.mixedlm('log_Modal_Price ~ 1',
                                    data=group,
                                    groups=group['Market'])
                    print("formula with no season and no year")
          else:
               formula = smf.mixedlm('log_Modal_Price ~ 1',
                                    data=group,
                                    groups=group['Market'])    
               print("formula with no season and no year")
     # if using_mixedlm:                
     result = formula.fit(method="lbfgs", reml=True)
     # if using_mixedlm:
     #      result = formula.fit(method="lbfgs", reml=False)
     # else:  # OLS
     #      result = formula.fit()
     print(f'Product: {prod}')
     print(result.summary())         

       

In [None]:
df[df['Product_Type'] == 'Amla(Nelli Kai)|Amla|FAQ']['Market'].value_counts().shape

In [None]:
df[df['Product_Type'] == 'Ashgourd|Ashgourd|FAQ']['Market'].value_counts().shape

In [None]:
df[df['Product_Type'] == 'Arecanut(Betelnut/Supari)|Supari|FAQ']['Market'].value_counts().shape

In [None]:
def prepare_for_mixedlm(df, min_years=2, min_obs=30):
    refined = []
    for prod, group in df.groupby("Product_Type"):
        # keep only if there’s enough obs
        if len(group) < min_obs:
            continue

        # check markets have at least min_years variation
        year_counts = group.groupby("Market")["Year"].nunique()
        if year_counts.max() < min_years:
            continue

        refined.append(group)

    return pd.concat(refined, axis=0)

df = prepare_for_mixedlm(df)

In [None]:
df['Product_Type'].nunique(), 

In [None]:
df[df['Product_Type'] == 'Amphophalus|Amphophalus|FAQ'].shape

In [None]:
df['log_Modal_Price'] = df['Modal_Price'].apply(lambda x: np.log(x) if x > 0 else 0)
for Product_Type in df['Product_Type'].unique():
    print(f"Product Type: {Product_Type}")
    subset = df[df['Product_Type'] == Product_Type]
    model = smf.mixedlm('log_Modal_Price ~ C(Season) + Year',
                        data=subset,
                        groups=subset['Market'],
                        re_formula='Year')
    result = model.fit(method="lbfgs", reml=False)
    print(result.summary())
    print("\n" + "="*80 + "\n")

In [None]:
m_ml   = m.fit(method="lbfgs", reml=False)  # model selection
m_reml = m.fit(method="lbfgs", reml=True)   # final interpretation

In [None]:
df.dtypes

In [None]:
pd.crosstab(df['Market'], df['Season'])

In [None]:
pd.crosstab(df['Product_Type'], df['Market'])

In [None]:
pd.crosstab(df['Market'], df['Year'])

In [None]:
pd.crosstab(df['Product_Type'], df['Year'])

In [None]:
model = smf.mixedlm(
    "Modal_Price ~ C(Product_Type) + C(Season) + Year",
    data=df,
    groups=df["Market"],
    re_formula="~Year"
)
result = model.fit(method="lbfgs", reml=True)


In [None]:
result.summary()

**Observation:**  
Mixed-effects model results show clear, consistent price differences between markets, even after controlling for product type and arrival date.  
This suggests that market-level factors (e.g., supply chain, demand patterns, VFPCK vs. non-VFPCK operations) play a significant role in price variation.  
These coefficients provide a baseline-adjusted measure of each market’s typical pricing, which we will use to rank markets by relative price levels.

In [None]:
# Get market ranking
fe_params = result.fe_params
market_ranking = fe_params.filter(like='C(Market)')
market_ranking.index = market_ranking.index.str.replace('C(Market)[T.', '').str.replace(']', '')
market_ranking['Aluva'] = 0  # Add Aluva as baseline
market_ranking.sort_values(ascending=True, inplace=True)
print(market_ranking)

We observe that most VFPCK markets tend to offer lower prices compared to non-VFPCK markets.  
Earlier seasonal analysis showed that VFPCK prices also fluctuate with seasonality.  
From a consumer’s perspective, these lower prices are beneficial, but for farmers, they may reduce profit margins.  

Higher prices in urban or semi-urban markets like Angamaly and Ernakulam could be driven by stronger demand and higher willingness to pay, possibly amplified by transportation and logistics costs.  
However, to draw a stronger conclusion, we would need to map each market’s location, classify them as urban/rural, and examine their surrounding population and demand patterns.