In [1]:
import statsmodels.formula.api as smf
import pandas as pd
import numpy as np

from utils.load_data_for_eda import load_data
df = load_data('data/commodity_prices.csv')

# import warnings
# warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)



In [2]:
df.head()

Unnamed: 0,Product_Type,Commodity,Variety_Type,Arrival_Date,Market,Is_VFPCK,Season,Year,Modal_Price,Max_Price,Min_Price
0,Alsandikai|Alsandikai|FAQ,Alsandikai,Alsandikai|Alsandikai,2023-12-13,North Paravur,False,Winter,2023,5200.0,6000.0,5000.0
1,Alsandikai|Alsandikai|FAQ,Alsandikai,Alsandikai|Alsandikai,2023-12-14,North Paravur,False,Winter,2023,6200.0,6500.0,6000.0
2,Alsandikai|Alsandikai|FAQ,Alsandikai,Alsandikai|Alsandikai,2023-12-16,North Paravur,False,Winter,2023,4800.0,5600.0,4600.0
3,Alsandikai|Alsandikai|FAQ,Alsandikai,Alsandikai|Alsandikai,2023-12-18,North Paravur,False,Winter,2023,3500.0,4500.0,3000.0
4,Alsandikai|Alsandikai|FAQ,Alsandikai,Alsandikai|Alsandikai,2023-12-19,North Paravur,False,Winter,2023,5500.0,6000.0,5500.0


In [3]:
df[df['Product_Type'] == 'Banana|Nendra Bale|Large']['Market'].nunique()    

15

In [4]:
def has_yearly_variance(group, tol=0.05):
    yearly_means = group.groupby(['Market', 'Year'])['log_Modal_Price'].mean()
    return yearly_means.max() - yearly_means.min() > tol

In [5]:
def has_seasonal_variance(group, tol=0.05):
    seasonal_means = group.groupby(['Market', 'Season'])['log_Modal_Price'].mean()
    # Check if the difference between max and min seasonal mean exceeds tol
    return seasonal_means.max() - seasonal_means.min() > tol

In [6]:
using_mixedlm = True
for prod, group in df.groupby('Product_Type'):
     year_counts = group['Year'].nunique()
     group['log_Modal_Price'] = group['Modal_Price'].apply(lambda x: np.log(x) if x>0 else np.nan)
     if year_counts > 2 and group['Market'].nunique() > 2:
     #if year_counts > 1:
          if has_yearly_variance(group) and has_seasonal_variance(group):
                    if group['Season'].value_counts().shape[0] >= 2:
                         formula = smf.mixedlm('log_Modal_Price ~ C(Season) + Year',
                                             data=group,
                                             groups=group['Market'],
                                             re_formula='Year')
                         print("rich formula") 
                    else:
                         formula = smf.mixedlm('log_Modal_Price ~ Year',
                                             data=group,
                                             groups=group['Market'],
                                             re_formula='Year')
                         print("formula with no season")
          elif has_yearly_variance(group) and not has_seasonal_variance(group):
               formula = smf.mixedlm('log_Modal_Price ~ Year',
                                        data=group,
                                        groups=group['Market'],
                                        re_formula='Year')
               print("formula with no season")
          elif not has_yearly_variance(group) and has_seasonal_variance(group):
               formula = smf.mixedlm('log_Modal_Price ~ C(Season)',
                                        data=group,
                                        groups=group['Market'])
               print("formula with no year")
          else:
               formula = smf.mixedlm('log_Modal_Price ~ 1',
                                        data=group,
                                        groups=group['Market'])
               print("formula with no year and no season")
     elif year_counts == 2 and group['Market'].nunique() > 2:
          if has_seasonal_variance(group):
               if group['Season'].value_counts().shape[0] >= 2:
                    formula = smf.mixedlm('log_Modal_Price ~ C(Season)',
                                    data=group,
                                    groups=group['Market'])
                    print("formula with season only")
               else:
                    formula = smf.mixedlm('log_Modal_Price ~ 1',
                                    data=group,
                                    groups=group['Market'])
                    print("formula with no season and no year")
          else:
               formula = smf.mixedlm('log_Modal_Price ~ 1',
                                    data=group,
                                    groups=group['Market'])    
               print("formula with no season and no year")
     else:
          if has_yearly_variance(group) and has_seasonal_variance(group):
               if group['Season'].value_counts().shape[0] >= 2:
                    formula = smf.ols('log_Modal_Price ~ C(Season)', data=group)
                    using_mixedlm = False
                    print("OLS: only season effect")
               else:
                    formula = smf.ols('log_Modal_Price ~ 1', data=group)
                    using_mixedlm = False
                    print("OLS: constant mean only")
          elif has_seasonal_variance(group):
               formula = smf.ols('log_Modal_Price ~ C(Season)', data=group)
               using_mixedlm = False
               print("OLS: only season effect")
          else:
               formula = smf.ols('log_Modal_Price ~ 1', data=group)
               using_mixedlm = False
               print("OLS: constant mean only")
               
     if using_mixedlm:
          result = formula.fit(method="lbfgs", reml=False)
     else:  # OLS
          result = formula.fit()
     print(f'Product: {prod}')
     print(result.summary())         

       

OLS: only season effect
Product: Alsandikai|Alsandikai|FAQ
                            OLS Regression Results                            
Dep. Variable:        log_Modal_Price   R-squared:                       0.386
Model:                            OLS   Adj. R-squared:                  0.382
Method:                 Least Squares   F-statistic:                     97.89
Date:                Thu, 28 Aug 2025   Prob (F-statistic):           3.60e-49
Time:                        12:35:42   Log-Likelihood:                 193.01
No. Observations:                 471   AIC:                            -378.0
Df Residuals:                     467   BIC:                            -361.4
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------



Product: Amphophalus|Amphophalus|FAQ
                   Mixed Linear Model Regression Results
Model:                 MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:      1875         Method:                  REML           
No. Groups:            5            Scale:                   0.0158         
Min. group size:       111          Log-Likelihood:          1166.9885      
Max. group size:       609          Converged:               Yes            
Mean group size:       375.0                                                
----------------------------------------------------------------------------
                                Coef.  Std.Err.    z    P>|z|  [0.025 0.975]
----------------------------------------------------------------------------
Intercept                      -11.053   10.517  -1.051 0.293 -31.666  9.560
C(Season)[T.Southwest Monsoon]   0.054    0.008   6.445 0.000   0.037  0.070
C(Season)[T.Summer]             -0.063    0.010  -6.427 0.0



Product: Ashgourd|Ashgourd|FAQ
                  Mixed Linear Model Regression Results
Model:                MixedLM     Dependent Variable:     log_Modal_Price
No. Observations:     5166        Method:                 REML           
No. Groups:           11          Scale:                  0.0647         
Min. group size:      116         Log-Likelihood:         -382.5014      
Max. group size:      661         Converged:              No             
Mean group size:      469.6                                              
-------------------------------------------------------------------------
                               Coef.  Std.Err.   z    P>|z| [0.025 0.975]
-------------------------------------------------------------------------
Intercept                      36.090   12.060  2.992 0.003 12.452 59.727
C(Season)[T.Southwest Monsoon]  0.433    0.011 41.184 0.000  0.412  0.453
C(Season)[T.Summer]             0.053    0.012  4.508 0.000  0.030  0.076
C(Season)[T.Winter]      



Product: Banana - Green|Banana - Green|FAQ
                     Mixed Linear Model Regression Results
Model:                   MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:        4487          Method:                   REML           
No. Groups:              10            Scale:                    0.0535         
Min. group size:         66            Log-Likelihood:           87.9945        
Max. group size:         662           Converged:                Yes            
Mean group size:         448.7                                                  
--------------------------------------------------------------------------------
                                Coef.   Std.Err.    z    P>|z|  [0.025   0.975] 
--------------------------------------------------------------------------------
Intercept                      -350.136   11.675 -29.991 0.000 -373.018 -327.254
C(Season)[T.Southwest Monsoon]    0.085    0.010   8.321 0.000    0.065    0.105
C(Seaso



Product: Banana|Nendra Bale|Large
                     Mixed Linear Model Regression Results
Model:                   MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:        4987          Method:                   REML           
No. Groups:              15            Scale:                    0.0324         
Min. group size:         54            Log-Likelihood:           1424.0451      
Max. group size:         593           Converged:                Yes            
Mean group size:         332.5                                                  
--------------------------------------------------------------------------------
                                Coef.   Std.Err.    z    P>|z|  [0.025   0.975] 
--------------------------------------------------------------------------------
Intercept                      -373.848   11.114 -33.637 0.000 -395.632 -352.065
C(Season)[T.Southwest Monsoon]   -0.010    0.008  -1.203 0.229   -0.026    0.006
C(Season)[T.Summ



Product: Banana|Palayamthodan|Large
                     Mixed Linear Model Regression Results
Model:                  MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:       3395          Method:                   REML           
No. Groups:             10            Scale:                    0.0467         
Min. group size:        74            Log-Likelihood:           299.2450       
Max. group size:        591           Converged:                No             
Mean group size:        339.5                                                  
-------------------------------------------------------------------------------
                                Coef.   Std.Err.    z    P>|z|  [0.025   0.975]
-------------------------------------------------------------------------------
Intercept                      -128.929   14.792  -8.716 0.000 -157.922 -99.937
C(Season)[T.Southwest Monsoon]    0.219    0.012  18.479 0.000    0.196   0.242
C(Season)[T.Summer]      



Product: Banana|Poovan|Large
                     Mixed Linear Model Regression Results
Model:                   MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:        2724          Method:                   REML           
No. Groups:              7             Scale:                    0.0310         
Min. group size:         74            Log-Likelihood:           785.3647       
Max. group size:         590           Converged:                Yes            
Mean group size:         389.1                                                  
--------------------------------------------------------------------------------
                                Coef.   Std.Err.    z    P>|z|  [0.025   0.975] 
--------------------------------------------------------------------------------
Intercept                      -261.165   13.152 -19.857 0.000 -286.942 -235.387
C(Season)[T.Southwest Monsoon]    0.069    0.011   6.283 0.000    0.048    0.091
C(Season)[T.Summer]  



Product: Banana|Robusta|Large
                    Mixed Linear Model Regression Results
Model:                   MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:        2126         Method:                  REML           
No. Groups:              6            Scale:                   0.0551         
Min. group size:         72           Log-Likelihood:          -4.0518        
Max. group size:         587          Converged:               Yes            
Mean group size:         354.3                                                
------------------------------------------------------------------------------
                                Coef.   Std.Err.   z    P>|z|  [0.025   0.975]
------------------------------------------------------------------------------
Intercept                      -102.887   22.058 -4.664 0.000 -146.120 -59.654
C(Season)[T.Southwest Monsoon]    0.008    0.017  0.492 0.623   -0.025   0.041
C(Season)[T.Summer]               0.008    



Product: Bhindi(Ladies Finger)|Bhindi|FAQ
                   Mixed Linear Model Regression Results
Model:                 MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:      3713         Method:                  REML           
No. Groups:            8            Scale:                   0.0407         
Min. group size:       66           Log-Likelihood:          585.1671       
Max. group size:       661          Converged:               Yes            
Mean group size:       464.1                                                
----------------------------------------------------------------------------
                                Coef.  Std.Err.   z    P>|z|  [0.025  0.975]
----------------------------------------------------------------------------
Intercept                      -34.934   11.236 -3.109 0.002 -56.957 -12.912
C(Season)[T.Southwest Monsoon]   0.062    0.010  6.460 0.000   0.043   0.080
C(Season)[T.Summer]              0.049    0.011  4.516



Product: Bitter gourd|Bitter Gourd|FAQ
                   Mixed Linear Model Regression Results
Model:                  MixedLM     Dependent Variable:     log_Modal_Price
No. Observations:       4055        Method:                 REML           
No. Groups:             9           Scale:                  0.0375         
Min. group size:        94          Log-Likelihood:         797.3433       
Max. group size:        663         Converged:              Yes            
Mean group size:        450.6                                              
---------------------------------------------------------------------------
                                Coef.  Std.Err.   z    P>|z| [0.025  0.975]
---------------------------------------------------------------------------
Intercept                      107.582   10.139 10.611 0.000 87.711 127.454
C(Season)[T.Southwest Monsoon]   0.241    0.009 27.473 0.000  0.224   0.258
C(Season)[T.Summer]              0.302    0.010 30.001 0.000  0.282 



Product: Brinjal|Brinjal|FAQ
                   Mixed Linear Model Regression Results
Model:                 MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:      2879         Method:                  REML           
No. Groups:            7            Scale:                   0.0282         
Min. group size:       68           Log-Likelihood:          969.2133       
Max. group size:       659          Converged:               Yes            
Mean group size:       411.3                                                
----------------------------------------------------------------------------
                                Coef.  Std.Err.   z    P>|z|  [0.025  0.975]
----------------------------------------------------------------------------
Intercept                      -51.895   10.557 -4.916 0.000 -72.586 -31.203
C(Season)[T.Southwest Monsoon]   0.066    0.009  7.618 0.000   0.049   0.083
C(Season)[T.Summer]             -0.074    0.010 -7.093 0.000  -0.09



Product: Cabbage|Cabbage|FAQ
                  Mixed Linear Model Regression Results
Model:                MixedLM     Dependent Variable:     log_Modal_Price
No. Observations:     3383        Method:                 REML           
No. Groups:           7           Scale:                  0.0487         
Min. group size:      74          Log-Likelihood:         277.7109       
Max. group size:      662         Converged:              Yes            
Mean group size:      483.3                                              
-------------------------------------------------------------------------
                               Coef.  Std.Err.   z    P>|z| [0.025 0.975]
-------------------------------------------------------------------------
Intercept                      33.938   12.947  2.621 0.009  8.563 59.314
C(Season)[T.Southwest Monsoon]  0.111    0.011  9.881 0.000  0.089  0.133
C(Season)[T.Summer]            -0.018    0.013 -1.454 0.146 -0.043  0.006
C(Season)[T.Winter]        



Product: Carrot|Carrot|FAQ
                   Mixed Linear Model Regression Results
Model:                 MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:      3418         Method:                  REML           
No. Groups:            8            Scale:                   0.0612         
Min. group size:       52           Log-Likelihood:          -165.6109      
Max. group size:       662          Converged:               Yes            
Mean group size:       427.2                                                
----------------------------------------------------------------------------
                                Coef.  Std.Err.   z    P>|z|  [0.025  0.975]
----------------------------------------------------------------------------
Intercept                      -72.413   13.977 -5.181 0.000 -99.807 -45.019
C(Season)[T.Southwest Monsoon]   0.199    0.012 16.052 0.000   0.175   0.223
C(Season)[T.Summer]             -0.117    0.014 -8.302 0.000  -0.145 



Product: Cauliflower|Other|FAQ
                  Mixed Linear Model Regression Results
Model:                 MixedLM     Dependent Variable:     log_Modal_Price
No. Observations:      1713        Method:                 REML           
No. Groups:            3           Scale:                  0.0352         
Min. group size:       553         Log-Likelihood:         400.2442       
Max. group size:       606         Converged:              No             
Mean group size:       571.0                                              
--------------------------------------------------------------------------
                               Coef.  Std.Err.   z    P>|z|  [0.025 0.975]
--------------------------------------------------------------------------
Intercept                       5.049   14.165  0.356 0.722 -22.714 32.811
C(Season)[T.Southwest Monsoon]  0.104    0.013  7.986 0.000   0.078  0.129
C(Season)[T.Summer]            -0.073    0.015 -4.862 0.000  -0.102 -0.044
C(Season)[T.W



Product: Colacasia|Other|FAQ
                   Mixed Linear Model Regression Results
Model:                 MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:      3330         Method:                  REML           
No. Groups:            7            Scale:                   0.0189         
Min. group size:       63           Log-Likelihood:          1804.2972      
Max. group size:       653          Converged:               No             
Mean group size:       475.7                                                
----------------------------------------------------------------------------
                                Coef.  Std.Err.   z    P>|z|  [0.025  0.975]
----------------------------------------------------------------------------
Intercept                      -69.510    7.614 -9.129 0.000 -84.434 -54.586
C(Season)[T.Southwest Monsoon]  -0.065    0.007 -9.413 0.000  -0.078  -0.051
C(Season)[T.Summer]             -0.048    0.008 -6.034 0.000  -0.06



Product: Cowpea(Veg)|Cowpea (Veg)|FAQ
                  Mixed Linear Model Regression Results
Model:                MixedLM     Dependent Variable:     log_Modal_Price
No. Observations:     2970        Method:                 REML           
No. Groups:           10          Scale:                  0.0666         
Min. group size:      51          Log-Likelihood:         -228.4065      
Max. group size:      608         Converged:              Yes            
Mean group size:      297.0                                              
-------------------------------------------------------------------------
                               Coef.  Std.Err.   z    P>|z| [0.025 0.975]
-------------------------------------------------------------------------
Intercept                      49.028   17.554  2.793 0.005 14.623 83.433
C(Season)[T.Southwest Monsoon]  0.054    0.015  3.629 0.000  0.025  0.083
C(Season)[T.Summer]            -0.104    0.016 -6.365 0.000 -0.136 -0.072
C(Season)[T.Winter



Product: Cucumbar(Kheera)|Cucumbar|FAQ
                   Mixed Linear Model Regression Results
Model:                 MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:      4000         Method:                  REML           
No. Groups:            10           Scale:                   0.0756         
Min. group size:       88           Log-Likelihood:          -624.0790      
Max. group size:       659          Converged:               No             
Mean group size:       400.0                                                
----------------------------------------------------------------------------
                                Coef.  Std.Err.   z    P>|z|  [0.025  0.975]
----------------------------------------------------------------------------
Intercept                      -66.682   15.084 -4.421 0.000 -96.247 -37.118
C(Season)[T.Southwest Monsoon]   0.120    0.013  9.272 0.000   0.094   0.145
C(Season)[T.Summer]             -0.084    0.014 -5.922 0.



Product: Drumstick|Drumstick|FAQ
                     Mixed Linear Model Regression Results
Model:                   MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:        3317          Method:                   REML           
No. Groups:              7             Scale:                    0.1622         
Min. group size:         74            Log-Likelihood:           -1714.8084     
Max. group size:         651           Converged:                Yes            
Mean group size:         473.9                                                  
--------------------------------------------------------------------------------
                                Coef.   Std.Err.    z    P>|z|  [0.025   0.975] 
--------------------------------------------------------------------------------
Intercept                      -202.101   24.020  -8.414 0.000 -249.179 -155.023
C(Season)[T.Southwest Monsoon]   -0.070    0.021  -3.373 0.001   -0.110   -0.029
C(Season)[T.Summe



Product: Elephant Yam (Suran)|Other|FAQ
                     Mixed Linear Model Regression Results
Model:                  MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:       1889          Method:                   REML           
No. Groups:             4             Scale:                    0.0197         
Min. group size:        63            Log-Likelihood:           980.3444       
Max. group size:        652           Converged:                No             
Mean group size:        472.2                                                  
-------------------------------------------------------------------------------
                                Coef.   Std.Err.    z    P>|z|  [0.025   0.975]
-------------------------------------------------------------------------------
Intercept                      -119.475   10.195 -11.719 0.000 -139.457 -99.493
C(Season)[T.Southwest Monsoon]   -0.024    0.009  -2.575 0.010   -0.042  -0.006
C(Season)[T.Summer]  



Product: Ginger(Green)|Green Ginger|FAQ
                   Mixed Linear Model Regression Results
Model:                 MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:      3054         Method:                  REML           
No. Groups:            8            Scale:                   0.0863         
Min. group size:       61           Log-Likelihood:          -682.2416      
Max. group size:       660          Converged:               Yes            
Mean group size:       381.8                                                
----------------------------------------------------------------------------
                                Coef.  Std.Err.   z    P>|z|  [0.025  0.975]
----------------------------------------------------------------------------
Intercept                      940.496   17.835 52.733 0.000 905.540 975.452
C(Season)[T.Southwest Monsoon]   0.201    0.016 12.746 0.000   0.170   0.232
C(Season)[T.Summer]              0.133    0.018  7.433 0



Product: Indian Beans (Seam)|Indian Beans (Seam)|FAQ
                  Mixed Linear Model Regression Results
Model:                 MixedLM     Dependent Variable:     log_Modal_Price
No. Observations:      1409        Method:                 REML           
No. Groups:            3           Scale:                  0.0587         
Min. group size:       370         Log-Likelihood:         -36.1092       
Max. group size:       587         Converged:              Yes            
Mean group size:       469.7                                              
--------------------------------------------------------------------------
                               Coef.  Std.Err.   z    P>|z|  [0.025 0.975]
--------------------------------------------------------------------------
Intercept                      33.266   24.311  1.368 0.171 -14.383 80.916
C(Season)[T.Southwest Monsoon]  0.018    0.021  0.835 0.404  -0.024  0.059
C(Season)[T.Summer]             0.003    0.021  0.156 0.876  -0.03



Product: Little gourd (Kundru)|Other|FAQ
                   Mixed Linear Model Regression Results
Model:                 MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:      3509         Method:                  REML           
No. Groups:            6            Scale:                   0.0383         
Min. group size:       539          Log-Likelihood:          709.2108       
Max. group size:       661          Converged:               Yes            
Mean group size:       584.8                                                
----------------------------------------------------------------------------
                                Coef.  Std.Err.   z    P>|z|  [0.025  0.975]
----------------------------------------------------------------------------
Intercept                      -43.662   10.224 -4.271 0.000 -63.700 -23.624
C(Season)[T.Southwest Monsoon]   0.145    0.009 15.444 0.000   0.126   0.163
C(Season)[T.Summer]             -0.056    0.011 -5.086 



Product: Onion|Big|FAQ
                     Mixed Linear Model Regression Results
Model:                  MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:       3086          Method:                   REML           
No. Groups:             7             Scale:                    0.0752         
Min. group size:        108           Log-Likelihood:           -465.4916      
Max. group size:        605           Converged:                Yes            
Mean group size:        440.9                                                  
-------------------------------------------------------------------------------
                                Coef.   Std.Err.    z    P>|z|  [0.025   0.975]
-------------------------------------------------------------------------------
Intercept                      -129.786   16.076  -8.073 0.000 -161.295 -98.278
C(Season)[T.Southwest Monsoon]   -0.515    0.014 -36.838 0.000   -0.542  -0.487
C(Season)[T.Summer]              -0.67



Product: Pineapple|Other|Medium
                  Mixed Linear Model Regression Results
Model:                 MixedLM     Dependent Variable:     log_Modal_Price
No. Observations:      1354        Method:                 REML           
No. Groups:            3           Scale:                  0.0882         
Min. group size:       372         Log-Likelihood:         -310.5894      
Max. group size:       559         Converged:              Yes            
Mean group size:       451.3                                              
--------------------------------------------------------------------------
                               Coef.  Std.Err.   z    P>|z|  [0.025 0.975]
--------------------------------------------------------------------------
Intercept                      33.151   32.673  1.015 0.310 -30.887 97.188
C(Season)[T.Southwest Monsoon] -0.153    0.026 -5.925 0.000  -0.204 -0.103
C(Season)[T.Summer]            -0.126    0.027 -4.747 0.000  -0.178 -0.074
C(Season)[T.



Product: Pineapple|Pine Apple|Medium
                    Mixed Linear Model Regression Results
Model:                  MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:       2477         Method:                  REML           
No. Groups:             5            Scale:                   0.0623         
Min. group size:        301          Log-Likelihood:          -133.7201      
Max. group size:        644          Converged:               Yes            
Mean group size:        495.4                                                
-----------------------------------------------------------------------------
                                Coef.  Std.Err.    z    P>|z|  [0.025  0.975]
-----------------------------------------------------------------------------
Intercept                      -65.518   16.911  -3.874 0.000 -98.662 -32.373
C(Season)[T.Southwest Monsoon]  -0.138    0.014  -9.701 0.000  -0.165  -0.110
C(Season)[T.Summer]             -0.089    0.017



Product: Potato|Potato|FAQ
                     Mixed Linear Model Regression Results
Model:                   MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:        2454          Method:                   REML           
No. Groups:              5             Scale:                    0.0283         
Min. group size:         74            Log-Likelihood:           864.5230       
Max. group size:         661           Converged:                Yes            
Mean group size:         490.8                                                  
--------------------------------------------------------------------------------
                                Coef.   Std.Err.    z    P>|z|  [0.025   0.975] 
--------------------------------------------------------------------------------
Intercept                      -145.597   10.567 -13.778 0.000 -166.308 -124.886
C(Season)[T.Southwest Monsoon]   -0.143    0.010 -14.837 0.000   -0.162   -0.124
C(Season)[T.Summer]    



Product: Pumpkin|Pumpkin|FAQ
                  Mixed Linear Model Regression Results
Model:                MixedLM     Dependent Variable:     log_Modal_Price
No. Observations:     2546        Method:                 REML           
No. Groups:           6           Scale:                  0.0301         
Min. group size:      71          Log-Likelihood:         777.3345       
Max. group size:      651         Converged:              No             
Mean group size:      424.3                                              
-------------------------------------------------------------------------
                               Coef.  Std.Err.   z    P>|z| [0.025 0.975]
-------------------------------------------------------------------------
Intercept                      68.345   11.427  5.981 0.000 45.948 90.741
C(Season)[T.Southwest Monsoon]  0.157    0.010 16.305 0.000  0.138  0.176
C(Season)[T.Summer]             0.053    0.012  4.497 0.000  0.030  0.076
C(Season)[T.Winter]        



Product: Snakeguard|Other|FAQ
                   Mixed Linear Model Regression Results
Model:                  MixedLM     Dependent Variable:     log_Modal_Price
No. Observations:       2838        Method:                 REML           
No. Groups:             6           Scale:                  0.0701         
Min. group size:        371         Log-Likelihood:         -323.5925      
Max. group size:        590         Converged:              No             
Mean group size:        473.0                                              
---------------------------------------------------------------------------
                                Coef.  Std.Err.   z    P>|z|  [0.025 0.975]
---------------------------------------------------------------------------
Intercept                      -13.465   18.628 -0.723 0.470 -49.976 23.045
C(Season)[T.Southwest Monsoon]   0.117    0.015  7.603 0.000   0.087  0.147
C(Season)[T.Summer]             -0.075    0.016 -4.648 0.000  -0.106 -0.043
C



Product: Snakeguard|Snakeguard|FAQ
                     Mixed Linear Model Regression Results
Model:                   MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:        2622          Method:                   REML           
No. Groups:              7             Scale:                    0.0356         
Min. group size:         56            Log-Likelihood:           618.0829       
Max. group size:         652           Converged:                No             
Mean group size:         374.6                                                  
--------------------------------------------------------------------------------
                                Coef.   Std.Err.    z    P>|z|  [0.025   0.975] 
--------------------------------------------------------------------------------
Intercept                      -139.375   11.809 -11.802 0.000 -162.520 -116.230
C(Season)[T.Southwest Monsoon]    0.024    0.011   2.299 0.022    0.004    0.045
C(Season)[T.Sum



Product: Tapioca|Other|FAQ
                     Mixed Linear Model Regression Results
Model:                  MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:       2445          Method:                   REML           
No. Groups:             5             Scale:                    0.0221         
Min. group size:        343           Log-Likelihood:           1154.9254      
Max. group size:        587           Converged:                Yes            
Mean group size:        489.0                                                  
-------------------------------------------------------------------------------
                                Coef.   Std.Err.    z    P>|z|  [0.025   0.975]
-------------------------------------------------------------------------------
Intercept                      -105.936   10.588 -10.006 0.000 -126.687 -85.185
C(Season)[T.Southwest Monsoon]   -0.113    0.010 -11.857 0.000   -0.132  -0.094
C(Season)[T.Summer]              -



Product: Tapioca|Tapioca|FAQ
                    Mixed Linear Model Regression Results
Model:                  MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:       4274         Method:                  REML           
No. Groups:             9            Scale:                   0.0149         
Min. group size:        73           Log-Likelihood:          2822.8532      
Max. group size:        634          Converged:               No             
Mean group size:        474.9                                                
-----------------------------------------------------------------------------
                                Coef.  Std.Err.    z    P>|z|  [0.025  0.975]
-----------------------------------------------------------------------------
Intercept                      -83.903    5.904 -14.212 0.000 -95.474 -72.333
C(Season)[T.Southwest Monsoon]  -0.120    0.005 -22.417 0.000  -0.131  -0.110
C(Season)[T.Summer]             -0.087    0.006 -13.582



In [7]:
using_mixedlm = True
for prod, group in df.groupby('Product_Type'):
     year_counts = group['Year'].nunique()
     group['log_Modal_Price'] = group['Modal_Price'].apply(lambda x: np.log(x) if x>0 else np.nan)
     #if year_counts > 1 and group['Market'].nunique() > 1:
     if year_counts > 1:
          if has_yearly_variance(group) and has_seasonal_variance(group):
                    if group['Season'].value_counts().shape[0] >= 2:
                         formula = smf.mixedlm('log_Modal_Price ~ C(Season) + Year',
                                             data=group,
                                             groups=group['Market'],
                                             re_formula='Year')
                         print("rich formula") 
                    else:
                         formula = smf.mixedlm('log_Modal_Price ~ Year',
                                             data=group,
                                             groups=group['Market'],
                                             re_formula='Year')
                         print("formula with no season")
          elif has_yearly_variance(group) and not has_seasonal_variance(group):
               formula = smf.mixedlm('log_Modal_Price ~ Year',
                                        data=group,
                                        groups=group['Market'],
                                        re_formula='Year')
               print("formula with no season")
          elif not has_yearly_variance(group) and has_seasonal_variance(group):
               formula = smf.mixedlm('log_Modal_Price ~ C(Season)',
                                        data=group,
                                        groups=group['Market'])
               print("formula with no year")
          else:
               formula = smf.mixedlm('log_Modal_Price ~ 1',
                                        data=group,
                                        groups=group['Market'])
               print("formula with no year and no season")
     
     else:
          if has_seasonal_variance(group):
               if group['Season'].value_counts().shape[0] >= 2:
                    formula = smf.mixedlm('log_Modal_Price ~ C(Season)',
                                    data=group,
                                    groups=group['Market'])
                    print("formula with season only")
               else:
                    formula = smf.mixedlm('log_Modal_Price ~ 1',
                                    data=group,
                                    groups=group['Market'])
                    print("formula with no season and no year")
          else:
               formula = smf.mixedlm('log_Modal_Price ~ 1',
                                    data=group,
                                    groups=group['Market'])    
               print("formula with no season and no year")
     # if using_mixedlm:                
     result = formula.fit(method="lbfgs", reml=True)
     # if using_mixedlm:
     #      result = formula.fit(method="lbfgs", reml=False)
     # else:  # OLS
     #      result = formula.fit()
     print(f'Product: {prod}')
     print(result.summary())         

       

rich formula
Product: Alsandikai|Alsandikai|FAQ
                     Mixed Linear Model Regression Results
Model:                  MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:       471           Method:                   REML           
No. Groups:             1             Scale:                    0.0254         
Min. group size:        471           Log-Likelihood:           182.6520       
Max. group size:        471           Converged:                Yes            
Mean group size:        471.0                                                  
-------------------------------------------------------------------------------
                                Coef.    Std.Err.     z    P>|z| [0.025  0.975]
-------------------------------------------------------------------------------
Intercept                      112.078       29.713  3.772 0.000 53.842 170.314
C(Season)[T.Southwest Monsoon]   0.252        0.025 10.250 0.000  0.204   0.301
C(Season)[T.S



rich formula
Product: Amphophalus|Other|FAQ
                   Mixed Linear Model Regression Results
Model:                  MixedLM     Dependent Variable:     log_Modal_Price
No. Observations:       665         Method:                 REML           
No. Groups:             2           Scale:                  0.0157         
Min. group size:        101         Log-Likelihood:         417.0697       
Max. group size:        564         Converged:              No             
Mean group size:        332.5                                              
---------------------------------------------------------------------------
                                Coef.   Std.Err.   z    P>|z| [0.025 0.975]
---------------------------------------------------------------------------
Intercept                      -203.491                                    
C(Season)[T.Southwest Monsoon]    0.012    0.013  0.948 0.343 -0.013  0.038
C(Season)[T.Summer]               0.035    0.016  2.097 0.036  

  sdf[0:self.k_fe, 1] = np.sqrt(np.diag(self.cov_params()[0:self.k_fe]))


Product: Banana - Green|Banana - Green|FAQ
                     Mixed Linear Model Regression Results
Model:                   MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:        4487          Method:                   REML           
No. Groups:              10            Scale:                    0.0535         
Min. group size:         66            Log-Likelihood:           120.8158       
Max. group size:         662           Converged:                Yes            
Mean group size:         448.7                                                  
--------------------------------------------------------------------------------
                                Coef.   Std.Err.    z    P>|z|  [0.025   0.975] 
--------------------------------------------------------------------------------
Intercept                      -350.136   11.675 -29.991 0.000 -373.018 -327.254
C(Season)[T.Southwest Monsoon]    0.085    0.010   8.321 0.000    0.065    0.105
C(Seaso



Product: Banana|Palayamthodan|Large
                     Mixed Linear Model Regression Results
Model:                  MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:       3395          Method:                   REML           
No. Groups:             10            Scale:                    0.0467         
Min. group size:        74            Log-Likelihood:           299.2450       
Max. group size:        591           Converged:                No             
Mean group size:        339.5                                                  
-------------------------------------------------------------------------------
                                Coef.   Std.Err.    z    P>|z|  [0.025   0.975]
-------------------------------------------------------------------------------
Intercept                      -128.929   14.792  -8.716 0.000 -157.922 -99.937
C(Season)[T.Southwest Monsoon]    0.219    0.012  18.479 0.000    0.196   0.242
C(Season)[T.Summer]      



Product: Bhindi(Ladies Finger)|Bhindi|FAQ
                   Mixed Linear Model Regression Results
Model:                 MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:      3713         Method:                  REML           
No. Groups:            8            Scale:                   0.0407         
Min. group size:       66           Log-Likelihood:          613.4330       
Max. group size:       661          Converged:               Yes            
Mean group size:       464.1                                                
----------------------------------------------------------------------------
                                Coef.  Std.Err.   z    P>|z|  [0.025  0.975]
----------------------------------------------------------------------------
Intercept                      -34.934   11.236 -3.109 0.002 -56.957 -12.912
C(Season)[T.Southwest Monsoon]   0.062    0.010  6.460 0.000   0.043   0.080
C(Season)[T.Summer]              0.049    0.011  4.516



rich formula
Product: Brinjal|Other|FAQ
                  Mixed Linear Model Regression Results
Model:                MixedLM     Dependent Variable:     log_Modal_Price
No. Observations:     1152        Method:                 REML           
No. Groups:           2           Scale:                  0.0381         
Min. group size:      559         Log-Likelihood:         227.9902       
Max. group size:      593         Converged:              No             
Mean group size:      576.0                                              
-------------------------------------------------------------------------
                               Coef.  Std.Err.   z    P>|z| [0.025 0.975]
-------------------------------------------------------------------------
Intercept                      58.046   17.730  3.274 0.001 23.296 92.796
C(Season)[T.Southwest Monsoon]  0.111    0.016  6.837 0.000  0.079  0.143
C(Season)[T.Summer]             0.007    0.019  0.384 0.701 -0.030  0.045
C(Season)[T.Wint



rich formula
Product: Carrot|Carrot|FAQ
                   Mixed Linear Model Regression Results
Model:                 MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:      3418         Method:                  REML           
No. Groups:            8            Scale:                   0.0612         
Min. group size:       52           Log-Likelihood:          -123.9227      
Max. group size:       662          Converged:               No             
Mean group size:       427.2                                                
----------------------------------------------------------------------------
                                Coef.  Std.Err.   z    P>|z|  [0.025  0.975]
----------------------------------------------------------------------------
Intercept                      -72.429   13.977 -5.182 0.000 -99.823 -45.035
C(Season)[T.Southwest Monsoon]   0.199    0.012 16.051 0.000   0.175   0.223
C(Season)[T.Summer]             -0.117    0.014 -8.302 0



Product: Cauliflower|Cauliflower|FAQ
                    Mixed Linear Model Regression Results
Model:                  MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:       1908         Method:                  REML           
No. Groups:             4            Scale:                   0.0272         
Min. group size:        74           Log-Likelihood:          699.9522       
Max. group size:        659          Converged:               Yes            
Mean group size:        477.0                                                
-----------------------------------------------------------------------------
                                Coef.  Std.Err.    z    P>|z|  [0.025  0.975]
-----------------------------------------------------------------------------
Intercept                      -52.728   11.844  -4.452 0.000 -75.942 -29.515
C(Season)[T.Southwest Monsoon]   0.067    0.011   6.241 0.000   0.046   0.088
C(Season)[T.Summer]             -0.131    0.013



Product: Colacasia|Other|FAQ
                   Mixed Linear Model Regression Results
Model:                 MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:      3330         Method:                  REML           
No. Groups:            7            Scale:                   0.0189         
Min. group size:       63           Log-Likelihood:          1821.7260      
Max. group size:       653          Converged:               No             
Mean group size:       475.7                                                
----------------------------------------------------------------------------
                                Coef.  Std.Err.   z    P>|z|  [0.025  0.975]
----------------------------------------------------------------------------
Intercept                      -69.510    7.614 -9.129 0.000 -84.434 -54.586
C(Season)[T.Southwest Monsoon]  -0.065    0.007 -9.413 0.000  -0.078  -0.051
C(Season)[T.Summer]             -0.048    0.008 -6.034 0.000  -0.06



Product: Cucumbar(Kheera)|Cucumbar|FAQ
                   Mixed Linear Model Regression Results
Model:                 MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:      4000         Method:                  REML           
No. Groups:            10           Scale:                   0.0757         
Min. group size:       88           Log-Likelihood:          -558.3510      
Max. group size:       659          Converged:               Yes            
Mean group size:       400.0                                                
----------------------------------------------------------------------------
                                Coef.  Std.Err.   z    P>|z|  [0.025  0.975]
----------------------------------------------------------------------------
Intercept                      -66.905   15.101 -4.431 0.000 -96.502 -37.308
C(Season)[T.Southwest Monsoon]   0.120    0.013  9.266 0.000   0.094   0.145
C(Season)[T.Summer]             -0.084    0.014 -5.918 0.



Product: Elephant Yam (Suran)|Elephant Yam (Suran)|FAQ
                   Mixed Linear Model Regression Results
Model:                  MixedLM     Dependent Variable:     log_Modal_Price
No. Observations:       1221        Method:                 REML           
No. Groups:             4           Scale:                  0.0244         
Min. group size:        92          Log-Likelihood:         496.1708       
Max. group size:        527         Converged:              No             
Mean group size:        305.2                                              
---------------------------------------------------------------------------
                                Coef.  Std.Err.   z    P>|z|  [0.025 0.975]
---------------------------------------------------------------------------
Intercept                      -39.780   17.521 -2.270 0.023 -74.120 -5.440
C(Season)[T.Southwest Monsoon]   0.170    0.015 11.596 0.000   0.142  0.199
C(Season)[T.Summer]              0.102    0.015  6.6



Product: Ginger(Green)|Green Ginger|FAQ
                    Mixed Linear Model Regression Results
Model:                  MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:       3054         Method:                  REML           
No. Groups:             8            Scale:                   0.0864         
Min. group size:        61           Log-Likelihood:          -636.4282      
Max. group size:        660          Converged:               Yes            
Mean group size:        381.8                                                
-----------------------------------------------------------------------------
                                Coef.  Std.Err.    z    P>|z|  [0.025  0.975]
-----------------------------------------------------------------------------
Intercept                      940.222   17.795  52.835 0.000 905.344 975.101
C(Season)[T.Southwest Monsoon]   0.201    0.016  12.729 0.000   0.170   0.232
C(Season)[T.Summer]              0.133    0.



Product: Indian Beans (Seam)|Indian Beans (Seam)|FAQ
                  Mixed Linear Model Regression Results
Model:                 MixedLM     Dependent Variable:     log_Modal_Price
No. Observations:      1409        Method:                 REML           
No. Groups:            3           Scale:                  0.0587         
Min. group size:       370         Log-Likelihood:         -31.1437       
Max. group size:       587         Converged:              Yes            
Mean group size:       469.7                                              
--------------------------------------------------------------------------
                               Coef.  Std.Err.   z    P>|z|  [0.025 0.975]
--------------------------------------------------------------------------
Intercept                      33.266   24.311  1.368 0.171 -14.383 80.916
C(Season)[T.Southwest Monsoon]  0.018    0.021  0.835 0.404  -0.024  0.059
C(Season)[T.Summer]             0.003    0.021  0.156 0.876  -0.03



Product: Little gourd (Kundru)|Other|FAQ
                   Mixed Linear Model Regression Results
Model:                 MixedLM      Dependent Variable:      log_Modal_Price
No. Observations:      3509         Method:                  REML           
No. Groups:            6            Scale:                   0.0383         
Min. group size:       539          Log-Likelihood:          700.3421       
Max. group size:       661          Converged:               No             
Mean group size:       584.8                                                
----------------------------------------------------------------------------
                                Coef.  Std.Err.   z    P>|z|  [0.025  0.975]
----------------------------------------------------------------------------
Intercept                      -43.670   10.215 -4.275 0.000 -63.692 -23.649
C(Season)[T.Southwest Monsoon]   0.145    0.009 15.455 0.000   0.126   0.163
C(Season)[T.Summer]             -0.056    0.011 -5.090 



Product: Onion|Big|FAQ
                     Mixed Linear Model Regression Results
Model:                  MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:       3086          Method:                   REML           
No. Groups:             7             Scale:                    0.0752         
Min. group size:        108           Log-Likelihood:           -447.2626      
Max. group size:        605           Converged:                No             
Mean group size:        440.9                                                  
-------------------------------------------------------------------------------
                                Coef.   Std.Err.    z    P>|z|  [0.025   0.975]
-------------------------------------------------------------------------------
Intercept                      -129.786   16.076  -8.073 0.000 -161.295 -98.278
C(Season)[T.Southwest Monsoon]   -0.515    0.014 -36.838 0.000   -0.542  -0.487
C(Season)[T.Summer]              -0.67



Product: Pineapple|Other|Medium
                  Mixed Linear Model Regression Results
Model:                 MixedLM     Dependent Variable:     log_Modal_Price
No. Observations:      1354        Method:                 REML           
No. Groups:            3           Scale:                  0.0883         
Min. group size:       372         Log-Likelihood:         -298.5092      
Max. group size:       559         Converged:              Yes            
Mean group size:       451.3                                              
--------------------------------------------------------------------------
                               Coef.  Std.Err.   z    P>|z|  [0.025 0.975]
--------------------------------------------------------------------------
Intercept                      33.005   33.274  0.992 0.321 -32.212 98.221
C(Season)[T.Southwest Monsoon] -0.153    0.026 -5.919 0.000  -0.204 -0.103
C(Season)[T.Summer]            -0.126    0.027 -4.743 0.000  -0.178 -0.074
C(Season)[T.



Product: Potato|Other|FAQ
                     Mixed Linear Model Regression Results
Model:                   MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:        1055          Method:                   REML           
No. Groups:              2             Scale:                    0.0264         
Min. group size:         465           Log-Likelihood:           399.8166       
Max. group size:         590           Converged:                Yes            
Mean group size:         527.5                                                  
--------------------------------------------------------------------------------
                                Coef.   Std.Err.    z    P>|z|  [0.025   0.975] 
--------------------------------------------------------------------------------
Intercept                      -161.498   15.670 -10.306 0.000 -192.210 -130.786
C(Season)[T.Southwest Monsoon]   -0.129    0.014  -9.171 0.000   -0.156   -0.101
C(Season)[T.Summer]     

  sdf[0:self.k_fe, 1] = np.sqrt(np.diag(self.cov_params()[0:self.k_fe]))


Product: Pumpkin|Pumpkin|FAQ
                  Mixed Linear Model Regression Results
Model:                MixedLM     Dependent Variable:     log_Modal_Price
No. Observations:     2546        Method:                 REML           
No. Groups:           6           Scale:                  0.0301         
Min. group size:      71          Log-Likelihood:         795.1518       
Max. group size:      651         Converged:              No             
Mean group size:      424.3                                              
-------------------------------------------------------------------------
                               Coef.  Std.Err.   z    P>|z| [0.025 0.975]
-------------------------------------------------------------------------
Intercept                      68.344   11.427  5.981 0.000 45.948 90.741
C(Season)[T.Southwest Monsoon]  0.157    0.010 16.305 0.000  0.138  0.176
C(Season)[T.Summer]             0.053    0.012  4.497 0.000  0.030  0.076
C(Season)[T.Winter]        



Product: Tapioca|Other|FAQ
                     Mixed Linear Model Regression Results
Model:                  MixedLM       Dependent Variable:       log_Modal_Price
No. Observations:       2445          Method:                   REML           
No. Groups:             5             Scale:                    0.0221         
Min. group size:        343           Log-Likelihood:           1154.9253      
Max. group size:        587           Converged:                Yes            
Mean group size:        489.0                                                  
-------------------------------------------------------------------------------
                                Coef.   Std.Err.    z    P>|z|  [0.025   0.975]
-------------------------------------------------------------------------------
Intercept                      -105.936   10.587 -10.006 0.000 -126.687 -85.185
C(Season)[T.Southwest Monsoon]   -0.113    0.010 -11.857 0.000   -0.132  -0.094
C(Season)[T.Summer]              -



In [None]:
df[df['Product_Type'] == 'Amla(Nelli Kai)|Amla|FAQ']['Market'].value_counts().shape

In [None]:
df[df['Product_Type'] == 'Ashgourd|Ashgourd|FAQ']['Market'].value_counts().shape

In [None]:
df[df['Product_Type'] == 'Arecanut(Betelnut/Supari)|Supari|FAQ']['Market'].value_counts().shape

In [None]:
def prepare_for_mixedlm(df, min_years=2, min_obs=30):
    refined = []
    for prod, group in df.groupby("Product_Type"):
        # keep only if there’s enough obs
        if len(group) < min_obs:
            continue

        # check markets have at least min_years variation
        year_counts = group.groupby("Market")["Year"].nunique()
        if year_counts.max() < min_years:
            continue

        refined.append(group)

    return pd.concat(refined, axis=0)

df = prepare_for_mixedlm(df)

In [None]:
df['Product_Type'].nunique(), 

In [None]:
df[df['Product_Type'] == 'Amphophalus|Amphophalus|FAQ'].shape

In [None]:
df['log_Modal_Price'] = df['Modal_Price'].apply(lambda x: np.log(x) if x > 0 else 0)
for Product_Type in df['Product_Type'].unique():
    print(f"Product Type: {Product_Type}")
    subset = df[df['Product_Type'] == Product_Type]
    model = smf.mixedlm('log_Modal_Price ~ C(Season) + Year',
                        data=subset,
                        groups=subset['Market'],
                        re_formula='Year')
    result = model.fit(method="lbfgs", reml=False)
    print(result.summary())
    print("\n" + "="*80 + "\n")

In [None]:
m_ml   = m.fit(method="lbfgs", reml=False)  # model selection
m_reml = m.fit(method="lbfgs", reml=True)   # final interpretation

In [None]:
df.dtypes

In [None]:
pd.crosstab(df['Market'], df['Season'])

In [None]:
pd.crosstab(df['Product_Type'], df['Market'])

In [None]:
pd.crosstab(df['Market'], df['Year'])

In [None]:
pd.crosstab(df['Product_Type'], df['Year'])

In [None]:
model = smf.mixedlm(
    "Modal_Price ~ C(Product_Type) + C(Season) + Year",
    data=df,
    groups=df["Market"],
    re_formula="~Year"
)
result = model.fit(method="lbfgs", reml=True)


In [None]:
result.summary()

**Observation:**  
Mixed-effects model results show clear, consistent price differences between markets, even after controlling for product type and arrival date.  
This suggests that market-level factors (e.g., supply chain, demand patterns, VFPCK vs. non-VFPCK operations) play a significant role in price variation.  
These coefficients provide a baseline-adjusted measure of each market’s typical pricing, which we will use to rank markets by relative price levels.

In [None]:
# Get market ranking
fe_params = result.fe_params
market_ranking = fe_params.filter(like='C(Market)')
market_ranking.index = market_ranking.index.str.replace('C(Market)[T.', '').str.replace(']', '')
market_ranking['Aluva'] = 0  # Add Aluva as baseline
market_ranking.sort_values(ascending=True, inplace=True)
print(market_ranking)

We observe that most VFPCK markets tend to offer lower prices compared to non-VFPCK markets.  
Earlier seasonal analysis showed that VFPCK prices also fluctuate with seasonality.  
From a consumer’s perspective, these lower prices are beneficial, but for farmers, they may reduce profit margins.  

Higher prices in urban or semi-urban markets like Angamaly and Ernakulam could be driven by stronger demand and higher willingness to pay, possibly amplified by transportation and logistics costs.  
However, to draw a stronger conclusion, we would need to map each market’s location, classify them as urban/rural, and examine their surrounding population and demand patterns.