In [1]:
def add_constant_column(df):
    """
    Return a copy of ``df`` with an intercept column prepended.

    The new column is named 'const', holds the value 1 in every row, and is
    placed at position 0 so it precedes all existing columns. The input
    DataFrame itself is left untouched.

    Parameters:
    df (pd.DataFrame): Input DataFrame.

    Returns:
    pd.DataFrame: A new DataFrame whose first column is 'const'.

    Raises:
    ValueError: Propagated from ``DataFrame.insert`` when ``df`` already
        contains a column named 'const'.
    """
    # Work on a copy so the caller's frame is never mutated.
    augmented = df.copy()

    # insert() works in place on the copy; position 0 makes 'const' the leading column.
    augmented.insert(loc=0, column='const', value=1)

    return augmented

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from mlModelSaver import MlModelSaver

# Location of the Mowers workbook used throughout this notebook.
MOWERS_XLSX_URL = 'https://www.dropbox.com/scl/fi/y2rktyoqb8rrshrnlpvw1/Mowers.xlsx?rlkey=e5bi1d8sx5hml4ylfkjv7cryh&dl=1'

# Read the workbook into a DataFrame and preview the first rows.
mowersDf = pd.read_excel(MOWERS_XLSX_URL)
mowersDf.head()

Unnamed: 0,Sales,Temperature,Advertising,Discount
0,17235,33,15,5.0
1,19854,42,25,5.0
2,45786,58,40,10.0
3,49745,67,70,20.0
4,65894,73,75,20.0


In [3]:
# statsmodels documentation: https://www.statsmodels.org/stable/index.html
import statsmodels.api as sm
# Your answer

In [4]:
# Design matrix: the three predictors with a leading 'const' column of ones,
# which add_constant_column prepends so OLS can estimate an intercept.
salesDesignMatrix = add_constant_column(
    mowersDf[["Temperature", "Advertising", "Discount"]]
)

# Ordinary least squares regression of Sales on the design matrix.
modelPredictSaleByTemperatureAdvertisingDiscount = sm.OLS(
    endog=mowersDf["Sales"],
    exog=salesDesignMatrix,
)
modelPredictSaleByTemperatureAdvertisingDiscountFit = (
    modelPredictSaleByTemperatureAdvertisingDiscount.fit()
)

# Print the plain-text regression summary table.
print(modelPredictSaleByTemperatureAdvertisingDiscountFit.summary())

                            OLS Regression Results                            
Dep. Variable:                  Sales   R-squared:                       0.943
Model:                            OLS   Adj. R-squared:                  0.935
Method:                 Least Squares   F-statistic:                     110.5
Date:                Sun, 16 Jun 2024   Prob (F-statistic):           1.29e-12
Time:                        15:57:16   Log-Likelihood:                -233.93
No. Observations:                  24   AIC:                             475.9
Df Residuals:                      20   BIC:                             480.6
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const       -1730.0915   5791.339     -0.299      

In [5]:
# add_constant_column(mowersDf[["Temperature", "Advertising", "Discount"]])

In [6]:
from mlModelSaver import MlModelSaver
# Saver configuration; presumably resolves the storage folder relative to this
# notebook ("../models") -- confirm against the mlModelSaver documentation.
mlModelSaverInstance = MlModelSaver({
    "baseRelativePath": "..",
    "modelsFolder": "models",
})

# Every raw input feature is declared with the same "float" type.
salesModelInputs = [
    {"name": featureName, "type": "float"}
    for featureName in ("Temperature", "Advertising", "Discount")
]

# Metadata stored alongside the fitted model. The transformer is the same
# function used to build the training design matrix.
salesModelExportConfig = {
    "modelName": "modelPredictSaleByTemperatureAdvertisingDiscountFit",
    "description": "",
    "modelType": "sm.OLS",
    "inputs": salesModelInputs,
    "transformer": add_constant_column,
    "outputs": [{"name": "Sales", "type": "float"}],
}

# Export the fitted results object; the returned object is displayed below.
loadedModel = mlModelSaverInstance.exportModel(
    modelPredictSaleByTemperatureAdvertisingDiscountFit,
    salesModelExportConfig,
)
loadedModel

<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x3268af380>

In [7]:

# A single observation holding only the raw features (no 'const' column).
testData = [{"Temperature": 42, "Advertising": 15, "Discount": 5}]

# One-row DataFrame whose columns are the dictionary keys.
testDf = pd.DataFrame(data=testData)


In [8]:
# Predict straight from the fitted results object; the 'const' column has to be
# added by hand so the columns match the design matrix used for fitting.
modelPredictSaleByTemperatureAdvertisingDiscountFit.predict(
    exog=add_constant_column(testDf)
)


0    19590.46727
dtype: float64

In [9]:
# Predict through the exported model using the raw feature frame (no 'const'
# column). The recorded result matches the direct statsmodels prediction, so the
# registered transformer is presumably applied internally -- confirm against the
# mlModelSaver documentation.
loadedModel.mlModelSavePredict(testDf)

[{'Sales': 19590.467270313893}]