In [1]:
import numpy as np
import pandas as pd

In [2]:
# https://www.statsmodels.org/stable/index.html
import statsmodels.api as sm

In [3]:
# Optional: download the dataset from https://www.dropbox.com/scl/fi/zt2vtwhpz8ndblsxqdqx1/Salary_MIS.xlsx?rlkey=2uk6m7m9w90isv6zsynhhhpyv&st=gxumjns5&dl=1
# and upload it to Colab if you prefer to read it from a local file instead of the URL.

In [4]:
# Read the salary workbook straight from its hosted location into a DataFrame.
sallaryMisDf = pd.read_excel(
    io="https://www.dropbox.com/scl/fi/zt2vtwhpz8ndblsxqdqx1/Salary_MIS.xlsx?rlkey=2uk6m7m9w90isv6zsynhhhpyv&st=gxumjns5&dl=1"
)

In [5]:
# Alternative: read a local copy of the workbook instead of the URL.
# sallaryMisDf = pd.read_excel("./Salary_MIS.xlsx")

In [6]:
# Show the loaded frame; the rendered output elides the middle rows.
sallaryMisDf

Unnamed: 0,Salary,GPA,MIS,Statistics
0,72,3.53,1,0
1,66,2.86,1,0
2,72,3.69,0,0
3,63,3.24,0,0
4,65,3.21,0,0
...,...,...,...,...
115,66,3.27,0,0
116,63,2.86,1,0
117,78,3.04,1,1
118,64,2.99,0,0


In [7]:
# Number of rows and columns in the loaded data.
sallaryMisDf.shape

(120, 4)

In [8]:
# Per-column summary statistics (count, mean, std, min, quartiles, max).
sallaryMisDf.describe()

Unnamed: 0,Salary,GPA,MIS,Statistics
count,120.0,120.0,120.0,120.0
mean,69.875,3.24275,0.316667,0.341667
std,6.594577,0.493834,0.467127,0.476257
min,53.0,2.41,0.0,0.0
25%,65.75,2.805,0.0,0.0
50%,70.0,3.28,0.0,0.0
75%,73.25,3.6925,1.0,1.0
max,88.0,3.98,1.0,1.0


In [27]:
def transformAllInteractions(df):
    """Build the regression design matrix with all pairwise interaction terms.

    Works on a copy of ``df`` (the caller's frame is never modified) and adds:

    * ``misStatistics`` = ``MIS * Statistics``
    * ``gpaMis``        = ``GPA * MIS``
    * ``gpaStatistics`` = ``GPA * Statistics``
    * ``const``         = 1, inserted as the first column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns ``GPA``, ``MIS`` and ``Statistics``.
        It may be empty (zero rows).

    Returns
    -------
    pandas.DataFrame
        New frame with columns ordered as ``const``, the original columns,
        then ``misStatistics``, ``gpaMis``, ``gpaStatistics``.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    ValueError
        If ``df`` already contains a ``const`` column (raised by
        ``DataFrame.insert``).
    """
    df = df.copy()

    # Grab the source columns once, before any new column is added.
    gpa = df["GPA"]
    mis = df["MIS"]
    statistics = df["Statistics"]

    # Vectorized column products instead of a row-by-row ``apply``: this is
    # faster and also works on an empty frame, where ``apply(axis=1)`` returns
    # a DataFrame that cannot be assigned to a single column.
    # The cast keeps the interaction columns floating point, matching what the
    # row-wise version produced for this data (rows were upcast to float).
    df["misStatistics"] = (mis * statistics).astype(float)
    df["gpaMis"] = (gpa * mis).astype(float)
    df["gpaStatistics"] = (gpa * statistics).astype(float)

    # Constant column goes first so it leads the design matrix.
    df.insert(0, "const", 1)

    return df

In [28]:
# Preview the design matrix built from the three predictor columns.
newDfs = transformAllInteractions(
    sallaryMisDf.loc[:, ["GPA", "MIS", "Statistics"]]
)
newDfs.head(5)

Unnamed: 0,const,GPA,MIS,Statistics,misStatistics,gpaMis,gpaStatistics
0,1,3.53,1,0,0.0,3.53,0.0
1,1,2.86,1,0,0.0,2.86,0.0
2,1,3.69,0,0,0.0,0.0,0.0
3,1,3.24,0,0,0.0,0.0,0.0
4,1,3.21,0,0,0.0,0.0,0.0


In [29]:
# Unfitted OLS model: Salary regressed on the predictors plus all pairwise interactions.
salaryBasedOnGpaMisStatistics_Transfoms_misXStatistics = sm.OLS(
    endog=sallaryMisDf.loc[:, "Salary"],
    exog=transformAllInteractions(
        sallaryMisDf.loc[:, ["GPA", "MIS", "Statistics"]]
    ),
)

In [35]:
# Fit the OLS model and keep the results object; it is reused for export and prediction.
salaryBasedOnGpaMisStatistics_Transfoms_misXStatisticsFit = salaryBasedOnGpaMisStatistics_Transfoms_misXStatistics.fit()
# Uncomment to inspect the regression table:
# salaryBasedOnGpaMisStatistics_Transfoms_misXStatisticsFit.summary()

In [36]:
from mlModelSaver import MlModelSaver
# Configure the model saver.
# NOTE(review): presumably exported models are written under
# <baseRelativePath>/<modelsFolder>, i.e. "../models" — confirm against the mlModelSaver docs.
mlModelSaverInstance = MlModelSaver({
    "baseRelativePath": "..",
    "modelsFolder": "models"
})

# Export the fitted model together with its input/output schema and the
# transformer that expands raw inputs into the design matrix.
# The value returned by exportModel is kept and displayed as the cell result.
loadedModel = mlModelSaverInstance.exportModel(
    salaryBasedOnGpaMisStatistics_Transfoms_misXStatisticsFit,
    dict(
        modelName="salaryBasedOnGpaMisStatistics_Transfoms_misXStatisticsFit",
        description="salaryBasedOnGpaMisStatistics_Transfoms_misXStatisticsFit",
        modelType="sm.OLS",
        inputs=[
            dict(name="GPA", type="float"),
            dict(name="MIS", type="binary"),
            dict(name="Statistics", type="binary"),
        ],
        transformer=transformAllInteractions,
        outputs=[
            dict(name="Salary", type="float"),
        ],
    ),
)
loadedModel

<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x3689869c0>

In [34]:
# print(loadedModel.summary())

In [37]:

# Single observation used to sanity-check predictions.
testData = [dict(GPA=3.53, MIS=1, Statistics=0)]

# Wrap the records in a DataFrame, one row per record.
testDf = pd.DataFrame(data=testData)


In [38]:
# Predict directly with the fitted model; the raw inputs are expanded by the
# transformer first so they match the columns the model was fitted on.
salaryBasedOnGpaMisStatistics_Transfoms_misXStatisticsFit.predict(
    exog=transformAllInteractions(testDf)
)

0    73.392068
dtype: float64

In [39]:
# Predict through the exported model's helper, passing the raw (untransformed) inputs.
# NOTE(review): presumably the registered transformer is applied internally — the
# recorded result matches the direct prediction above; confirm against mlModelSaver.
loadedModel.mlModelSavePredict(testDf)

[{'Salary': 73.39206844154909}]