In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
# https://www.statsmodels.org/stable/index.html
import statsmodels.api as sm

In [4]:
import warnings
# Suppress every warning for the rest of the session (no category or module
# filter is given).
# NOTE(review): this also hides any warnings raised by the libraries used
# below — confirm that is intended.
warnings.filterwarnings("ignore")

In [5]:
from functools import partial

In [6]:
from dotenv import load_dotenv

from pathlib import Path

# Relative path to an optional dotenv file; nothing happens when it is absent.
env_path = Path("../../.env-live")

if env_path.exists():
    # Load first and report afterwards, so the message is only printed once
    # the file has actually been read.
    load_dotenv(dotenv_path=env_path)
    print('envs Loaded')
from jrjModelRegistry.jrjModelRegistry import registerAJrjModel

ModuleNotFoundError: No module named 'jrjModelRegistry'

In [None]:
def generalRegressionPredictor(self, transformedData):
    """Return a fitted model's predictions for already-transformed data.

    Parameters
    ----------
    self : object exposing a ``predict`` method
        Bound to a fitted result with ``functools.partial`` further down in
        this notebook.
    transformedData :
        Forwarded to ``self.predict`` unchanged.

    Returns
    -------
    Whatever ``self.predict(transformedData)`` returns.
    """
    predictions = self.predict(transformedData)
    return predictions

In [None]:
# Load the wages data set from the Excel workbook next to the notebook.
# The cells below read the columns "Educ", "Age" and "Wage" from it.
wagesDf = pd.read_excel("./Wages.xlsx")
# Remote copy (presumably the same workbook — TODO confirm), currently disabled:
# wagesDf = pd.read_excel("https://www.dropbox.com/scl/fi/v7c1c8a3cnncuv1fo28es/Wages.xlsx?rlkey=vli12nwph687hvn9jskgf73a1&st=s862pfm6&dl=1")
wagesDf

In [None]:
# Total number of cells in the frame (rows x columns).
wagesDf.size

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) of the frame's columns.
wagesDf.describe()

In [None]:
# (number of rows, number of columns)
wagesDf.shape

In [None]:
# Plotting
# NOTE(review): this figure is created in a cell of its own. With the Jupyter
# inline backend a figure is usually rendered (empty) and closed when its cell
# finishes, so the 8x8 size would not carry over to the scatter plot drawn in
# the next cell — confirm.
fig1 = plt.figure(
  figsize=(8, 8)
)

In [None]:
# Scatter plot of wage against years of education.
# The figure is created in this cell (as the later plotting cells of this
# notebook do) so that the 8x8 size applies to this plot: a figure created in
# an earlier cell is normally rendered and closed by the inline backend before
# this cell runs.
plt.figure(
  figsize=(8, 8)
)

plt.scatter(
  wagesDf["Educ"],
  wagesDf["Wage"],
  color='blue',
  alpha=0.9,
  label='Data Points - scatter',
)

plt.title('Education Level vs. Wage with OLS Regression')
plt.xlabel('Education Level(yr)')
plt.ylabel('Wage K')
plt.legend()
plt.grid(True)

plt.show()

In [None]:
def wageModel1Transformer(dataForTransfer = None):
    """Build the design matrix of wage model 1: a constant column plus ``Educ``.

    Parameters
    ----------
    dataForTransfer : pandas.DataFrame or any input accepted by ``pd.DataFrame``
        Must provide an ``Educ`` column. A DataFrame argument is copied first,
        so the caller's frame is left untouched.

    Returns
    -------
    pandas.DataFrame
        The ``Educ`` column with the constant column added by
        ``sm.add_constant``.
    """
    # Imports live inside the function — presumably so it stays self-contained
    # when the registry stores it; TODO confirm.
    import pandas as pd
    import statsmodels.api as sm

    frame = (
        dataForTransfer.copy()
        if isinstance(dataForTransfer, pd.DataFrame)
        else pd.DataFrame(dataForTransfer)
    )
    predictors = frame[['Educ']]
    # has_constant='add' requests the constant column unconditionally.
    return sm.add_constant(predictors, has_constant='add')

In [None]:
# Ordinary least squares: Wage regressed on the model-1 design matrix
# (constant + Educ).
wageModel1 = sm.OLS(endog=wagesDf["Wage"], exog=wageModel1Transformer(wagesDf))
wageModel1Fit = wageModel1.fit()
print(wageModel1Fit.summary())

In [None]:
# One example observation; it carries both predictors, so every transformer in
# this notebook can consume it.
wage1SampleData = dict(Educ=[12], Age=[76])

In [None]:

# Attach the preprocessing step and a bound predictor to the fitted result,
# then register the result together with its metadata.
wageModel1Fit.transformer = wageModel1Transformer
wageModel1Fit.mainPredictor = partial(generalRegressionPredictor, wageModel1Fit)
registerAJrjModel(
    wageModel1Fit,
    dict(
        modelName="saadet_doga_hascelik__wageModel1Fit",
        version="1.0.1",
        params=wageModel1Fit.params.to_dict(),
        score=float(wageModel1Fit.rsquared),
        # NOTE(review): key is spelled 'modelLibraray' — confirm the registry
        # expects this spelling before changing it.
        modelLibraray='sm.OLS',
        libraryMetadata=dict(
            pvalues=wageModel1Fit.pvalues.to_dict(),
            r_squared=float(wageModel1Fit.rsquared),
            adj_r_squared=float(wageModel1Fit.rsquared_adj),
        ),
        sampleData=dict(dataForTransfer=wage1SampleData),
    ),
)

In [None]:
# In-sample predictions of model 1, stored as a new column of wagesDf.
predictedWage1 = wageModel1Fit.predict(exog=wageModel1Transformer(wagesDf))
wagesDf['predictedWage1'] = predictedWage1
wagesDf

In [None]:
# Observed wages against education, with the model-1 fitted line on top.
plt.scatter(wagesDf["Educ"], wagesDf["Wage"], color='blue', alpha=0.9, label='Data Points - scatter')
plt.plot(wagesDf["Educ"], wagesDf["predictedWage1"], color='red', label='OLS Regression - predictedWage1')

# Decorations
plt.title('Educ Level vs. Wage with OLS Regression')
plt.xlabel('Educ Level(yr)')
plt.ylabel('Wage K')
plt.legend()
plt.grid(True)

plt.show()

In [None]:
# Observed wages against age (raw data only, no fitted line in this cell).
plt.scatter(wagesDf["Age"], wagesDf["Wage"], color='blue', alpha=0.9, label='Data Points - scatter')

# Decorations
plt.title('Age vs. Wage with OLS Regression')
plt.xlabel('Age')
plt.ylabel('Wage K')
plt.legend()
plt.grid(True)

plt.show()

In [None]:
def wageModel2Transformer(dataForTransfer = None):
    """Build the design matrix of wage model 2: a constant column plus ``Age``.

    Parameters
    ----------
    dataForTransfer : pandas.DataFrame or any input accepted by ``pd.DataFrame``
        Must provide an ``Age`` column. A DataFrame argument is copied first,
        so the caller's frame is left untouched.

    Returns
    -------
    pandas.DataFrame
        The ``Age`` column with the constant column added by
        ``sm.add_constant``.
    """
    import pandas as pd
    import statsmodels.api as sm

    if not isinstance(dataForTransfer, pd.DataFrame):
        frame = pd.DataFrame(dataForTransfer)
    else:
        frame = dataForTransfer.copy()
    # has_constant='add' requests the constant column unconditionally.
    designMatrix = sm.add_constant(frame[['Age']], has_constant='add')
    return designMatrix

In [None]:
# Ordinary least squares: Wage regressed on the model-2 design matrix
# (constant + Age).
wageModel2 = sm.OLS(endog=wagesDf["Wage"], exog=wageModel2Transformer(wagesDf))
wageModel2Fit = wageModel2.fit()
print(wageModel2Fit.summary())

In [None]:
# Attach the preprocessing step and a bound predictor to the fitted result,
# then register the result together with its metadata.
wageModel2Fit.transformer = wageModel2Transformer
wageModel2Fit.mainPredictor = partial(generalRegressionPredictor, wageModel2Fit)
registerAJrjModel(
    wageModel2Fit,
    dict(
        modelName="saadet_doga_hascelik__wageModel2Fit",
        version="1.0.1",
        params=wageModel2Fit.params.to_dict(),
        score=float(wageModel2Fit.rsquared),
        # NOTE(review): key is spelled 'modelLibraray' — confirm the registry
        # expects this spelling before changing it.
        modelLibraray='sm.OLS',
        libraryMetadata=dict(
            pvalues=wageModel2Fit.pvalues.to_dict(),
            r_squared=float(wageModel2Fit.rsquared),
            adj_r_squared=float(wageModel2Fit.rsquared_adj),
        ),
        # wage1SampleData carries an "Age" entry, which this model's
        # transformer selects.
        sampleData=dict(dataForTransfer=wage1SampleData),
    ),
)

In [None]:
# In-sample predictions of model 2, stored as a new column of wagesDf.
predictedWage2 = wageModel2Fit.predict(exog=wageModel2Transformer(wagesDf))
wagesDf['predictedWage2'] = predictedWage2
wagesDf

In [None]:
# Plot observed wages against age with the model-2 fitted line on top.
plt.figure(
  figsize=(8, 8)
)

plt.scatter(
  wagesDf["Age"],
  wagesDf["Wage"],
  color='blue',
  alpha=0.9,
  label='Data Points - scatter',
)

plt.plot(
  wagesDf["Age"],
  wagesDf["predictedWage2"],
  color='red',
  label='OLS Regression - predictedWage2'
)
# Title fixed from 'Age. Wage ...' to match the wording of the earlier
# Age scatter plot.
plt.title('Age vs. Wage with OLS Regression')
plt.xlabel('Age')
plt.ylabel('Wage K')
plt.legend()
plt.grid(True)

plt.show()

In [None]:
def wageModel3Transformer(dataForTransfer = None):
    """Build the design matrix of wage model 3: constant, ``Age`` and ``Age`` squared.

    Parameters
    ----------
    dataForTransfer : pandas.DataFrame or any input accepted by ``pd.DataFrame``
        Must provide an ``Age`` column. A DataFrame argument is copied first,
        so the derived ``agePower2`` column is never written to the caller's
        frame.

    Returns
    -------
    pandas.DataFrame
        Columns ``Age`` and ``agePower2`` with the constant column added by
        ``sm.add_constant``.
    """
    import pandas as pd
    import statsmodels.api as sm
    if isinstance(dataForTransfer, pd.DataFrame):
        df = dataForTransfer.copy()
    else:
        df = pd.DataFrame(dataForTransfer)
    # Vectorised square of the Age column: same values as a row-wise
    # ``df.apply(..., axis=1)`` without one Python call per row.
    df['agePower2'] = df['Age'] * df['Age']
    # has_constant='add' requests the constant column unconditionally.
    dfTransformer = sm.add_constant(df[['Age', 'agePower2']],has_constant='add')
    return dfTransformer

In [None]:
# Ordinary least squares: Wage regressed on the model-3 design matrix
# (constant + Age + Age squared).
wageModel3 = sm.OLS(endog=wagesDf["Wage"], exog=wageModel3Transformer(wagesDf))
wageModel3Fit = wageModel3.fit()
print(wageModel3Fit.summary())

In [None]:
# Attach the preprocessing step and a bound predictor to the fitted result,
# then register the result together with its metadata.
wageModel3Fit.transformer = wageModel3Transformer
wageModel3Fit.mainPredictor = partial(generalRegressionPredictor, wageModel3Fit)
registerAJrjModel(
    wageModel3Fit,
    dict(
        modelName="saadet_doga_hascelik__wageModel3Fit",
        version="1.0.1",
        params=wageModel3Fit.params.to_dict(),
        score=float(wageModel3Fit.rsquared),
        # NOTE(review): key is spelled 'modelLibraray' — confirm the registry
        # expects this spelling before changing it.
        modelLibraray='sm.OLS',
        libraryMetadata=dict(
            pvalues=wageModel3Fit.pvalues.to_dict(),
            r_squared=float(wageModel3Fit.rsquared),
            adj_r_squared=float(wageModel3Fit.rsquared_adj),
        ),
        # wage1SampleData carries an "Age" entry, which this model's
        # transformer reads.
        sampleData=dict(dataForTransfer=wage1SampleData),
    ),
)

In [None]:
# In-sample predictions of model 3, stored as a new column of wagesDf.
predictedWage3 = wageModel3Fit.predict(exog=wageModel3Transformer(wagesDf))
wagesDf['predictedWage3'] = predictedWage3
wagesDf

In [None]:
# Reorder the rows by Age — presumably so the line plots in the next cell
# connect the predicted points from left to right (matters for the curved
# model-3 fit).
wagesDf = wagesDf.sort_values(by="Age")

In [None]:
# Plot observed wages against age with the model-2 (red) and model-3 (green)
# fitted curves on top.
plt.figure(
  figsize=(8, 8)
)

plt.scatter(
  wagesDf["Age"],
  wagesDf["Wage"],
  color='blue',
  alpha=0.9,
  label='Data Points - scatter',
)

plt.plot(
  wagesDf["Age"],
  wagesDf["predictedWage2"],
  color='red',
  label='OLS Regression - predictedWage2'
)

plt.plot(
  wagesDf["Age"],
  wagesDf["predictedWage3"],
  color='green',
  label='OLS Regression - predictedWage3'
)
# Title fixed from 'Age. Wage ...' to match the wording of the earlier
# Age scatter plot.
plt.title('Age vs. Wage with OLS Regression')
plt.xlabel('Age')
plt.ylabel('Wage K')
plt.legend()
plt.grid(True)

plt.show()

In [None]:
# Display the frame, now sorted by Age and carrying the predictedWage1..3 columns.
wagesDf

In [None]:
# 3D scatter of the raw data: Age and Educ on the horizontal axes, Wage on
# the vertical axis.
fig = plt.figure()
ax = plt.axes(projection ="3d")

# Creating plot
ax.scatter3D(
  wagesDf["Age"],
  wagesDf["Educ"],
  wagesDf["Wage"],
  color = "green"
)
# Title now names the plotted variables (it previously read "Cost,Grad",
# which are not columns used anywhere in this notebook).
plt.title("Age,Educ -> Wage")
ax.set_xlabel('Age')
ax.set_ylabel('Educ')
ax.set_zlabel('Wage')

# show plot
plt.show()

In [None]:
def wageModel4Transformer(dataForTransfer = None):
    """Build the design matrix of wage model 4: constant, ``Educ`` and ``Age``.

    Parameters
    ----------
    dataForTransfer : pandas.DataFrame or any input accepted by ``pd.DataFrame``
        Must provide ``Educ`` and ``Age`` columns. A DataFrame argument is
        copied first, so the caller's frame is left untouched.

    Returns
    -------
    pandas.DataFrame
        Columns ``Educ`` and ``Age`` with the constant column added by
        ``sm.add_constant``.
    """
    import pandas as pd
    import statsmodels.api as sm

    frame = (
        dataForTransfer.copy()
        if isinstance(dataForTransfer, pd.DataFrame)
        else pd.DataFrame(dataForTransfer)
    )
    predictorColumns = ['Educ', 'Age']
    # has_constant='add' requests the constant column unconditionally.
    return sm.add_constant(frame[predictorColumns], has_constant='add')

In [None]:
# Ordinary least squares: Wage regressed on the model-4 design matrix
# (constant + Educ + Age).
wageModel4 = sm.OLS(endog=wagesDf["Wage"], exog=wageModel4Transformer(wagesDf))
wageModel4Fit = wageModel4.fit()
print(wageModel4Fit.summary())

In [None]:
# Attach the preprocessing step and a bound predictor to the fitted result,
# then register the result together with its metadata.
wageModel4Fit.transformer = wageModel4Transformer
wageModel4Fit.mainPredictor = partial(generalRegressionPredictor, wageModel4Fit)
registerAJrjModel(
    wageModel4Fit,
    dict(
        modelName="saadet_doga_hascelik__wageModel4Fit",
        version="1.0.1",
        params=wageModel4Fit.params.to_dict(),
        score=float(wageModel4Fit.rsquared),
        # NOTE(review): key is spelled 'modelLibraray' — confirm the registry
        # expects this spelling before changing it.
        modelLibraray='sm.OLS',
        libraryMetadata=dict(
            pvalues=wageModel4Fit.pvalues.to_dict(),
            r_squared=float(wageModel4Fit.rsquared),
            adj_r_squared=float(wageModel4Fit.rsquared_adj),
        ),
        # wage1SampleData carries both "Educ" and "Age", which this model's
        # transformer selects.
        sampleData=dict(dataForTransfer=wage1SampleData),
    ),
)

In [None]:
# In-sample predictions of model 4, stored as a new column of wagesDf.
predictedWage4 = wageModel4Fit.predict(exog=wageModel4Transformer(wagesDf))
wagesDf['predictedWage4'] = predictedWage4
wagesDf

In [None]:
# Coefficients of the model-4 plane: Wage = const + coefAge*Age + coefEduc*Educ
intercept, coefAge, coefEduc = (
    wageModel4Fit.params[term] for term in ('const', 'Age', 'Educ')
)

# 100 x 100 grid spanning the observed Age and Educ ranges
ageRange = np.linspace(wagesDf['Age'].min(), wagesDf['Age'].max(), 100)
educRange = np.linspace(wagesDf['Educ'].min(), wagesDf['Educ'].max(), 100)
ageGrid, educGrid = np.meshgrid(ageRange, educRange)

# Predicted Wage at every grid node
wagePredictEq = intercept + coefAge * ageGrid + coefEduc * educGrid

fig = plt.figure()
ax = plt.axes(projection ="3d")

# Observed data points
ax.scatter(
    wagesDf['Age'], wagesDf['Educ'], wagesDf['Wage'],
    color='blue', label='Actual Wage',
)

# Fitted plane, semi-transparent so the points stay visible
ax.plot_surface(
    ageGrid, educGrid, wagePredictEq,
    color='red', alpha=0.5, label='Fitted Plane',
)

# Axis labels and title
ax.set_xlabel('Age')
ax.set_ylabel('Educ')
ax.set_zlabel('Wage')
plt.title('Age and Educ vs. Wage with Fitted Plane')

# Optional rotation of the view:
# ax.view_init(elev=45, azim=45)  # Set the elevation and azimuth angles
plt.show()


In [None]:
def wageModel5Transformer(dataForTransfer = None):
    """Build the design matrix of wage model 5: constant, ``Educ``, ``Age`` and ``Age`` squared.

    Parameters
    ----------
    dataForTransfer : pandas.DataFrame or any input accepted by ``pd.DataFrame``
        Must provide ``Educ`` and ``Age`` columns. A DataFrame argument is
        copied first, so the derived ``agePower2`` column is never written to
        the caller's frame.

    Returns
    -------
    pandas.DataFrame
        Columns ``Educ``, ``Age`` and ``agePower2`` with the constant column
        added by ``sm.add_constant``.
    """
    import pandas as pd
    import statsmodels.api as sm
    if isinstance(dataForTransfer, pd.DataFrame):
        df = dataForTransfer.copy()
    else:
        df = pd.DataFrame(dataForTransfer)
    # Vectorised square of the Age column: same values as a row-wise
    # ``df.apply(..., axis=1)`` without one Python call per row.
    df['agePower2'] = df['Age'] * df['Age']
    # has_constant='add' requests the constant column unconditionally.
    dfTransformer = sm.add_constant(df[['Educ','Age', 'agePower2']],has_constant='add')
    return dfTransformer
# Ordinary least squares: Wage regressed on the model-5 design matrix
# (constant + Educ + Age + Age squared).
wageModel5 = sm.OLS(endog=wagesDf["Wage"], exog=wageModel5Transformer(wagesDf))
wageModel5Fit = wageModel5.fit()
print(wageModel5Fit.summary())

In [None]:
# Attach the preprocessing step and a bound predictor to the fitted result,
# then register the result together with its metadata.
wageModel5Fit.transformer = wageModel5Transformer
wageModel5Fit.mainPredictor = partial(generalRegressionPredictor, wageModel5Fit)
registerAJrjModel(
    wageModel5Fit,
    dict(
        modelName="saadet_doga_hascelik__wageModel5Fit",
        version="1.0.1",
        params=wageModel5Fit.params.to_dict(),
        score=float(wageModel5Fit.rsquared),
        # NOTE(review): key is spelled 'modelLibraray' — confirm the registry
        # expects this spelling before changing it.
        modelLibraray='sm.OLS',
        libraryMetadata=dict(
            pvalues=wageModel5Fit.pvalues.to_dict(),
            r_squared=float(wageModel5Fit.rsquared),
            adj_r_squared=float(wageModel5Fit.rsquared_adj),
        ),
        # wage1SampleData carries both "Educ" and "Age", which this model's
        # transformer reads.
        sampleData=dict(dataForTransfer=wage1SampleData),
    ),
)

In [None]:
# In-sample predictions of model 5, stored as a new column of wagesDf.
predictedWage5 = wageModel5Fit.predict(exog=wageModel5Transformer(wagesDf))
wagesDf['predictedWage5'] = predictedWage5
wagesDf

In [None]:
# Coefficients of model 5:
# Wage = const + coefAge2*Age + coefEduc2*Educ + coefAgePower22*Age^2
intercept2, coefAge2, coefEduc2, coefAgePower22 = (
    wageModel5Fit.params[term] for term in ('const', 'Age', 'Educ', 'agePower2')
)

# 100 x 100 grid spanning the observed Age and Educ ranges
ageRange = np.linspace(wagesDf['Age'].min(), wagesDf['Age'].max(), 100)
educRange = np.linspace(wagesDf['Educ'].min(), wagesDf['Educ'].max(), 100)
ageGrid, educGrid = np.meshgrid(ageRange, educRange)

# Predicted Wage at every grid node (the surface is curved along Age because
# of the squared term)
wagePredictEq2 = intercept2 + coefAge2 * ageGrid + coefEduc2 * educGrid + coefAgePower22 * ageGrid * ageGrid

fig = plt.figure()
ax = plt.axes(projection ="3d")

# Observed data points
ax.scatter(
    wagesDf['Age'], wagesDf['Educ'], wagesDf['Wage'],
    color='blue', label='Actual Wage',
)

# Fitted model-5 surface, semi-transparent so the points stay visible
ax.plot_surface(
    ageGrid, educGrid, wagePredictEq2,
    color='green', alpha=0.5, label='Fitted Plane',
)

# Axis labels and title
ax.set_xlabel('Age')
ax.set_ylabel('Educ')
ax.set_zlabel('Wage')
plt.title('Age and Educ vs. Wage with Fitted Plane')

# Optional rotation of the view:
# ax.view_init(elev=45, azim=45)  # Set the elevation and azimuth angles
plt.show()

In [None]:


# Overlay of both fitted surfaces on the observed data. Reuses ageGrid,
# educGrid, wagePredictEq and wagePredictEq2 computed in the earlier cells.
fig = plt.figure()
ax = plt.axes(projection ="3d")

# Observed data points
ax.scatter(
    wagesDf['Age'], wagesDf['Educ'], wagesDf['Wage'],
    color='blue', label='Actual Wage',
)

# Model-4 plane (red)
ax.plot_surface(
    ageGrid, educGrid, wagePredictEq,
    color='red', alpha=0.5, label='Fitted Plane',
)

# Model-5 surface (green)
ax.plot_surface(
    ageGrid, educGrid, wagePredictEq2,
    color='green', alpha=0.5, label='Fitted Plane',
)

# Axis labels and title
ax.set_xlabel('Age')
ax.set_ylabel('Educ')
ax.set_zlabel('Wage')
plt.title('Age and Educ vs. Wage with Fitted Plane')

# Optional rotation of the view:
# ax.view_init(elev=45, azim=45)  # Set the elevation and azimuth angles
plt.show()

In [None]:
## AFTER CAREFULLY LOOKING AT THE RESULTS WE REALIZED wageModel5Fit IS THE BEST MODEL, SO LET'S KEEP IT AS THE BEST

In [None]:
# Load a model-comparison table from a CSV file next to the notebook and display it.
# NOTE(review): no cell visible here writes wageModelsComparison.csv — confirm
# where the file comes from so the notebook can be re-run from scratch.
compareDf = pd.read_csv('./wageModelsComparison.csv')
compareDf

In [None]:
# Register the model-5 fit a second time under the "BEST" name.
# wageModel5Fit already carries .transformer and .mainPredictor from the
# model-5 registration cell earlier in this notebook.
registerAJrjModel(
    wageModel5Fit,
    dict(
        modelName="saadet_doga_hascelik__wageModelBEST",
        version="1.0.1",
        params=wageModel5Fit.params.to_dict(),
        score=float(wageModel5Fit.rsquared),
        # NOTE(review): key is spelled 'modelLibraray' — confirm the registry
        # expects this spelling before changing it.
        modelLibraray='sm.OLS',
        libraryMetadata=dict(
            pvalues=wageModel5Fit.pvalues.to_dict(),
            r_squared=float(wageModel5Fit.rsquared),
            adj_r_squared=float(wageModel5Fit.rsquared_adj),
        ),
        sampleData=dict(dataForTransfer=wage1SampleData),
    ),
)