In [48]:
import numpy as np
import polars as pl
import polars.selectors as cs
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import great_tables as tg
import altair as alt

In [49]:
dataset_path = r'../DATASETS/Position_Salaries.csv'

In [50]:
dataset = pl.read_csv(dataset_path)

In [51]:
dataset

Position,Level,Salary
str,i64,i64
"""Business Analyst""",1,45000
"""Junior Consultant""",2,50000
"""Senior Consultant""",3,60000
"""Manager""",4,80000
"""Country Manager""",5,110000
"""Region Manager""",6,150000
"""Partner""",7,200000
"""Senior Partner""",8,300000
"""C-level""",9,500000
"""CEO""",10,1000000


In [52]:
X = dataset.select(cs.exclude('Salary', 'Position'))

In [53]:
y = dataset.select('Salary').to_series()

In [54]:
from sklearn.model_selection import train_test_split

In [55]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [56]:
from sklearn.linear_model import LinearRegression

In [57]:
linear_model = LinearRegression()

In [58]:
linear_model.fit(X, y)

In [59]:
from sklearn.preprocessing import PolynomialFeatures

In [60]:
poly_features = PolynomialFeatures(degree=2)

In [61]:
X_poly = poly_features.fit_transform(X)

In [62]:
linear_model_2 = LinearRegression()

In [63]:
linear_model_2.fit(X_poly, y)

In [69]:
fig = px.scatter(
    data_frame=dataset,
    x='Level',
    y='Salary',
    color='Position',
    title='Salary vs Position'
).update_layout(
    width=1000,
    height=600,
)

predicted_salary = linear_model.predict(X)
predicted_dataset = dataset.with_columns(pl.Series(name="Predicted Salary", values=predicted_salary))

fig.add_scatter(
    x=predicted_dataset["Level"],
    y=predicted_dataset["Predicted Salary"],
    mode='lines',
    name='Linear Regression'
)

fig.show()

In [65]:
fig = px.scatter(
    data_frame=dataset,
    x='Level',
    y='Salary',
    color='Position',
    title='Salary vs Position'
).update_layout(
    width=1000,
    height=600
)

predicted_salaries = linear_model_2.predict(X_poly)
predicted_dataset = dataset.with_columns(pl.Series(name="Predicted Salary", values=predicted_salaries))

fig.add_scatter(
    x=predicted_dataset["Level"],
    y=predicted_dataset["Predicted Salary"],
    mode='lines',
    name='Polynomial Regression'
)

fig.show()

In [75]:
linear_model_2.predict([[1, 2, 3, ]])

array([24719.6969697])

In [82]:
linear_model_2.predict(poly_features.fit_transform([[6.5]]))

array([189498.10606061])