In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso, Ridge
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.pipeline import make_pipeline

# Download the Advertising dataset and separate it into inputs and target.
DATA_URL = "https://raw.githubusercontent.com/erkansirin78/datasets/master/Advertising.csv"
df = pd.read_csv(DATA_URL)

# Selection is positional: the first column is skipped, the last column is the
# target, and everything in between is a feature.
feature_frame = df.iloc[:, 1:-1]
target_series = df.iloc[:, -1]

# Both are handed on as plain NumPy arrays.
X = feature_frame.values
y = target_series.values


In [9]:
# Candidate regressors to compare, keyed by the label used when reporting.
#
# The random forest draws bootstrap samples and feature subsets at random, so
# it is seeded to make the reported scores repeatable across kernel restarts.
# 42 matches the seed already used for train_test_split in this notebook.
RANDOM_STATE = 42

models = {
    "RandomForest": RandomForestRegressor(n_estimators=200, random_state=RANDOM_STATE),
    "XGBoost": XGBRegressor(n_estimators=200, learning_rate=0.1),
    # The linear models are wrapped in a pipeline so that scaling is fitted
    # together with the model on whatever data the pipeline is fitted on.
    "Lasso": make_pipeline(StandardScaler(), Lasso(alpha=0.1)),
    "Ridge": make_pipeline(StandardScaler(), Ridge(alpha=1.0)),
}


In [15]:
# Evaluate each candidate with 5-fold cross-validation on the R2 metric.
# The raw per-fold scores are kept in `cv_scores` (label -> array of scores)
# and a mean / standard-deviation summary line is printed per model.
cv_scores = {}

for name, model in models.items():
    cv_scores[name] = scores = cross_val_score(
        estimator=model,
        X=X,
        y=y,
        scoring='r2',
        cv=5,
    )
    print(f"{name}: Average R2 = {scores.mean():.4f}, Std = {scores.std():.4f}")

RandomForest: Average R2 = 0.9754, Std = 0.0087
XGBoost: Average R2 = 0.9719, Std = 0.0103
Lasso: Average R2 = 0.8886, Std = 0.0396
Ridge: Average R2 = 0.8872, Std = 0.0407


In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import joblib
from sklearn.metrics import r2_score
import os 



# Train a random-forest regressor on the Advertising data, report its hold-out
# R2, persist it with joblib, then reload the saved file and predict with it.

# Output location for the serialized model. exist_ok=True makes the call a
# no-op when the directory is already there (no separate existence check).
model_directory = "../save-model"
os.makedirs(model_directory, exist_ok=True)
# Single source of truth for the artefact path, used for both dump and load.
model_path = f"{model_directory}/random-forest-ads.pkl"

# read data
df = pd.read_csv("https://raw.githubusercontent.com/erkansirin78/datasets/master/Advertising.csv")
print(df.head())

# Feature matrix: every column except the first (ID) and the last (target)
X = df.iloc[:, 1:-1].values
print(X.shape)
print(X[:3])

# Output variable: the last column
y = df.iloc[:, -1]
print(y.shape)
print(y[:6])

# split test train (seeded so the same rows land in each split every run)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# train model; the forest is seeded as well so R2 and the prediction below are
# reproducible after a kernel restart
forest = RandomForestRegressor(n_estimators=200, random_state=42)
forest.fit(X_train, y_train)

# Test model on the held-out rows
y_pred = forest.predict(X_test)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f"R2: {r2}")

# Save Model
joblib.dump(forest, model_path)

# make predictions
# Read the model back from disk. joblib files are pickles: only load files
# you produced yourself or otherwise trust.
estimator_loaded = joblib.load(model_path)
# Prediction set: one row in the same column order as the feature matrix
X_manual_test = [[222.3,33.4,57.2]]
print("X_manual_test", X_manual_test)
# Predict with the reloaded estimator so the saved artefact is what gets
# verified, not the in-memory `forest`.
prediction = estimator_loaded.predict(X_manual_test)
print(f"prediction: {prediction}")

   ID     TV  Radio  Newspaper  Sales
0   1  230.1   37.8       69.2   22.1
1   2   44.5   39.3       45.1   10.4
2   3   17.2   45.9       69.3    9.3
3   4  151.5   41.3       58.5   18.5
4   5  180.8   10.8       58.4   12.9
(200, 3)
[[230.1  37.8  69.2]
 [ 44.5  39.3  45.1]
 [ 17.2  45.9  69.3]]
(200,)
0    22.1
1    10.4
2     9.3
3    18.5
4    12.9
5     7.2
Name: Sales, dtype: float64
R2: 0.9807482084762262
X_manual_test [[222.3, 33.4, 57.2]]
prediction: [19.665]


In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.metrics import r2_score
import os
import joblib


# Train an XGBoost regressor on the Advertising data, report its hold-out R2,
# persist it with joblib, then reload the saved file and predict with it.

# Output location for the serialized model. exist_ok=True makes the call a
# no-op when the directory is already there (no separate existence check).
model_directory = "../save-model"
os.makedirs(model_directory, exist_ok=True)
# Single source of truth for the artefact path, used for both dump and load,
# so the two cannot drift apart if model_directory is changed.
model_path = f"{model_directory}/xgboost-ads.pkl"

# read data
df = pd.read_csv("https://raw.githubusercontent.com/erkansirin78/datasets/master/Advertising.csv")
print(df.head())

# Feature matrix (all columns except the first and the last) and output
# variable (the last column)
X = df.iloc[:, 1:-1].values
y = df.iloc[:, -1]

# split test train (seeded so the same rows land in each split every run)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# train model with XGBoost
xgb_model = XGBRegressor(n_estimators=200, learning_rate=0.1)
xgb_model.fit(X_train, y_train)

# Test model on the held-out rows
y_pred = xgb_model.predict(X_test)
r2 = r2_score(y_test, y_pred)
print(f"R2: {r2}")

# Save Model
joblib.dump(xgb_model, model_path)

# make predictions
# Read the model back from disk. joblib files are pickles: only load files
# you produced yourself or otherwise trust.
estimator_loaded = joblib.load(model_path)

# Prediction set: one row in the same column order as the feature matrix
X_manual_test = [[222.3,33.4,57.2]]
prediction = estimator_loaded.predict(X_manual_test)
print(f"prediction: {prediction[0]}")

   ID     TV  Radio  Newspaper  Sales
0   1  230.1   37.8       69.2   22.1
1   2   44.5   39.3       45.1   10.4
2   3   17.2   45.9       69.3    9.3
3   4  151.5   41.3       58.5   18.5
4   5  180.8   10.8       58.4   12.9
R2: 0.9650897027405347
prediction: 20.399276733398438
