In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from xgboost import XGBRegressor

from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd

In [2]:
# Read the training CSV into a dataframe; the path is relative to the notebook's working directory.
train_csv_path = '../data/train.csv'
df_train = pd.read_csv(train_csv_path)

In [3]:
# Collapse the event rows into one record per 'user_session'.
# Each entry maps an output column to (source column, aggregation):
#   - 'nunique' gives the number of distinct values within the session,
#   - 'count' gives the number of non-null 'event_time' entries,
#   - 'first' carries a per-session value for 'user_id' and 'session_value'.
session_aggregations = {
    'unique_event_types': pd.NamedAgg(column='event_type', aggfunc='nunique'),
    'unique_products': pd.NamedAgg(column='product_id', aggfunc='nunique'),
    'unique_categories': pd.NamedAgg(column='category_id', aggfunc='nunique'),
    'event_count': pd.NamedAgg(column='event_time', aggfunc='count'),
    'user_id': pd.NamedAgg(column='user_id', aggfunc='first'),
    'session_value': pd.NamedAgg(column='session_value', aggfunc='first'),
}
sessions_grouped = df_train.groupby('user_session')
# reset_index() turns the 'user_session' group key back into a regular column.
agg_df = sessions_grouped.agg(**session_aggregations).reset_index()

In [4]:
# Pick the model inputs (X) and the regression target (y) out of the
# per-session table. 'user_id' and 'user_session' are deliberately not inputs.
features = [
    'unique_event_types',
    'unique_products',
    'unique_categories',
    'event_count',
]
X = agg_df.loc[:, features]
y = agg_df.loc[:, 'session_value']

In [5]:
# Hold out 20% of the sessions for evaluation; the fixed random_state keeps
# the split identical between runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts


In [None]:
# Candidate regressors, keyed by the display name used in the results table.
# Estimators that take a seed are given random_state=42 so reruns are comparable.
models = {
    'Linear Regression': LinearRegression(),
    'Random Forest Regressor': RandomForestRegressor(random_state=42),
    'Gradient Boosting Regressor': GradientBoostingRegressor(random_state=42),
    'XGBoost Regressor': XGBRegressor(random_state=42),
    'AdaBoost Regressor': AdaBoostRegressor(random_state=42)
}

# One dict per model with the keys 'Model', 'MSE' and 'RMSE'.
results = []

# Fit every model on the training split and score it on the held-out split.
for name, model in models.items():
    print(f"Eğitiliyor: {name}")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    # RMSE is the square root of the MSE just computed; reuse it instead of
    # scoring the same predictions a second time.
    rmse = np.sqrt(mse)
    results.append({'Model': name, 'MSE': mse, 'RMSE': rmse})

# Convert results to a DataFrame for easy viewing
results_df = pd.DataFrame(results)

# Print the comparison table
print("\nModel Karşılaştırma Sonuçları:")
print(results_df.to_string(index=False))

Eğitiliyor: Linear Regression
Eğitiliyor: Random Forest Regressor
Eğitiliyor: Gradient Boosting Regressor


In [7]:
# Dump the raw list of per-model metric dicts collected by the training loop.
print(results)

[{'Model': 'Linear Regression', 'MSE': 1636.427543066372, 'RMSE': 40.4527816480693}, {'Model': 'Random Forest Regressor', 'MSE': 1698.7222183652375, 'RMSE': 41.21555796498741}, {'Model': 'Gradient Boosting Regressor', 'MSE': 1647.8624885989354, 'RMSE': 40.59387254991738}]
