In [None]:
import pandas as pd
import numpy as np

In [None]:
# Read the training and test tables from CSV files in the working directory.
train = pd.read_csv(filepath_or_buffer='train.csv')
test = pd.read_csv(filepath_or_buffer='test.csv')

In [None]:
# Print every column label of the training table, one per line.
for column_name in train.columns.tolist():
    print(column_name)

In [None]:
# Feature matrix: every column of `train` except the ten BlendProperty targets.
# The test-data cell removes an 'ID' column before predicting, so an 'ID' column is
# removed here too when one exists; otherwise the models would be fitted on a feature
# the test matrix does not have. When `train` has no 'ID' column this is a no-op.
X = (
    train
    .drop(columns=[f'BlendProperty{i}' for i in range(1, 11)])
    .drop(columns='ID', errors='ignore')
)

In [None]:
# Target matrix: the ten BlendProperty columns, BlendProperty1 through BlendProperty10.
y = train.loc[:, [f'BlendProperty{idx}' for idx in range(1, 11)]]

In [None]:
# Interaction features: each component's fraction multiplied by each of that
# component's own properties (num_components * num_properties new columns).
num_components = 5
num_properties = 10

X_engineered = X.copy()

for comp in range(1, num_components + 1):
    fraction_col = f'Component{comp}_fraction'
    for prop in range(1, num_properties + 1):
        property_col = f'Component{comp}_Property{prop}'
        # New column name is "<fraction column>_x_<property column>".
        X_engineered[f'{fraction_col}_x_{property_col}'] = (
            X_engineered[fraction_col] * X_engineered[property_col]
        )

display(X_engineered.head())

In [None]:
# Row-wise summary statistics over each component's raw property columns,
# appended to a copy of the interaction-feature table.
X_engineered_summary = X_engineered.copy()

for comp in range(1, num_components + 1):
    properties = [f'Component{comp}_Property{prop}' for prop in range(1, num_properties + 1)]
    # The statistic columns added below are never part of `properties`,
    # so this selection can be taken once per component.
    property_values = X_engineered_summary[properties]
    row_min = property_values.min(axis=1)
    row_max = property_values.max(axis=1)

    X_engineered_summary[f'Component{comp}_Property_mean'] = property_values.mean(axis=1)
    X_engineered_summary[f'Component{comp}_Property_std'] = property_values.std(axis=1)
    X_engineered_summary[f'Component{comp}_Property_min'] = row_min
    X_engineered_summary[f'Component{comp}_Property_max'] = row_max
    X_engineered_summary[f'Component{comp}_Property_range'] = row_max - row_min

display(X_engineered_summary.head())

In [None]:
# Add the number (1..num_components) of the component with the largest fraction in each row.
X_engineered_dominant = X_engineered.copy()

fraction_cols = [f'Component{k}_fraction' for k in range(1, num_components + 1)]
# idxmax yields the winning column label, e.g. 'Component3_fraction' ...
dominant_label = X_engineered_dominant[fraction_cols].idxmax(axis=1)
# ... from which the surrounding text is stripped to leave just the digits.
dominant_digits = dominant_label.str.replace('Component', '').str.replace('_fraction', '')
X_engineered_dominant['Dominant_comp'] = dominant_digits.astype(int)

display(X_engineered_dominant.head())

In [None]:
import lightgbm as lgb
from sklearn.multioutput import MultiOutputRegressor

# Training features: the summary-statistic feature table plus the dominant-component number.
X_train_engineered_summary = X_engineered_summary.copy()

train_fraction_cols = [f'Component{k}_fraction' for k in range(1, num_components + 1)]
X_train_engineered_summary['Dominant_comp'] = (
    X_train_engineered_summary[train_fraction_cols]
    .idxmax(axis=1)                    # label of the largest fraction per row
    .str.replace('Component', '')
    .str.replace('_fraction', '')
    .astype(int)
)

# One independent LGBMRegressor per target column, stored under the target's name.
lgbm_models = {}
for target_name, target_values in y.items():
    print(f"Training LightGBM model for {target_name}...")
    regressor = lgb.LGBMRegressor(random_state=42)
    regressor.fit(X_train_engineered_summary, target_values)
    lgbm_models[target_name] = regressor

print("LightGBM model training complete.")

In [None]:
# Apply the same feature engineering to the test data, then predict with the LightGBM models.
num_components = 5
num_properties = 10

# Work on a copy of the test table without its 'ID' column (if it has one);
# `test` itself is left untouched because later cells read test['ID'].
X_test_engineered_summary = test.drop(columns='ID', errors='ignore')

# Interaction features: fraction * property, per component.
for comp in range(1, num_components + 1):
    fraction_col = f'Component{comp}_fraction'
    for prop in range(1, num_properties + 1):
        property_col = f'Component{comp}_Property{prop}'
        X_test_engineered_summary[f'{fraction_col}_x_{property_col}'] = (
            X_test_engineered_summary[fraction_col] * X_test_engineered_summary[property_col]
        )

# Row-wise summary statistics over each component's raw property columns.
for comp in range(1, num_components + 1):
    properties = [f'Component{comp}_Property{prop}' for prop in range(1, num_properties + 1)]
    property_values = X_test_engineered_summary[properties]
    row_min = property_values.min(axis=1)
    row_max = property_values.max(axis=1)

    X_test_engineered_summary[f'Component{comp}_Property_mean'] = property_values.mean(axis=1)
    X_test_engineered_summary[f'Component{comp}_Property_std'] = property_values.std(axis=1)
    X_test_engineered_summary[f'Component{comp}_Property_min'] = row_min
    X_test_engineered_summary[f'Component{comp}_Property_max'] = row_max
    X_test_engineered_summary[f'Component{comp}_Property_range'] = row_max - row_min

# Number of the component with the largest fraction in each row.
test_fraction_cols = [f'Component{k}_fraction' for k in range(1, num_components + 1)]
X_test_engineered_summary['Dominant_comp'] = (
    X_test_engineered_summary[test_fraction_cols]
    .idxmax(axis=1)
    .str.replace('Component', '')
    .str.replace('_fraction', '')
    .astype(int)
)

# Put the test features into exactly the training column layout. Without this the
# predictions depend on test.csv happening to list its columns in the same order as
# train.csv. A KeyError here means a training feature is missing from the test matrix.
X_test_engineered_summary = X_test_engineered_summary[list(X_train_engineered_summary.columns)]

# Make predictions on the test data for each blend property
test_predictions = {
    col: lgbm_models[col].predict(X_test_engineered_summary)
    for col in y.columns
}

print("Predictions on test data complete.")

In [None]:
# Assemble the LightGBM submission: the test IDs followed by one prediction column per target.
submission_df = pd.DataFrame(test['ID'])

for col in y.columns:
    submission_df[col] = test_predictions[col]

# Write without the pandas index so the file holds only ID + prediction columns.
submission_df.to_csv('submission_lgbm.csv', index=False)

# Report the file that was actually written (the message used to name 'submission.csv').
print("Submission file 'submission_lgbm.csv' created successfully!")
# Hard-coded note, not computed anywhere in this notebook — presumably an externally
# reported score; verify before relying on it.
print("The accuracy was 71.42")

In [None]:
!pip install catboost

In [None]:
from catboost import CatBoostRegressor

In [None]:
# One CatBoostRegressor per BlendProperty target, keyed by the target's column name.
catboost_models = {}

for target_name, target_values in y.items():
    print(f"Training CatBoost model for {target_name}...")
    # verbose=0 to suppress output
    regressor = CatBoostRegressor(random_state=42, verbose=0)
    regressor.fit(X_train_engineered_summary, target_values)
    catboost_models[target_name] = regressor

print("CatBoost model training complete.")

In [None]:
# Predict every target on the engineered test features with its CatBoost model.
test_predictions_catboost = {
    target_name: fitted.predict(X_test_engineered_summary)
    for target_name, fitted in catboost_models.items()
}

print("Predictions on test data complete.")

In [None]:
# Assemble the CatBoost submission: the test IDs followed by one prediction column per target.
submission_df = pd.DataFrame(test['ID'])

for col in test_predictions_catboost:
    submission_df[col] = test_predictions_catboost[col]

# Write without the pandas index so the file holds only ID + prediction columns.
submission_df.to_csv('submission_cat.csv', index=False)

# Report the file that was actually written (the message used to name 'submission.csv').
print("Submission file 'submission_cat.csv' created successfully!")
# Hard-coded note, not computed anywhere in this notebook — presumably an externally
# reported score; verify before relying on it.
print("the accuracy is 79%")

In [None]:
import xgboost as xgb

In [None]:
# One XGBRegressor per BlendProperty target, keyed by the target's column name.
xgboost_models = {}

for target_name in y.columns:
    print(f"Training XGBoost model for {target_name}...")
    booster = xgb.XGBRegressor(random_state=42)
    # Each model sees the same engineered features and a single target column.
    booster.fit(X_train_engineered_summary, y[target_name])
    xgboost_models[target_name] = booster

print("XGBoost model training complete.")

In [None]:
# Predict every target on the engineered test features with its XGBoost model.
test_predictions_xgboost = {
    target_name: fitted.predict(X_test_engineered_summary)
    for target_name, fitted in xgboost_models.items()
}

print("Predictions on test data complete.")

In [None]:
# Assemble the XGBoost submission: the test IDs followed by one prediction column per target.
submission_df = pd.DataFrame(test['ID'])

for col in test_predictions_xgboost:
    submission_df[col] = test_predictions_xgboost[col]

# Write without the pandas index so the file holds only ID + prediction columns.
submission_df.to_csv('submission_xgb.csv', index=False)

# Report the file that was actually written (the message used to name 'submission.csv').
print("Submission file 'submission_xgb.csv' created successfully!")

In [None]:
from sklearn.ensemble import HistGradientBoostingRegressor

In [None]:
# One HistGradientBoostingRegressor per BlendProperty target, keyed by the target's column name.
hgb_models = {}

for target_name, target_values in y.items():
    print(f"Training HistGradientBoostingRegressor model for {target_name}...")
    regressor = HistGradientBoostingRegressor(random_state=42)
    regressor.fit(X_train_engineered_summary, target_values)
    hgb_models[target_name] = regressor

print("HistGradientBoostingRegressor model training complete.")

In [None]:
# Predict every target on the engineered test features with its HistGradientBoosting model.
test_predictions_hgb = {
    target_name: fitted.predict(X_test_engineered_summary)
    for target_name, fitted in hgb_models.items()
}

print("Predictions on test data complete.")

In [None]:
# Assemble the HistGradientBoosting submission: the test IDs followed by one
# prediction column per target.
submission_df = pd.DataFrame(test['ID'])

for col in test_predictions_hgb:
    submission_df[col] = test_predictions_hgb[col]

# Write without the pandas index so the file holds only ID + prediction columns.
submission_df.to_csv('submission_hgb.csv', index=False)

# Report the file that was actually written (the message used to name 'submission.csv').
print("Submission file 'submission_hgb.csv' created successfully!")

In [None]:
from sklearn.linear_model import Ridge

In [None]:
# One Ridge regressor per BlendProperty target, keyed by the target's column name.
ridge_models = {}

for target_name in y.columns:
    print(f"Training Ridge model for {target_name}...")
    linear_model = Ridge(random_state=42)
    # Each model sees the same engineered features and a single target column.
    linear_model.fit(X_train_engineered_summary, y[target_name])
    ridge_models[target_name] = linear_model

print("Ridge model training complete.")

In [None]:
# Predict every target on the engineered test features with its Ridge model.
test_predictions_ridge = {
    target_name: fitted.predict(X_test_engineered_summary)
    for target_name, fitted in ridge_models.items()
}

print("Predictions on test data complete.")

In [None]:
# Ridge submission: the test IDs followed by one prediction column per target,
# in the insertion order of `test_predictions_ridge`.
submission_df = pd.DataFrame(test['ID']).assign(**test_predictions_ridge)

# Write without the pandas index so the file holds only ID + prediction columns.
submission_df.to_csv('submission_ridge.csv', index=False)

print("Submission file 'submission_ridge.csv' created successfully!")

In [None]:
from sklearn.metrics import mean_absolute_percentage_error

# Report, for every model family, the MAPE of each per-target model on the same
# training rows it was fitted on (an in-sample figure, not a validation score).
# (display label, {target name -> fitted model}) pairs, in report order.
trained_model_sets = [
    ("LightGBM", lgbm_models),
    ("CatBoost", catboost_models),
    ("XGBoost", xgboost_models),
    ("HistGradientBoostingRegressor", hgb_models),
    ("Ridge", ridge_models),
]

for position, (label, fitted_by_target) in enumerate(trained_model_sets):
    # Blank line between model families, but not before the first heading.
    lead = "" if position == 0 else "\n"
    print(f"{lead}MAPE on training data for each BlendProperty ({label} - sklearn):")
    for col in y.columns:
        model = fitted_by_target[col]
        y_train_pred = model.predict(X_train_engineered_summary)
        mape = mean_absolute_percentage_error(y[col], y_train_pred)
        # sklearn returns MAPE as a fraction (1.0 == 100%), so scale it before
        # printing next to a '%' sign; previously the value was shown 100x too small.
        print(f"  {col}: {mape * 100:.2f}%")

In [None]:
# Re-read four of the per-model submission files written earlier (the XGBoost file is not loaded).
prediction_files = {
    'catboost': 'submission_cat.csv',
    'ridge': 'submission_ridge.csv',
    'lgbm': 'submission_lgbm.csv',
    'hgb': 'submission_hgb.csv',
}

# Model key -> DataFrame of that model's test predictions.
test_predictions_dfs = {
    model_key: pd.read_csv(file_name)
    for model_key, file_name in prediction_files.items()
}

# Individual names for the same frames.
test_predictions_catboost_df = test_predictions_dfs['catboost']
test_predictions_ridge_df = test_predictions_dfs['ridge']
test_predictions_lgbm_df = test_predictions_dfs['lgbm']
test_predictions_hgb_df = test_predictions_dfs['hgb']

print("Test prediction dataframes loaded.")

In [None]:
# Equal-weight average of the four loaded prediction tables.
# Start from a copy of the CatBoost table so non-prediction columns carry over unchanged.
ensembled_predictions_df = test_predictions_catboost_df.copy()

blend_property_cols = [f'BlendProperty{i}' for i in range(1, 11)]

# Members are summed in this fixed order, then divided by the number of loaded tables.
ensemble_members = ('catboost', 'ridge', 'lgbm', 'hgb')

for col in blend_property_cols:
    column_total = test_predictions_dfs[ensemble_members[0]][col]
    for member in ensemble_members[1:]:
        column_total = column_total + test_predictions_dfs[member][col]
    ensembled_predictions_df[col] = column_total / len(test_predictions_dfs)

display(ensembled_predictions_df.head())

In [None]:
# The averaged table is already in submission layout; copy it and write it out.
submission_df = ensembled_predictions_df.copy(deep=True)

# index=False keeps the pandas row index out of the CSV.
submission_df.to_csv(path_or_buf='submission_ensembled.csv', index=False)

print("Submission file 'submission_ensembled.csv' created successfully!")

In [None]:
from sklearn.model_selection import KFold

# Out-of-fold (OOF) predictions: for every target and every base learner, each
# training row is predicted by a model that was fitted without that row's fold.
# Result layout: oof_predictions[model_name][target_name] -> 1-D array, one value per training row.

# Define the number of splits for K-fold cross-validation
n_splits = 5

# Factories returning a fresh, unfitted base learner. A dispatch table replaces the
# previous if/elif chain, which had no fallback branch and would silently have reused
# a stale `model` for an unrecognised name.
base_model_factories = {
    'catboost': lambda: CatBoostRegressor(random_state=42, verbose=0),
    'ridge': lambda: Ridge(random_state=42),
    'lgbm': lambda: lgb.LGBMRegressor(random_state=42),
    'hgb': lambda: HistGradientBoostingRegressor(random_state=42),
}

# With shuffle=True and a fixed integer seed the split is the same on every call,
# so it is computed once and shared by all targets and learners.
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
fold_indices = list(kf.split(X_train_engineered_summary))

oof_predictions = {model_name: {} for model_name in base_model_factories}

for col in y.columns:
    print(f"Generating out-of-fold predictions for {col}...")

    for model_name, make_model in base_model_factories.items():
        # Every row belongs to exactly one validation fold, so each slot is overwritten once.
        oof_preds = np.zeros(X_train_engineered_summary.shape[0])

        for train_index, val_index in fold_indices:
            X_train_fold = X_train_engineered_summary.iloc[train_index]
            X_val_fold = X_train_engineered_summary.iloc[val_index]
            y_train_fold = y[col].iloc[train_index]

            # Fresh model per fold so nothing learned on another fold leaks in.
            model = make_model()
            model.fit(X_train_fold, y_train_fold)

            oof_preds[val_index] = model.predict(X_val_fold)

        oof_predictions[model_name][col] = oof_preds

print("Out-of-fold prediction generation complete.")

In [None]:
from sklearn.linear_model import Lasso

# Lasso meta-regressors: one per target, fitted on the four base learners' OOF predictions.
# Meta-feature column name -> key of the base learner supplying it (column order matters).
meta_feature_sources = {
    'catboost_oof': 'catboost',
    'ridge_oof': 'ridge',
    'lgbm_oof': 'lgbm',
    'hgb_oof': 'hgb',
}

meta_regressors = {}

for col in y.columns:
    print(f"Training Lasso meta-regressor for {col}...")

    oof_features = pd.DataFrame({
        feature_name: oof_predictions[source][col]
        for feature_name, source in meta_feature_sources.items()
    })

    meta_model = Lasso(random_state=42)
    meta_model.fit(oof_features, y[col])
    meta_regressors[col] = meta_model

print("Lasso meta-regressor training complete.")

In [None]:
# Test-set predictions of the four base learners (the models fitted on all training rows).
# Result layout: test_base_predictions[model_name] -> DataFrame with 'ID' + one column per target.
base_model_sets = {
    'catboost': catboost_models,
    'ridge': ridge_models,
    'lgbm': lgbm_models,
    'hgb': hgb_models,
}

test_base_predictions = {}

for model_name, models_dict in base_model_sets.items():
    prediction_frame = pd.DataFrame(test['ID'])  # first column is the test 'ID'
    for col in y.columns:
        prediction_frame[col] = models_dict[col].predict(X_test_engineered_summary)
    test_base_predictions[model_name] = prediction_frame

print("Test predictions from base models complete.")

In [None]:
# Final stacked predictions: feed the base learners' test predictions to each Lasso meta-regressor.
# Column names must equal the ones the meta-regressors were fitted with.
meta_feature_sources = {
    'catboost_oof': 'catboost',
    'ridge_oof': 'ridge',
    'lgbm_oof': 'lgbm',
    'hgb_oof': 'hgb',
}

stacked_predictions = {}

for col in y.columns:
    print(f"Making final stacked predictions for {col}...")

    test_meta_features = pd.DataFrame({
        feature_name: test_base_predictions[source][col]
        for feature_name, source in meta_feature_sources.items()
    })

    stacked_predictions[col] = meta_regressors[col].predict(test_meta_features)

print("Final stacked predictions complete.")

In [None]:
# Lasso-stacked submission: the test IDs followed by one stacked-prediction column per target.
submission_df = pd.DataFrame(test['ID']).assign(**stacked_predictions)

# index=False keeps the pandas row index out of the CSV.
submission_df.to_csv('submission_stacked_lasso.csv', index=False)

print("Submission file 'submission_stacked_lasso.csv' created successfully!")

In [None]:
from sklearn.linear_model import Ridge

# Ridge meta-regressors: one per target, fitted on the four base learners' OOF predictions.
meta_regressors_ridge = {}

for col, target_values in y.items():
    print(f"Training Ridge meta-regressor for {col}...")

    # Column order (catboost, ridge, lgbm, hgb) is what the prediction step must reproduce.
    oof_columns = {}
    oof_columns['catboost_oof'] = oof_predictions['catboost'][col]
    oof_columns['ridge_oof'] = oof_predictions['ridge'][col]
    oof_columns['lgbm_oof'] = oof_predictions['lgbm'][col]
    oof_columns['hgb_oof'] = oof_predictions['hgb'][col]
    oof_features = pd.DataFrame(oof_columns)

    meta_model = Ridge(random_state=42)
    meta_model.fit(oof_features, target_values)
    meta_regressors_ridge[col] = meta_model

print("Ridge meta-regressor training complete.")

In [None]:
# Final stacked predictions from the Ridge meta-regressors.
# Column names must equal the ones the meta-regressors were fitted with.
meta_feature_sources = {
    'catboost_oof': 'catboost',
    'ridge_oof': 'ridge',
    'lgbm_oof': 'lgbm',
    'hgb_oof': 'hgb',
}

stacked_predictions_ridge = {}

for col in y.columns:
    print(f"Making final stacked predictions (Ridge meta-regressor) for {col}...")

    test_meta_features = pd.DataFrame({
        feature_name: test_base_predictions[source][col]
        for feature_name, source in meta_feature_sources.items()
    })

    stacked_predictions_ridge[col] = meta_regressors_ridge[col].predict(test_meta_features)

print("Final stacked predictions (Ridge meta-regressor) complete.")

In [None]:
# Ridge-stacked submission: the test IDs followed by one stacked-prediction column per target.
submission_df_ridge = pd.DataFrame(test['ID']).assign(**stacked_predictions_ridge)

# index=False keeps the pandas row index out of the CSV.
submission_df_ridge.to_csv('submission_stacked_ridge.csv', index=False)

print("Submission file 'submission_stacked_ridge.csv' created successfully!")

In [None]:
from sklearn.linear_model import ElasticNet

# ElasticNet meta-regressors: one per target, fitted on the four base learners' OOF predictions.
# Meta-feature column name -> key of the base learner supplying it (column order matters).
meta_feature_sources = {
    'catboost_oof': 'catboost',
    'ridge_oof': 'ridge',
    'lgbm_oof': 'lgbm',
    'hgb_oof': 'hgb',
}

meta_regressors_elasticnet = {}

for col in y.columns:
    print(f"Training ElasticNet meta-regressor for {col}...")

    oof_features = pd.DataFrame({
        feature_name: oof_predictions[source][col]
        for feature_name, source in meta_feature_sources.items()
    })

    meta_model = ElasticNet(random_state=42)
    meta_model.fit(oof_features, y[col])
    meta_regressors_elasticnet[col] = meta_model

print("ElasticNet meta-regressor training complete.")

In [None]:
# Final stacked predictions from the ElasticNet meta-regressors.
stacked_predictions_elasticnet = {}

for col in y.columns:
    print(f"Making final stacked predictions (ElasticNet meta-regressor) for {col}...")

    # Same column names and order as the frame the meta-regressors were fitted on.
    meta_columns = {}
    meta_columns['catboost_oof'] = test_base_predictions['catboost'][col]
    meta_columns['ridge_oof'] = test_base_predictions['ridge'][col]
    meta_columns['lgbm_oof'] = test_base_predictions['lgbm'][col]
    meta_columns['hgb_oof'] = test_base_predictions['hgb'][col]
    test_meta_features = pd.DataFrame(meta_columns)

    meta_model = meta_regressors_elasticnet[col]
    stacked_predictions_elasticnet[col] = meta_model.predict(test_meta_features)

print("Final stacked predictions (ElasticNet meta-regressor) complete.")

In [None]:
# ElasticNet-stacked submission: the test IDs followed by one stacked-prediction column per target.
submission_df_elasticnet = pd.DataFrame(test['ID']).assign(**stacked_predictions_elasticnet)

# index=False keeps the pandas row index out of the CSV.
submission_df_elasticnet.to_csv('submission_stacked_elasticnet.csv', index=False)

print("Submission file 'submission_stacked_elasticnet.csv' created successfully!")

In [None]:
import lightgbm as lgb

# LightGBM meta-regressors: one per target, fitted on the four base learners' OOF predictions.
# Meta-feature column name -> key of the base learner supplying it (column order matters).
meta_feature_sources = {
    'catboost_oof': 'catboost',
    'ridge_oof': 'ridge',
    'lgbm_oof': 'lgbm',
    'hgb_oof': 'hgb',
}

meta_regressors_lgbm = {}

for col, target_values in y.items():
    print(f"Training LightGBM meta-regressor for {col}...")

    oof_features = pd.DataFrame({
        feature_name: oof_predictions[source][col]
        for feature_name, source in meta_feature_sources.items()
    })

    meta_model = lgb.LGBMRegressor(random_state=42)
    meta_model.fit(oof_features, target_values)
    meta_regressors_lgbm[col] = meta_model

print("LightGBM meta-regressor training complete.")

In [None]:
# Final stacked predictions from the LightGBM meta-regressors.
# Column names must equal the ones the meta-regressors were fitted with.
meta_feature_sources = {
    'catboost_oof': 'catboost',
    'ridge_oof': 'ridge',
    'lgbm_oof': 'lgbm',
    'hgb_oof': 'hgb',
}

stacked_predictions_lgbm = {}

for col in y.columns:
    print(f"Making final stacked predictions (LightGBM meta-regressor) for {col}...")

    test_meta_features = pd.DataFrame({
        feature_name: test_base_predictions[source][col]
        for feature_name, source in meta_feature_sources.items()
    })

    stacked_predictions_lgbm[col] = meta_regressors_lgbm[col].predict(test_meta_features)

print("Final stacked predictions (LightGBM meta-regressor) complete.")

In [None]:
# LightGBM-stacked submission: the test IDs followed by one stacked-prediction column per target.
submission_df_lgbm_stacked = pd.DataFrame(test['ID']).assign(**stacked_predictions_lgbm)

# index=False keeps the pandas row index out of the CSV.
submission_df_lgbm_stacked.to_csv('submission_stacked_lgbm.csv', index=False)

print("Submission file 'submission_stacked_lgbm.csv' created successfully!")

In [None]:
from catboost import CatBoostRegressor

# CatBoost meta-regressors: one per target, fitted on the four base learners' OOF predictions.
meta_regressors_catboost = {}

for col in y.columns:
    print(f"Training CatBoost meta-regressor for {col}...")

    # Column order (catboost, ridge, lgbm, hgb) is what the prediction step must reproduce.
    oof_columns = {}
    oof_columns['catboost_oof'] = oof_predictions['catboost'][col]
    oof_columns['ridge_oof'] = oof_predictions['ridge'][col]
    oof_columns['lgbm_oof'] = oof_predictions['lgbm'][col]
    oof_columns['hgb_oof'] = oof_predictions['hgb'][col]
    oof_features = pd.DataFrame(oof_columns)

    # verbose=0 to suppress output
    meta_model = CatBoostRegressor(random_state=42, verbose=0)
    meta_model.fit(oof_features, y[col])
    meta_regressors_catboost[col] = meta_model

print("CatBoost meta-regressor training complete.")

In [None]:
# Final stacked predictions from the CatBoost meta-regressors.
# Column names must equal the ones the meta-regressors were fitted with.
meta_feature_sources = {
    'catboost_oof': 'catboost',
    'ridge_oof': 'ridge',
    'lgbm_oof': 'lgbm',
    'hgb_oof': 'hgb',
}

stacked_predictions_catboost = {}

for col in y.columns:
    print(f"Making final stacked predictions (CatBoost meta-regressor) for {col}...")

    test_meta_features = pd.DataFrame({
        feature_name: test_base_predictions[source][col]
        for feature_name, source in meta_feature_sources.items()
    })

    stacked_predictions_catboost[col] = meta_regressors_catboost[col].predict(test_meta_features)

print("Final stacked predictions (CatBoost meta-regressor) complete.")

In [None]:
# CatBoost-stacked submission: the test IDs followed by one stacked-prediction column per target.
submission_df_catboost = pd.DataFrame(test['ID']).assign(**stacked_predictions_catboost)

# index=False keeps the pandas row index out of the CSV.
submission_df_catboost.to_csv('submission_stacked_catboost.csv', index=False)

print("Submission file 'submission_stacked_catboost.csv' created successfully!")

In [None]:
# Predictions of the four base learners on the full training feature matrix.
# NOTE(review): these look in-sample — the per-target models appear to have been
# fitted on this same matrix in earlier cells; confirm before using as meta-features.
# Result layout: train_base_predictions_full[model_name] -> DataFrame whose first column
# holds the values of y.index, followed by one column per target.
base_model_sets = {
    'catboost': catboost_models,
    'ridge': ridge_models,
    'lgbm': lgbm_models,
    'hgb': hgb_models,
}

train_base_predictions_full = {}

for model_name, models_dict in base_model_sets.items():
    prediction_frame = pd.DataFrame(y.index)
    for col in y.columns:
        prediction_frame[col] = models_dict[col].predict(X_train_engineered_summary)
    train_base_predictions_full[model_name] = prediction_frame

print("Predictions on full training data from base models complete.")

In [None]:
from sklearn.linear_model import ElasticNet

# ElasticNet meta-regressors fitted on the base learners' predictions over the full
# training set (`train_base_predictions_full`) instead of the out-of-fold predictions.
# Meta-feature column name -> key of the base learner supplying it (column order matters).
full_train_feature_sources = {
    'catboost_train_pred': 'catboost',
    'ridge_train_pred': 'ridge',
    'lgbm_train_pred': 'lgbm',
    'hgb_train_pred': 'hgb',
}

meta_regressors_elasticnet_full_train = {}

for col in y.columns:
    print(f"Training ElasticNet meta-regressor for {col} on full training data predictions...")

    train_meta_features_full = pd.DataFrame({
        feature_name: train_base_predictions_full[source][col]
        for feature_name, source in full_train_feature_sources.items()
    })

    meta_model = ElasticNet(random_state=42)
    meta_model.fit(train_meta_features_full, y[col])
    meta_regressors_elasticnet_full_train[col] = meta_model

print("ElasticNet meta-regressor training on full training data predictions complete.")

In [None]:
# Final stacked predictions from the ElasticNet meta-regressors trained on full-training predictions.
# Column names must equal the ones those meta-regressors were fitted with.
full_train_feature_sources = {
    'catboost_train_pred': 'catboost',
    'ridge_train_pred': 'ridge',
    'lgbm_train_pred': 'lgbm',
    'hgb_train_pred': 'hgb',
}

stacked_predictions_elasticnet_full_train = {}

for col in y.columns:
    print(f"Making final stacked predictions (ElasticNet meta-regressor full training) for {col}...")

    test_meta_features_full = pd.DataFrame({
        feature_name: test_base_predictions[source][col]
        for feature_name, source in full_train_feature_sources.items()
    })

    meta_model = meta_regressors_elasticnet_full_train[col]
    stacked_predictions_elasticnet_full_train[col] = meta_model.predict(test_meta_features_full)

print("Final stacked predictions (ElasticNet meta-regressor full training) complete.")

In [None]:
# Submission for the ElasticNet meta-regressors trained on full-training predictions:
# the test IDs followed by one stacked-prediction column per target.
submission_df_elasticnet_full_train = pd.DataFrame(test['ID']).assign(
    **stacked_predictions_elasticnet_full_train
)

# index=False keeps the pandas row index out of the CSV.
submission_df_elasticnet_full_train.to_csv('submission_stacked_elasticnet_full_train.csv', index=False)

print("Submission file 'submission_stacked_elasticnet_full_train.csv' created successfully!")

In [None]:
from sklearn.linear_model import Ridge

# Ridge meta-regressors fitted on the base learners' predictions over the full
# training set (`train_base_predictions_full`) instead of the out-of-fold predictions.
meta_regressors_ridge_full_train = {}

for col, target_values in y.items():
    print(f"Training Ridge meta-regressor for {col} on full training data predictions...")

    # Column order (catboost, ridge, lgbm, hgb) is what the prediction step must reproduce.
    meta_columns = {}
    meta_columns['catboost_train_pred'] = train_base_predictions_full['catboost'][col]
    meta_columns['ridge_train_pred'] = train_base_predictions_full['ridge'][col]
    meta_columns['lgbm_train_pred'] = train_base_predictions_full['lgbm'][col]
    meta_columns['hgb_train_pred'] = train_base_predictions_full['hgb'][col]
    train_meta_features_full = pd.DataFrame(meta_columns)

    meta_model = Ridge(random_state=42)
    meta_model.fit(train_meta_features_full, target_values)
    meta_regressors_ridge_full_train[col] = meta_model

print("Ridge meta-regressor training on full training data predictions complete.")

In [None]:
# Final stacked predictions from the Ridge meta-regressors trained on full-training predictions.
# Column names must equal the ones those meta-regressors were fitted with.
full_train_feature_sources = {
    'catboost_train_pred': 'catboost',
    'ridge_train_pred': 'ridge',
    'lgbm_train_pred': 'lgbm',
    'hgb_train_pred': 'hgb',
}

stacked_predictions_ridge_full_train = {}

for col in y.columns:
    print(f"Making final stacked predictions (Ridge meta-regressor full training) for {col}...")

    test_meta_features_full = pd.DataFrame({
        feature_name: test_base_predictions[source][col]
        for feature_name, source in full_train_feature_sources.items()
    })

    meta_model = meta_regressors_ridge_full_train[col]
    stacked_predictions_ridge_full_train[col] = meta_model.predict(test_meta_features_full)

print("Final stacked predictions (Ridge meta-regressor full training) complete.")

In [None]:
# Submission for the Ridge meta-regressors trained on full-training predictions:
# the test IDs followed by one stacked-prediction column per target.
submission_df_ridge_full_train = pd.DataFrame(test['ID']).assign(
    **stacked_predictions_ridge_full_train
)

# index=False keeps the pandas row index out of the CSV.
submission_df_ridge_full_train.to_csv('submission_stacked_ridge_full_train.csv', index=False)

print("Submission file 'submission_stacked_ridge_full_train.csv' created successfully!")