In [None]:
import random
from deap import base, creator, tools, algorithms
from autogluon.tabular import TabularDataset, TabularPredictor

# Read the train data from the wine.csv file
train_data = TabularDataset('wine.csv')

# Define the multi-objective optimization problem
creator.create("FitnessMulti", base.Fitness, weights=(1.0, -1.0))
creator.create("Individual", list, fitness=creator.FitnessMulti)

# Define the search space (decision variables)
toolbox = base.Toolbox()
toolbox.register("lambda_param", random.uniform, 1e-4, 1e-1)
toolbox.register("min_child_weight", random.uniform, 1, 10)
toolbox.register("individual", tools.initCycle, creator.Individual, (toolbox.lambda_param, toolbox.min_child_weight), n=1)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# Define the evaluation function
def evaluate(individual):
    lambda_param, min_child_weight = individual
    
    # Train and evaluate the AutoGluon model with the given hyperparameters
    hyperparameter_space = {
        'model': 'XGB',
        'hyperparameters': {
            'lambda': lambda_param,
            'min_child_weight': min_child_weight,
        }
    }
    
    predictor = TabularPredictor(label='WineType').fit(train_data, hyperparameters=hyperparameter_space)
    
    # Obtain the model performance metrics
    accuracy = predictor.evaluate(valid_data)['accuracy']
    
    # Objective: Maximize accuracy, Minimize training time (placeholder; you can adjust based on your goals)
    training_time = 1.0
    
    return accuracy, training_time

toolbox.register("mate", tools.cxBlend, alpha=0.5)
toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=1, indpb=0.2)
toolbox.register("select", tools.selNSGA2)

# Define the optimization algorithm (NSGA-II)
def main():
    pop = toolbox.population(n=50)
    algorithms.eaMuPlusLambda(pop, toolbox, mu=50, lambda_=100, cxpb=0.7, mutpb=0.2, ngen=50, stats=None, halloffame=None, verbose=True)

    # Get the best individual(s) found by NSGA-II
    best_individuals = tools.sortNondominated(pop, len(pop), first_front_only=True)[0]

    # Use the best hyperparameters to train the final AutoGluon model
    best_hyperparameters = [indiv[0] for indiv in best_individuals]
    best_hyperparameters = best_hyperparameters[0]  # Assuming only one best individual, you may need to adjust if multiple

    hyperparameter_space = {
        'model': 'XGB',
        'hyperparameters': {
            'lambda': best_hyperparameters[0],
            'min_child_weight': best_hyperparameters[1],
        }
    }

    final_predictor = TabularPredictor(label='WineType').fit(train_data, hyperparameters=hyperparameter_space)
    
    # You can save or use the final_predictor for making predictions on new data
    final_predictions = final_predictor.predict(new_data)

toolbox.register("evaluate", evaluate)


In [None]:
import random
from deap import base, creator, tools, algorithms
from autogluon.tabular import TabularDataset, TabularPredictor
import pandas as pd
from autogluon.tabular.models import XGBoostModel
import matplotlib.pyplot as plt

# Read the wine dataset
dataset = TabularDataset('wine.csv')

# Define the features and target column
features = ['alcohol', 'ash', 'proline']
target = 'alcohol'

# Define the standard hyperparameters for AutoGluon
standard_hyperparameters = {
    'model': 'XGB',
    'hyperparameters': {
        'lambda': 1e-4,
        'min_child_weight': 1,
    }
}

# Train the initial AutoGluon model
initial_predictor = TabularPredictor(label=target).fit(dataset, hyperparameters=standard_hyperparameters)

# Run NSGA-II to find the best hyperparameters
creator.create("FitnessMulti", base.Fitness, weights=(1.0, -1.0))
creator.create("Individual", list, fitness=creator.FitnessMulti)

toolbox = base.Toolbox()
toolbox.register("lambda_param", random.uniform, 1e-4, 1e-1)
toolbox.register("min_child_weight", random.uniform, 1, 10)
toolbox.register("individual", tools.initCycle, creator.Individual, (toolbox.lambda_param, toolbox.min_child_weight), n=1)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

def evaluate(individual):
    lambda_param, min_child_weight = individual
    
    hyperparameters = {
        'model': 'XGB',
        'hyperparameters': {
            'lambda': lambda_param,
            'min_child_weight': min_child_weight,
        }
    }
    
    predictor = TabularPredictor(label=target).fit(dataset, hyperparameters=hyperparameters)
    accuracy = predictor.evaluate(dataset)['accuracy']
    
    training_time = 1.0
    
    return accuracy, training_time

toolbox.register("mate", tools.cxBlend, alpha=0.5)
toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=1, indpb=0.2)
toolbox.register("select", tools.selNSGA2)

def main():
    pop = toolbox.population(n=50)
    TabularPredictor.register_model(model_type='XGB', model=XGBoostModel)

    return initial_predictor, final_predictor

# Register XGBoost as a custom model preset
TabularPredictor.register_model(model_type='XGB', model=XGBoostModel)


In [29]:
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
from sklearn.metrics import confusion_matrix, classification_report

# Assuming you have a DataFrame 'df' with columns: 'WineType', 'Alcohol', 'AshAlkalinity', etc.
# Replace 'WineType' with the actual target variable and other columns with your features
df = pd.read_csv('wine.csv')
X = df.drop(columns=['ash', 'proline'])
y = df['alcohol']

# Convert the target variable to integer type
y = y.astype(int)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define XGBoost hyperparameters
params = {
    'objective': 'multi:softmax',  # Example: For multiclass classification
    'num_class': 178,  # Number of classes in your problem
    'booster': 'gbtree',  # 'gbtree' or 'gblinear'
    'lambda': 1,  # Regularization term
    'min_child_weight': 1,  # Minimum sum of instance weight (hessian) needed in a child
    'subsample': 0.8,  # Subsample ratio of the training instance
    'colsample_bylevel': 0.8,  # Subsample ratio of columns for each level
    'colsample_bytree': 0.8,  # Subsample ratio of columns for each tree
    'learning_rate': 0.1,  # Step size shrinkage to prevent overfitting
    'num_boost_round': 100,  # Number of boosting rounds
    'max_depth': 1,  # Maximum depth of a tree
    # Add more XGBoost parameters as needed
}

# Convert data to DMatrix format
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# Train the XGBoost model
model = xgb.train(params, dtrain, num_boost_round=10)

# Make predictions on the test set
y_pred = model.predict(dtest)

# Create a confusion matrix
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

# Create a classification report
report = classification_report(y_test, y_pred)
print("Classification Report:")
print(report)


Parameters: { "num_boost_round" } are not used.



Confusion Matrix:
[[ 2  0  0  0]
 [ 0 13  0  0]
 [ 0  0 17  0]
 [ 0  0  3  1]]
Classification Report:
              precision    recall  f1-score   support

          11       1.00      1.00      1.00         2
          12       1.00      1.00      1.00        13
          13       0.85      1.00      0.92        17
          14       1.00      0.25      0.40         4

    accuracy                           0.92        36
   macro avg       0.96      0.81      0.83        36
weighted avg       0.93      0.92      0.90        36

