In [16]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import GridSearchCV
from xgboost import XGBRegressor

def train_model(data, test_size=0.2, hyperparameters=None):
    """
    Fit an XGBRegressor on an order-preserving train/test split and score it
    on the held-out portion.

    Parameters:
    - data: A two-item tuple or list ``(features, labels)``.
    - test_size: The proportion of the dataset to include in the test split.
    - hyperparameters: Optional dictionary of keyword arguments forwarded to
      ``XGBRegressor``. ``None`` (the default) or an empty dictionary means
      the estimator is built with its own defaults.

    Returns:
    A 4-tuple ``(mse, mae, r2, test_predictions)``:
    - mse: Mean squared error on the test split.
    - mae: Mean absolute error on the test split.
    - r2: R^2 score on the test split.
    - test_predictions: The model's predictions for the test split.

    Note: the fitted model itself is not returned.
    """
    # Build a fresh dict on every call: a ``{}`` default argument is a single
    # object shared between calls, and copying also protects the caller's dict.
    params = dict(hyperparameters) if hyperparameters else {}

    features, labels = data

    # shuffle=False keeps the row order: the leading rows are used for
    # training and the trailing ``test_size`` fraction for testing.
    # random_state is not passed because it only controls shuffling.
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=test_size, shuffle=False
    )

    model = XGBRegressor(**params)
    model.fit(X_train, y_train)

    # Make predictions on the test set
    test_predictions = model.predict(X_test)

    # Evaluate the model on the test set
    mse = mean_squared_error(y_test, test_predictions)
    mae = mean_absolute_error(y_test, test_predictions)
    r2 = r2_score(y_test, test_predictions)

    return mse, mae, r2, test_predictions

In [38]:
import warnings

# Ignore all FutureWarnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [72]:

import pandas as pd

# Load the dataset; the path is relative to the notebook's working directory.
dataframe = pd.read_csv('../../Data/data_fix_temp.txt')
# Model inputs: every column except the target ('t_obs') and the columns dropped here.
features =  dataframe.drop(columns=['t_obs','Date', 'con_prec.mm.', 'mic_prec.mm.', 'sha_prec.mm.', 'hari', 'UTC', 'LAT', 'LON']) 
target = dataframe['t_obs']
data = (features, target)
# Half of the rows are held out for testing.
test_size = 0.5
hyperparameters = {
    'verbosity': 2,
    'n_jobs': -1,
    'colsample_bytree': 0.7,
    'learning_rate': 0.09,
    'max_depth': 8,
    'min_child_weight': 2,
    'n_estimators': 50,
    'random_state': 6601,
    'reg_alpha': 0.5,
    # XGBoost's native name for the L2 regularisation term (alias: reg_lambda).
    'lambda' : 0.9,
    # The value was missing here, which made the whole cell a SyntaxError.
    # 1 is XGBoost's documented default for num_parallel_tree.
    # TODO(review): confirm the intended value; any recorded output for this
    # cell may have been produced with a different setting.
    'num_parallel_tree' : 1,
}
mse, mae, r2, test_pred = train_model(data, test_size, hyperparameters)

print(f'Mean Squared Error: {mse}')
print(f'Mean Absolute Error: {mae}')
print(f'R2 Score: {r2}\n')

Mean Squared Error: 2.9952137473952445
Mean Absolute Error: 1.3498726493293727
R2 Score: 0.6890593567648478



###### 0.6875549881570844
###### 'verbosity': 2,
###### 'n_jobs': -1,
###### 'colsample_bytree': 0.7,
###### 'learning_rate': 0.09,
###### 'max_depth': 8,
###### 'min_child_weight': 2,
###### 'n_estimators': 50,
###### 'random_state': 6601,
###### 'reg_alpha': 0.5,
###### 'lambda' : 0.9,
###### 'tree_method' : 'exact',