In [26]:
import sys
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.impute import SimpleImputer
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.svm import SVC, SVR
from xgboost import XGBClassifier, XGBRegressor
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.ensemble import StackingClassifier, StackingRegressor
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor

### Loading Datasets

In [3]:
# Every input is a ';'-delimited CSV file read from the data/ directory.
csv_kwargs = {'delimiter': ';'}

# Raw train/test frames; the "_int" frames are kept untouched for later renaming.
training_data_int = pd.read_csv('data/training_data.csv', **csv_kwargs)
test_data_int = pd.read_csv('data/test_data_no_target.csv', **csv_kwargs)

# Lookup tables shipped alongside the data.
group_dict = pd.read_csv('data/group_dictionary.csv', **csv_kwargs)
column_names_dict = pd.read_csv('data/column_names_dictionary.csv', **csv_kwargs)

### Data Preprocessing (Renaming column names, string-to-float)

In [4]:
# Edison's code for renaming column names
# Build a CODE -> INDICATOR NAME lookup from the column dictionary table.
column_dict = dict(zip(column_names_dict['CODE'], column_names_dict['INDICATOR NAME']))
column_dict['Perform'] = 'Perform'  # target columns keep their own names
column_dict['Class'] = 'Class'

# Map the encrypted column names to their original names using the column dictionary.
# rename() returns new frames, so the raw *_int frames stay untouched and this
# cell can be re-run safely.
training_data = training_data_int.rename(columns=column_dict)
test_data = test_data_int.rename(columns=column_dict)


def decimal_comma_to_float(series):
    """Return ``series`` as floats, reading ',' as the decimal separator.

    Columns that are already numeric are only cast to float, so the
    conversion does not fail on columns that pandas parsed as numbers.
    Other columns are converted through their string form; missing values
    become the text 'nan', which float() reads back as NaN.

    Raises:
        ValueError: if a value cannot be parsed as a float.
    """
    if pd.api.types.is_numeric_dtype(series):
        return series.astype(float)
    return series.astype(str).str.replace(',', '.', regex=False).astype(float)


# Feature columns: everything except the first column and the last two columns
# of the training frame (the slice assumes the last two are 'Class' and 'Perform').
numeric_columns = training_data.columns[1:-2]

# Conversion errors are deliberately not swallowed: a partially converted
# frame would only fail later in a less obvious place.
for column in numeric_columns:
    training_data[column] = decimal_comma_to_float(training_data[column])
    test_data[column] = decimal_comma_to_float(test_data[column])

# 'Perform' exists only in the training frame.
training_data['Perform'] = decimal_comma_to_float(training_data['Perform'])

print("Conversion successful.")

training_data.head()

Conversion successful.


Unnamed: 0,Industry sector,"Return on Average Total Assets - %, TTM","Return on Average Common Equity - %, TTM","EBITDA Percentage of Common Equity, TTM","EBITDA Percentage of Total Fixed Assets - Net, TTM",Excess Cash Margin - %,"Free Cash Flow Percentage of Total Revenue, TTM","Cash Flow from Operations Pct of Total Revenue, TTM","Cash Flow from Operations Pct of Common Equity, TTM","Cash Flow from Operations to Total Assets, TTM",...,1-year Absolute Change of Working Capital to Total Assets,1-year Absolute Change of Cash Ratio,1-year Absolute Change of Net Debt to Total Equity,1-year Absolute Change of Total Liabilities Percentage of Total Assets,1-year Absolute Change of Long Term Debt Percentage of Total Assets,1-year Absolute Change of Book Value Percentage of Market Capitalization,1-year Absolute Change of Cash Flow from Operations Pct of Capital Expenditures,"1-year Absolute Change of Price to Cash Flow from Operations per Share, TTM",Class,Perform
0,G9,0.136495,-0.028429,-0.037772,-0.232459,-0.016222,-0.187506,-0.322545,-0.043743,0.125389,...,0.102563,0.188481,-0.016027,-0.135451,-0.189667,0.250967,0.022171,-0.004265,-1,-0.033764
1,G5,-0.714522,-0.042137,-0.052968,-0.796862,-0.018394,0.070102,-0.076321,-0.063864,-1.045521,...,-0.016375,0.020727,-0.006525,-0.01879,-0.098543,0.317744,-0.180502,-0.009215,1,0.127771
2,G10,0.104791,-0.038188,-0.053191,0.620233,0.148587,0.489875,0.319274,-0.060246,0.053174,...,0.272937,0.774169,-0.007144,0.123954,0.0,-0.110103,0.186669,-0.03072,1,0.1835
3,G2,-0.532847,-0.006582,-0.023377,1.306702,-0.068909,0.048024,-0.119481,-0.021057,-1.012916,...,0.004938,0.018494,-0.00335,-0.029214,0.045747,-0.076884,-0.037859,-0.012046,-1,-0.035668
4,G3,-0.200815,-0.016334,-0.036754,-0.886675,0.484495,-1.148744,0.152517,-0.04358,-0.935537,...,-0.014812,-0.324584,-0.019002,-0.379323,-0.046024,0.282145,0.011008,0.010496,1,0.235055


### Feature Engineering (one-hot encoding of the 'Industry sector' column)

In [5]:
SECTOR_COLUMN = 'Industry sector'


def one_hot_encode_sector(frame, dummy_columns=None):
    """Return a copy of ``frame`` with the sector column replaced by dummies.

    Args:
        frame: DataFrame containing the 'Industry sector' column.
        dummy_columns: optional list of dummy column names to enforce.
            Dummies missing from ``frame`` are added filled with 0 and
            dummies not in the list are dropped, so that two frames end up
            with identical indicator columns in identical order.

    Returns:
        A new DataFrame: the original columns without the sector column,
        followed by the indicator columns.
    """
    dummies = pd.get_dummies(frame[SECTOR_COLUMN], prefix=SECTOR_COLUMN)
    if dummy_columns is not None:
        dummies = dummies.reindex(columns=dummy_columns, fill_value=0)
    return pd.concat([frame.drop(columns=SECTOR_COLUMN), dummies], axis=1)


if SECTOR_COLUMN in training_data.columns:
    training_data = one_hot_encode_sector(training_data)
    print("One-hot encoding successful for 'Industry sector' column.")
else:
    print("'Industry sector' column does not exist in the training dataset.")

# Indicator columns present in the training frame (also found when this cell is
# re-run after the sector column has already been encoded).
sector_dummy_columns = [
    name for name in training_data.columns if name.startswith(SECTOR_COLUMN + '_')
]

if SECTOR_COLUMN in test_data.columns:
    # Align the test indicators with the training ones: encoding each frame on
    # its own yields different columns whenever a sector is absent from one of them.
    test_data = one_hot_encode_sector(test_data, dummy_columns=sector_dummy_columns or None)
    print("One-hot encoding successful for 'Industry sector' column.")
else:
    print("'Industry sector' column does not exist in the test dataset.")

training_data.head()


One-hot encoding successful for 'Industry sector' column.
One-hot encoding successful for 'Industry sector' column.


Unnamed: 0,"Return on Average Total Assets - %, TTM","Return on Average Common Equity - %, TTM","EBITDA Percentage of Common Equity, TTM","EBITDA Percentage of Total Fixed Assets - Net, TTM",Excess Cash Margin - %,"Free Cash Flow Percentage of Total Revenue, TTM","Cash Flow from Operations Pct of Total Revenue, TTM","Cash Flow from Operations Pct of Common Equity, TTM","Cash Flow from Operations to Total Assets, TTM","Reinvestment Rate - %, TTM",...,Industry sector_G10,Industry sector_G11,Industry sector_G2,Industry sector_G3,Industry sector_G4,Industry sector_G5,Industry sector_G6,Industry sector_G7,Industry sector_G8,Industry sector_G9
0,0.136495,-0.028429,-0.037772,-0.232459,-0.016222,-0.187506,-0.322545,-0.043743,0.125389,-0.014757,...,0,0,0,0,0,0,0,0,0,1
1,-0.714522,-0.042137,-0.052968,-0.796862,-0.018394,0.070102,-0.076321,-0.063864,-1.045521,-0.037353,...,0,0,0,0,0,1,0,0,0,0
2,0.104791,-0.038188,-0.053191,0.620233,0.148587,0.489875,0.319274,-0.060246,0.053174,-0.025008,...,1,0,0,0,0,0,0,0,0,0
3,-0.532847,-0.006582,-0.023377,1.306702,-0.068909,0.048024,-0.119481,-0.021057,-1.012916,-0.011783,...,0,0,1,0,0,0,0,0,0,0
4,-0.200815,-0.016334,-0.036754,-0.886675,0.484495,-1.148744,0.152517,-0.04358,-0.935537,-0.023262,...,0,0,0,1,0,0,0,0,0,0


### Handling Missing Values

In [7]:
# Only columns that are numeric in BOTH frames are imputed.
train_numeric = training_data.select_dtypes(include=[np.number]).columns
test_numeric = test_data.select_dtypes(include=[np.number]).columns
common_numeric_columns = train_numeric.intersection(test_numeric)

# Median imputation: the medians are learned on the training frame and then
# reused unchanged for the test frame.
imputer = SimpleImputer(strategy='median')
training_data[common_numeric_columns] = imputer.fit_transform(training_data[common_numeric_columns])
test_data[common_numeric_columns] = imputer.transform(test_data[common_numeric_columns])

# Total number of missing cells left in each frame (training first, then test).
for frame in (training_data, test_data):
    print(frame.isnull().sum().sum())

0
0


### Feature Engineering (creating new ratio features)

In [8]:
new_feature_names = ['debt_to_equity', 'current_ratio', 'return_on_assets', 'net_profit_margin']

# New feature -> (numerator column, denominator column).
# NOTE(review): the feature names do not describe the columns they are built
# from (e.g. 'debt_to_equity' is a ratio of two EBITDA percentages) - confirm
# whether the names or the source columns are the intended ones.
ratio_sources = {
    'debt_to_equity': ('EBITDA Percentage of Total Fixed Assets - Net, TTM',
                       'EBITDA Percentage of Common Equity, TTM'),
    'current_ratio': ('Excess Cash Margin - %',
                      'Free Cash Flow Percentage of Total Revenue, TTM'),
    'return_on_assets': ('Cash Flow from Operations Pct of Total Revenue, TTM',
                         'Cash Flow from Operations Pct of Common Equity, TTM'),
    'net_profit_margin': ('Cash Flow from Operations to Total Assets, TTM',
                          'Reinvestment Rate - %, TTM'),
}


def add_ratio_features(frame):
    """Return ``frame`` with the four ratio features appended as new columns.

    Each feature is numerator / denominator as listed in ``ratio_sources``.
    A zero denominator produces +/-inf; those are turned into NaN here so they
    can be imputed afterwards.
    """
    ratios = pd.DataFrame({
        name: frame[numerator] / frame[denominator]
        for name, (numerator, denominator) in ratio_sources.items()
    })
    ratios = ratios.replace([np.inf, -np.inf], np.nan)
    return pd.concat([frame, ratios], axis=1)


if not all(col in training_data.columns for col in new_feature_names):
    training_data = add_ratio_features(training_data)
else:
    print("New features already exist in the DataFrame. Skipping creation in train data.")

if not all(col in test_data.columns for col in new_feature_names):
    test_data = add_ratio_features(test_data)
else:
    print("New features already exist in the DataFrame. Skipping creation in test data.")

# Undefined ratios (x/0 and 0/0) are filled with the training median of the
# feature; inf/NaN left in place would make the later scaling step fail.
ratio_medians = training_data[new_feature_names].median()
training_data[new_feature_names] = training_data[new_feature_names].fillna(ratio_medians)
test_data[new_feature_names] = test_data[new_feature_names].fillna(ratio_medians)

training_data.head()


Unnamed: 0,"Return on Average Total Assets - %, TTM","Return on Average Common Equity - %, TTM","EBITDA Percentage of Common Equity, TTM","EBITDA Percentage of Total Fixed Assets - Net, TTM",Excess Cash Margin - %,"Free Cash Flow Percentage of Total Revenue, TTM","Cash Flow from Operations Pct of Total Revenue, TTM","Cash Flow from Operations Pct of Common Equity, TTM","Cash Flow from Operations to Total Assets, TTM","Reinvestment Rate - %, TTM",...,Industry sector_G4,Industry sector_G5,Industry sector_G6,Industry sector_G7,Industry sector_G8,Industry sector_G9,debt_to_equity,current_ratio,return_on_assets,net_profit_margin
0,0.136495,-0.028429,-0.037772,-0.232459,-0.016222,-0.187506,-0.322545,-0.043743,0.125389,-0.014757,...,0.0,0.0,0.0,0.0,0.0,1.0,6.154294,0.086512,7.373653,-8.496662
1,-0.714522,-0.042137,-0.052968,-0.796862,-0.018394,0.070102,-0.076321,-0.063864,-1.045521,-0.037353,...,0.0,1.0,0.0,0.0,0.0,0.0,15.044252,-0.262381,1.195052,27.990212
2,0.104791,-0.038188,-0.053191,0.620233,0.148587,0.489875,0.319274,-0.060246,0.053174,-0.025008,...,0.0,0.0,0.0,0.0,0.0,0.0,-11.660528,0.303317,-5.299553,-2.126319
3,-0.532847,-0.006582,-0.023377,1.306702,-0.068909,0.048024,-0.119481,-0.021057,-1.012916,-0.011783,...,0.0,0.0,0.0,0.0,0.0,0.0,-55.895809,-1.434897,5.674097,85.964372
4,-0.200815,-0.016334,-0.036754,-0.886675,0.484495,-1.148744,0.152517,-0.04358,-0.935537,-0.023262,...,0.0,0.0,0.0,0.0,0.0,0.0,24.12485,-0.42176,-3.499725,40.217448


### Base Models

In [16]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, StackingClassifier, StackingRegressor
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.svm import SVC, SVR
from xgboost import XGBClassifier, XGBRegressor
from sklearn.model_selection import cross_val_predict, train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.preprocessing import StandardScaler

# Features and the two targets (class label and continuous performance).
X_train = training_data.drop(columns=['Class', 'Perform'])
y_train_class = training_data['Class']
y_train_perform = training_data['Perform']

# Select the test features by name so that they are in exactly the training
# column order; a missing column raises a KeyError here rather than at scaling.
X_test = test_data[X_train.columns].copy()

# Draw a 10% training sample. All three arrays go through ONE split call, so
# features and both targets are guaranteed to refer to the same rows.
X_train_sample, _, y_train_class_sample, _, y_train_perform_sample, _ = train_test_split(
    X_train, y_train_class, y_train_perform, train_size=0.1, random_state=42
)

# Standardise using statistics from the training sample only.
scaler = StandardScaler()
X_train_sample_scaled = scaler.fit_transform(X_train_sample)
X_test_scaled = scaler.transform(X_test)

# Settings shared by all four tree-based base learners.
base_model_kwargs = {'n_estimators': 50, 'n_jobs': -1, 'random_state': 42}

# --- Classification stack: random forest + XGBoost, logistic-regression meta model ---
rf_clf = RandomForestClassifier(**base_model_kwargs)
xgb_clf = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', **base_model_kwargs)
# (an SVC(probability=True, random_state=42) base learner is currently disabled)
meta_clf = LogisticRegression(max_iter=200)
estimators_clf = [('rf', rf_clf), ('xgb', xgb_clf)]
stacking_clf = StackingClassifier(estimators=estimators_clf, final_estimator=meta_clf, cv=3)

# --- Regression stack: random forest + XGBoost, linear-regression meta model ---
rf_reg = RandomForestRegressor(**base_model_kwargs)
xgb_reg = XGBRegressor(**base_model_kwargs)
# (an SVR() base learner is currently disabled)
meta_reg = LinearRegression()
estimators_reg = [('rf', rf_reg), ('xgb', xgb_reg)]
stacking_reg = StackingRegressor(estimators=estimators_reg, final_estimator=meta_reg, cv=3)

# Fit both ensembles on the scaled training sample (classifier first).
stacking_clf.fit(X_train_sample_scaled, y_train_class_sample)
stacking_reg.fit(X_train_sample_scaled, y_train_perform_sample)

# Out-of-fold predictions (3 folds) on the training sample give the scores below.

# Classification: accuracy of the out-of-fold class predictions.
y_pred_class = cross_val_predict(
    stacking_clf, X_train_sample_scaled, y_train_class_sample, cv=3
)
accuracy = accuracy_score(y_train_class_sample, y_pred_class)
print(f"Classification Accuracy: {accuracy}")

# Regression: mean squared error of the out-of-fold 'Perform' predictions.
y_pred_perform = cross_val_predict(
    stacking_reg, X_train_sample_scaled, y_train_perform_sample, cv=3
)
mse = mean_squared_error(y_train_perform_sample, y_pred_perform)
print(f"Regression Mean Squared Error: {mse}")

# Class predictions straight from the stacking classifier.
test_pred_class = stacking_clf.predict(X_test_scaled)

# Continuous 'Perform' predictions from the stacking regressor.
test_pred_perform = stacking_reg.predict(X_test_scaled)

# Convert regression predictions to classes with a symmetric band around zero:
#   pred <= -band -> -1,   -band < pred <= band -> 0,   pred > band -> 1
# The band is |median(Perform)|. Passing -median/median straight to pd.cut
# raises when the median is zero or negative, because the bin edges are then
# not strictly increasing.
median_perform = y_train_perform.median()
neutral_band = abs(median_perform)
test_pred_class_from_perform = pd.Categorical(
    np.select(
        [test_pred_perform <= -neutral_band, test_pred_perform > neutral_band],
        [-1, 1],
        default=0,
    ),
    categories=[-1, 0, 1],
    ordered=True,
)

# One prediction per line, no header and no index column.
submission_clf = pd.DataFrame(test_pred_class, columns=['Predicted'])
submission_clf.to_csv('data/submission_stacking_clf.csv', index=False, header=False)

submission_reg_class = pd.DataFrame(test_pred_class_from_perform, columns=['Predicted'])
submission_reg_class.to_csv('data/submission_stacking_reg_class.csv', index=False, header=False)


Classification Accuracy: 0.47
Regression Mean Squared Error: 0.021050136809374884


### Hyperparameter Tuning (Grid Search)

In [29]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, StackingClassifier, StackingRegressor
from sklearn.linear_model import LogisticRegression, LinearRegression
from xgboost import XGBClassifier, XGBRegressor
from sklearn.model_selection import GridSearchCV, StratifiedKFold, cross_val_predict, train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.preprocessing import StandardScaler

# Features and the two targets (class label and continuous performance).
X_train = training_data.drop(columns=['Class', 'Perform'])
y_train_class = training_data['Class']
y_train_perform = training_data['Perform']

# Select the test features by name so that they are in exactly the training
# column order; a missing column raises a KeyError here rather than at scaling.
X_test = test_data[X_train.columns].copy()

# Draw a 5% training sample for the search. All three arrays go through ONE
# split call, so features and both targets refer to the same rows.
X_train_sample, _, y_train_class_sample, _, y_train_perform_sample, _ = train_test_split(
    X_train, y_train_class, y_train_perform, train_size=0.05, random_state=42
)

# Cast the feature matrices to float32 before scaling.
X_train_sample = X_train_sample.astype(np.float32)
X_test = X_test.astype(np.float32)

# Standardise using statistics from the training sample only.
scaler = StandardScaler()
X_train_sample_scaled = scaler.fit_transform(X_train_sample)
X_test_scaled = scaler.transform(X_test)

# Remap class labels for XGBoost: -1/0/1 -> 0/1/2.
# Predictions made by models trained on the remapped labels are in 0/1/2 too
# and have to be mapped back before they are reported.
y_train_class_sample_remap = y_train_class_sample.map({-1: 0, 0: 1, 1: 2})

def run_grid_search(estimator, param_grid, X, y):
    """Fit and return an exhaustive 3-fold GridSearchCV using all CPU cores.

    Args:
        estimator: unfitted scikit-learn compatible estimator.
        param_grid: dict mapping parameter names to the values to try.
        X: training feature matrix.
        y: training target.

    Returns:
        The fitted GridSearchCV object (see ``best_estimator_`` / ``best_params_``).
    """
    search = GridSearchCV(estimator, param_grid, cv=3, n_jobs=-1)
    search.fit(X, y)
    return search


# Parameter grid shared by both random forests (3 * 4 * 3 = 36 combinations).
param_grid_rf = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}

# Parameter grid shared by both XGBoost models.
# This is 3^6 * 4^2 = 11,664 combinations, i.e. 34,992 fits per model with
# cv=3. RandomizedSearchCV (n_iter=10) over the same grid was tried earlier as
# a cheaper alternative; the results reported below come from the full grid.
param_grid_xgb = {
    'eta': [0.01, 0.1, 0.3],
    'n_estimators': [50, 100, 200],
    'max_depth': [3, 6, 9],
    'colsample_bytree': [0.3, 0.7, 1.0],
    'colsample_bylevel': [0.3, 0.7, 1.0],
    'lambda': [0.01, 0.1, 1, 10],
    'alpha': [0.01, 0.1, 1, 10],
    'subsample': [0.3, 0.7, 1.0]
}

# Grid search for RandomForestClassifier (original -1/0/1 labels).
grid_rf = run_grid_search(
    RandomForestClassifier(random_state=42), param_grid_rf,
    X_train_sample_scaled, y_train_class_sample,
)
best_rf_clf = grid_rf.best_estimator_
print("Best parameters for RandomForestClassifier: ", grid_rf.best_params_)

# Grid search for RandomForestRegressor.
grid_rf_reg = run_grid_search(
    RandomForestRegressor(random_state=42), param_grid_rf,
    X_train_sample_scaled, y_train_perform_sample,
)
best_rf_reg = grid_rf_reg.best_estimator_
print("Best parameters for RandomForestRegressor: ", grid_rf_reg.best_params_)

# Grid search for XGBClassifier (uses the labels remapped to 0/1/2).
grid_xgb = run_grid_search(
    XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='mlogloss'),
    param_grid_xgb,
    X_train_sample_scaled, y_train_class_sample_remap,
)
best_xgb_clf = grid_xgb.best_estimator_
print("Best parameters for XGBClassifier: ", grid_xgb.best_params_)

# Grid search for XGBRegressor over the same grid.
grid_xgb_reg = run_grid_search(
    XGBRegressor(random_state=42), param_grid_xgb,
    X_train_sample_scaled, y_train_perform_sample,
)
best_xgb_reg = grid_xgb_reg.best_estimator_
print("Best parameters for XGBRegressor: ", grid_xgb_reg.best_params_)

# Shuffled, stratified 5-fold splitter used by the classification stack.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# --- Classification stack built from the tuned base learners ---
meta_clf = LogisticRegression(max_iter=200)
estimators_clf = [('rf', best_rf_clf), ('xgb', best_xgb_clf)]
# Fitted on the class labels remapped to 0/1/2.
stacking_clf = StackingClassifier(
    estimators=estimators_clf, final_estimator=meta_clf, cv=skf
).fit(X_train_sample_scaled, y_train_class_sample_remap)

# --- Regression stack built from the tuned base learners (plain 3-fold CV) ---
meta_reg = LinearRegression()
estimators_reg = [('rf', best_rf_reg), ('xgb', best_xgb_reg)]
stacking_reg = StackingRegressor(
    estimators=estimators_reg, final_estimator=meta_reg, cv=3
).fit(X_train_sample_scaled, y_train_perform_sample)

# Scores below come from out-of-fold predictions on the training sample.

# Classification: stratified folds (skf), scored against the remapped labels.
y_pred_class = cross_val_predict(
    stacking_clf, X_train_sample_scaled, y_train_class_sample_remap, cv=skf
)
accuracy = accuracy_score(y_train_class_sample_remap, y_pred_class)
print(f"Classification Accuracy: {accuracy}")

# Regression: 3 folds, scored with the mean squared error.
y_pred_perform = cross_val_predict(
    stacking_reg, X_train_sample_scaled, y_train_perform_sample, cv=3
)
mse = mean_squared_error(y_train_perform_sample, y_pred_perform)
print(f"Regression Mean Squared Error: {mse}")

# The stacking classifier was fitted on labels remapped with {-1: 0, 0: 1, 1: 2},
# so its predictions are 0/1/2. Map them back to the original -1/0/1 labels
# before writing the submission (position i of the lookup holds the original
# label for encoded value i).
encoded_pred_class = stacking_clf.predict(X_test_scaled)
test_pred_class = np.array([-1, 0, 1])[np.asarray(encoded_pred_class, dtype=int)]

# Continuous 'Perform' predictions from the stacking regressor.
test_pred_perform = stacking_reg.predict(X_test_scaled)

# Convert regression predictions to classes with a symmetric band around zero:
#   pred <= -band -> -1,   -band < pred <= band -> 0,   pred > band -> 1
# The band is |median(Perform)|. Passing -median/median straight to pd.cut
# raises when the median is zero or negative, because the bin edges are then
# not strictly increasing.
median_perform = y_train_perform.median()
neutral_band = abs(median_perform)
test_pred_class_from_perform = pd.Categorical(
    np.select(
        [test_pred_perform <= -neutral_band, test_pred_perform > neutral_band],
        [-1, 1],
        default=0,
    ),
    categories=[-1, 0, 1],
    ordered=True,
)

# One prediction per line, no header and no index column.
submission_clf = pd.DataFrame(test_pred_class, columns=['Predicted'])
submission_clf.to_csv('data/submission_stacking_clf.csv', index=False, header=False)

submission_reg_class = pd.DataFrame(test_pred_class_from_perform, columns=['Predicted'])
submission_reg_class.to_csv('data/submission_stacking_reg_class.csv', index=False, header=False)


python(69762) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(69763) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(69764) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(69765) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(69766) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(69767) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(69768) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(69769) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


Best parameters for RandomForestClassifier:  {'max_depth': 10, 'min_samples_split': 10, 'n_estimators': 50}
Best parameters for RandomForestRegressor:  {'max_depth': 20, 'min_samples_split': 2, 'n_estimators': 200}
Best parameters for XGBClassifier:  {'alpha': 10, 'colsample_bylevel': 1.0, 'colsample_bytree': 0.7, 'eta': 0.1, 'lambda': 1, 'max_depth': 6, 'n_estimators': 200, 'subsample': 0.3}
Best parameters for XGBRegressor:  {'alpha': 0.01, 'colsample_bylevel': 1.0, 'colsample_bytree': 0.7, 'eta': 0.01, 'lambda': 0.01, 'max_depth': 9, 'n_estimators': 200, 'subsample': 0.3}
Classification Accuracy: 0.4675
Regression Mean Squared Error: 0.020527078094013308
