In [1]:
import importlib
import os
import warnings
from collections import namedtuple
from functools import reduce

import numpy as np
import pandas as pd
import woodwork as ww
from sklearn.utils import check_random_state

from evalml.exceptions import (
    EnsembleMissingPipelinesError,
    MissingComponentError
)
from evalml.utils import get_logger


# Woodwork logical types listed here: Integer, Double and Boolean.
numeric_and_boolean_ww = [
    ww.logical_types.Integer,
    ww.logical_types.Double,
    ww.logical_types.Boolean,
]

def _rename_column_names_to_numeric(X):
    """Used in LightGBM classifier class and XGBoost classifier and regressor classes to rename column names
        when the input is a pd.DataFrame in case it has column names that contain symbols ([, ], <) that XGBoost cannot natively handle.

    Arguments:
        X (pd.DataFrame): the input training data of shape [n_samples, n_features]

    Returns:
        Transformed X where column names are renamed to numerical values
    """
    X_t = X
    if isinstance(X, (np.ndarray, list)):
        return pd.DataFrame(X)
    if isinstance(X, ww.DataTable):
        X_t = X.to_dataframe()
        logical_types = X.logical_types
        print(logical_types)
    name_to_col_num = dict((col, col_num) for col_num, col in enumerate(list(X.columns)))
    X_renamed = X_t.rename(columns=name_to_col_num, inplace=False)
    if isinstance(X, ww.DataTable):
        renamed_logical_types = dict((name_to_col_num[col], logical_types[col]) for col in logical_types)
        print(name_to_col_num)
        print(renamed_logical_types)
        print(X_renamed)
        return ww.DataTable(X_renamed, logical_types=renamed_logical_types)
    return X_renamed

In [2]:
import pandas as pd
import numpy as np
import woodwork as ww

In [3]:
# Small 3x3 example frame with two-level (MultiIndex) column labels.
column_labels = pd.MultiIndex.from_tuples([("a", "b"), ("a", "c"), ("d", "f")])
df = pd.DataFrame(
    [[1, 2, 3], [10, 20, 30], [100, 200, 300]],
    columns=column_labels,
)

In [4]:
# Show the frame; the header should have two levels (a/b, a/c, d/f).
df

Unnamed: 0_level_0,a,a,d
Unnamed: 0_level_1,b,c,f
0,1,2,3
1,10,20,30
2,100,200,300


In [5]:
# Collapse the two-level MultiIndex into a single-level Index whose labels are
# tuples such as ("a", "b"). Note this mutates `df` in place.
df.columns = df.columns.to_flat_index()

In [6]:
# Replace the tuple column labels with numeric ones; `df` is rebound to the renamed result.
df = _rename_column_names_to_numeric(df)

In [7]:
# Show the result; the columns should now be labelled 0, 1, 2.
df

Unnamed: 0,0,1,2
0,1,2,3
1,10,20,30
2,100,200,300
