In [120]:
import pandas as pd

# Sample DataFrame before transformations
data_before = {
    'name': ['apple', 'banana', 'orange', 'grape'],
    'quantity': [5, 8, 3, 6],
    'price': [1.2, 0.5, 0.8, 1.0]
}

df_before = pd.DataFrame(data_before)
df_before.isna().sum()

name        0
quantity    0
price       0
dtype: int64

In [115]:
from sklearn.ensemble import RandomForestRegressor
import numpy as np

# Generate some sample data
np.random.seed(0)
X = np.random.rand(100, 2)  # Two numerical features
y = 2 * X[:, 0] - 3 * X[:, 1] + np.random.normal(0, 0.1, 100)  # Target with linear relationship

# Initialize RandomForestRegressor with criterion='mae'
rf_regressor = RandomForestRegressor(criterion='mae')

# Fit the model
rf_regressor.fit(X, y)

# Predict using the trained model (for demonstration purposes)
predictions = rf_regressor.predict(X)

# Print the predictions (for demonstration purposes)
print("Predictions:", predictions)


InvalidParameterError: The 'criterion' parameter of RandomForestRegressor must be a str among {'friedman_mse', 'absolute_error', 'poisson', 'squared_error'}. Got 'mae' instead.

In [99]:
from sklearn.preprocessing import FunctionTransformer
from sklearn.impute import SimpleImputer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline
import numpy as np




In [100]:
reshape_to_1d = FunctionTransformer(np.reshape, kw_args={"newshape": -1})

# Pipeline for NLP transformations
name_tfidf = make_pipeline(
    SimpleImputer(strategy="constant", fill_value=""),
    reshape_to_1d,  # Redundant if "name" column is already one-dimensional
    TfidfVectorizer(binary=False, max_features=100, stop_words='english')
)


In [102]:
transformed_name_column = name_tfidf.fit_transform(df_before['name'].values.reshape(-1, 1))


In [106]:
transformed_name_column.toarray()

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 0., 1.],
       [0., 0., 1., 0.]])

In [97]:
# FunctionTransformer to reshape (though not needed)
reshape_to_1d = FunctionTransformer(np.reshape, kw_args={"newshape": -1})

# Pipeline for NLP transformations
name_tfidf = make_pipeline(
    SimpleImputer(strategy="constant", fill_value=""),
    reshape_to_1d,  # Redundant if "name" column is already one-dimensional
    TfidfVectorizer(binary=False, max_features=100, stop_words='english')
)

# Apply transformations to "name" column
transformed_name_column = name_tfidf.fit_transform(df_before['name'].values.reshape(-1, 1))

# Create DataFrame after transformations
df_after = pd.DataFrame(transformed_name_column.toarray(), columns=name_tfidf.named_steps['tfidfvectorizer'].get_feature_names_out())

print("\nDataFrame after transformations:")
print(df_after)


DataFrame after transformations:
   apple  banana  grape  orange
0    1.0     0.0    0.0     0.0
1    0.0     1.0    0.0     0.0
2    0.0     0.0    0.0     1.0
3    0.0     0.0    1.0     0.0
