# Random Forest Assignment

In [1]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

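# Assumes X_train, X_test, y_train and y_test (training/testing features and labels) are already defined,
# along with the column lists numerical_features and categorical_features used by the preprocessor below.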

In [2]:
# Step 1: Automated feature selection
# Model-based selector that wraps a 100-tree random forest as its estimator.
feature_selection = SelectFromModel(
    estimator=RandomForestClassifier(n_estimators=100),
)


In [3]:
# Step 2: Numerical pipeline
# Fill missing values with the column mean, then standardize the features.
numerical_pipeline = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="mean")),
        ("scaler", StandardScaler()),
    ],
)

In [4]:
# Step 3: Categorical pipeline
# Fill missing values with the most frequent category, then one-hot encode;
# the encoder is configured with handle_unknown="ignore".
categorical_pipeline = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ],
)

In [None]:
# Step 4: Combining numerical and categorical pipelines
# Routes each group of columns through its own sub-pipeline.
# NOTE(review): numerical_features and categorical_features are not defined in the
# visible cells; they must exist before this cell runs — confirm where they are set.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numerical_pipeline, numerical_features),
        ("cat", categorical_pipeline, categorical_features),
    ],
)


In [None]:
# Step 5: Final pipeline with Random Forest and Logistic Regression classifiers
# Preprocessing must run first: the model-based selector fits a random forest,
# which needs the imputed, scaled and one-hot-encoded matrix rather than the raw
# columns, and the ColumnTransformer has to see the original columns before the
# selector drops any of them. Step names are unchanged; only their order differs.
final_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('feature_selection', feature_selection),
    # Ensemble of both classifiers; VotingClassifier is left on its default voting mode.
    ('voting_classifier', VotingClassifier(estimators=[
        ('rf', RandomForestClassifier(n_estimators=100)),
        ('lr', LogisticRegression())
    ]))
])

In [None]:
# Step 6: Train the pipeline
# Fits every step of the pipeline on the training features and labels.
final_pipeline.fit(X_train, y=y_train)


In [None]:
# Step 7: Evaluate accuracy on the test dataset
# Predict labels for the test features, then score them against y_test.
y_pred = final_pipeline.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)


In [None]:
# NOTE(review): bare name with no visible definition; evaluating this cell raises
# NameError unless it is defined elsewhere. Looks like a leftover notebook title —
# confirm and remove.
EnsembleTechniquesTypes5