In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Train and evaluate a hard-voting ensemble (random forest + logistic
# regression, each behind its own StandardScaler) on 'dataset.csv'.

# One seed shared by every stochastic step, so that Restart Kernel -> Run All
# reproduces the printed accuracy instead of drifting from run to run.
SEED = 42

# Load the dataset; it must contain a 'target' column (the drop below raises
# KeyError otherwise).
data = pd.read_csv('dataset.csv')

# Separate target variable (y) from features (X)
X = data.drop(columns=['target'])
y = data['target']

# Hold out 20% of the rows for testing.
# NOTE(review): the split is not stratified; consider stratify=y if the
# classes are imbalanced -- confirm against the data before enabling.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)

# Create individual classifiers and a scaler.
# The forest is seeded: without random_state its bootstrap samples and feature
# subsets differ on every run, and so does the reported accuracy.
rf_classifier = RandomForestClassifier(random_state=SEED)
lr_classifier = LogisticRegression(max_iter=1000)
scaler = StandardScaler()

# Create a pipeline for each classifier with scaling.
# Each pipeline owns a separate scaler instance: sharing one object would let
# a direct fit of one pipeline overwrite the statistics used by the other.
rf_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('rf', rf_classifier)
])

lr_pipeline = Pipeline([
    ('scaler', scaler),
    ('lr', lr_classifier)
])

# Create a Voting Classifier combining the two pipelines
voting_classifier = VotingClassifier(
    estimators=[
        ('rf', rf_pipeline),
        ('lr', lr_pipeline)
    ],
    voting='hard'  # 'hard' for majority voting, 'soft' for weighted voting based on class probabilities
)

# Train the Voting Classifier on the training data
voting_classifier.fit(X_train, y_train)

# Make predictions on the held-out test rows
y_pred = voting_classifier.predict(X_test)

# Evaluate the accuracy of the Voting Classifier
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy of Voting Classifier on the test dataset: {accuracy:.2f}')


Accuracy of Voting Classifier on the test dataset: 0.84
