In [3]:
import numpy as np 
from sklearn.datasets import make_classification 
from sklearn.model_selection import train_test_split 
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier 
from sklearn.linear_model import LogisticRegression 
from sklearn.metrics import accuracy_score 

# Reproducible binary toy problem: 500 samples, 10 features (8 informative, 2 redundant).
dataset_params = dict(
    n_samples=500,
    n_features=10,
    n_informative=8,
    n_redundant=2,
    n_classes=2,
    random_state=42,
)
X, y = make_classification(**dataset_params)

# Keep 30% of the samples aside for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Create and train each base learner in one step; fit() hands back the fitted estimator.
model1 = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
model2 = GradientBoostingClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
model3 = LogisticRegression(random_state=42).fit(X_train, y_train)

# Hard class predictions of every base learner on the held-out split.
pred1, pred2, pred3 = (clf.predict(X_test) for clf in (model1, model2, model3))

# Combine predictions using max voting 
def max_voting(preds):
    """Combine hard class predictions from several models by majority vote.

    Parameters
    ----------
    preds : sequence of array-like
        One 1-D sequence of predicted labels per model, all of the same
        length, i.e. an array-like of shape ``(n_models, n_samples)``.
        Labels may be of any sortable dtype (negative integers and strings
        included), not only non-negative integers.

    Returns
    -------
    numpy.ndarray
        The most frequent label for each sample, shape ``(n_samples,)``.
        Ties go to the smallest label, so with an even number of voters a
        split vote is resolved deterministically rather than randomly.

    Raises
    ------
    ValueError
        If ``preds`` is not two-dimensional.
    """
    votes = np.asarray(preds)
    if votes.ndim != 2:
        raise ValueError(
            "preds must have shape (n_models, n_samples); got an array with "
            f"{votes.ndim} dimension(s)"
        )

    winners = []
    # Transposing makes every row hold all models' votes for one sample.
    for sample_votes in votes.T:
        # np.unique returns the labels sorted, and argmax returns the first
        # maximum, so a tie resolves to the smallest label.
        labels, counts = np.unique(sample_votes, return_counts=True)
        winners.append(labels[counts.argmax()])
    return np.array(winners)

# Combine predictions using averaging 
def averaging(pred_probs):
    """Soft-vote by taking the unweighted mean of the models' class probabilities.

    ``pred_probs`` is a sequence with one ``(n_samples, n_classes)``
    probability array per model. The return value is a 1-D array holding,
    for each sample, the index of the column with the highest mean
    probability.
    """
    stacked = np.asarray(pred_probs)
    mean_probs = stacked.mean(axis=0)  # collapse the model axis
    return mean_probs.argmax(axis=1)

# Combine predictions using weighted averaging 
def weighted_averaging(pred_probs, weights):
    """Soft-vote with one weight per model.

    ``pred_probs`` is a sequence with one ``(n_samples, n_classes)``
    probability array per model, and ``weights`` supplies one number per
    model. ``np.average`` divides by the sum of the weights, so they do not
    have to add up to 1. Returns, for each sample, the index of the column
    with the highest weighted mean probability.
    """
    stacked = np.asarray(pred_probs)
    blended = np.average(stacked, weights=weights, axis=0)
    return blended.argmax(axis=1)

# Hard voting: majority class across the three base models.
final_pred_voting = max_voting([pred1, pred2, pred3])
voting_acc = accuracy_score(y_test, final_pred_voting)

# Class-probability estimates, shared by both soft-voting variants below.
probs1, probs2, probs3 = (
    clf.predict_proba(X_test) for clf in (model1, model2, model3)
)
class_probs = [probs1, probs2, probs3]

# Soft voting with equal influence per model.
final_pred_avg = averaging(class_probs)
avg_acc = accuracy_score(y_test, final_pred_avg)

# Soft voting with example per-model weights (RF, GB, LR order).
weights = [0.5, 0.3, 0.2]
final_pred_weighted_avg = weighted_averaging(class_probs, weights)
weighted_avg_acc = accuracy_score(y_test, final_pred_weighted_avg)

# Report base-model and ensemble accuracies side by side.
report = [
    ("Accuracy of Model 1 (Random Forest):", accuracy_score(y_test, pred1)),
    ("Accuracy of Model 2 (Gradient Boosting):", accuracy_score(y_test, pred2)),
    ("Accuracy of Model 3 (Logistic Regression):", accuracy_score(y_test, pred3)),
    ("Ensemble Accuracy (Max Voting):", voting_acc),
    ("Ensemble Accuracy (Averaging):", avg_acc),
    ("Ensemble Accuracy (Weighted Averaging):", weighted_avg_acc),
]
for label, score in report:
    print(label, score)


Accuracy of Model 1 (Random Forest): 0.8733333333333333
Accuracy of Model 2 (Gradient Boosting): 0.8533333333333334
Accuracy of Model 3 (Logistic Regression): 0.8
Ensemble Accuracy (Max Voting): 0.8866666666666667
Ensemble Accuracy (Averaging): 0.8733333333333333
Ensemble Accuracy (Weighted Averaging): 0.8733333333333333


In [None]:
import numpy as np 
from sklearn.datasets import make_classification 
from sklearn.model_selection import train_test_split 
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier 
from sklearn.linear_model import LogisticRegression 
from sklearn.metrics import accuracy_score 

# Modified dataset creation: twice the samples and features of the first experiment.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=15,
    n_redundant=5,
    n_classes=2,
    random_state=42
)

# Splitting the dataset (30% held out for evaluation)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Re-create the three base models here rather than refitting whatever objects
# are currently bound in the kernel, so this cell's result does not depend on
# which other cell ran last. Hyperparameters match the first experiment.
model1 = RandomForestClassifier(n_estimators=100, random_state=42)
model2 = GradientBoostingClassifier(n_estimators=100, random_state=42)
model3 = LogisticRegression(random_state=42)

# Train models on the new data
model1.fit(X_train, y_train)
model2.fit(X_train, y_train)
model3.fit(X_train, y_train)

# Hard predictions
pred1 = model1.predict(X_test)
pred2 = model2.predict(X_test)
pred3 = model3.predict(X_test)

# Max Voting
final_pred_voting = max_voting([pred1, pred2, pred3])
voting_acc = accuracy_score(y_test, final_pred_voting)

# Averaging
probs1 = model1.predict_proba(X_test)
probs2 = model2.predict_proba(X_test)
probs3 = model3.predict_proba(X_test)
final_pred_avg = averaging([probs1, probs2, probs3])
avg_acc = accuracy_score(y_test, final_pred_avg)

# Weighted Averaging
# Declare the weights locally: exactly one per model. Relying on a `weights`
# list left over from another cell fails with a length mismatch in np.average
# whenever that list was last set for a different number of models.
weights = [0.5, 0.3, 0.2]
final_pred_weighted_avg = weighted_averaging([probs1, probs2, probs3], weights)
weighted_avg_acc = accuracy_score(y_test, final_pred_weighted_avg)

# Print updated results
print("Accuracy of Model 1 (Random Forest):", accuracy_score(y_test, pred1))
print("Accuracy of Model 2 (Gradient Boosting):", accuracy_score(y_test, pred2))
print("Accuracy of Model 3 (Logistic Regression):", accuracy_score(y_test, pred3))
print("Ensemble Accuracy (Max Voting):", voting_acc)
print("Ensemble Accuracy (Averaging):", avg_acc)
print("Ensemble Accuracy (Weighted Averaging):", weighted_avg_acc)


Accuracy of Model 1 (Random Forest): 0.8866666666666667
Accuracy of Model 2 (Gradient Boosting): 0.8866666666666667
Accuracy of Model 3 (Logistic Regression): 0.82
Ensemble Accuracy (Max Voting): 0.8866666666666667
Ensemble Accuracy (Averaging): 0.88
Ensemble Accuracy (Weighted Averaging): 0.8866666666666667


In [None]:
import numpy as np 
from sklearn.datasets import make_classification 
from sklearn.model_selection import train_test_split 
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier 
from sklearn.linear_model import LogisticRegression 
from sklearn.metrics import accuracy_score 
from sklearn.svm import SVC  

# Synthetic binary problem: 1000 samples, 20 features (15 informative, 5 redundant).
X, y = make_classification(
    n_samples=1000, n_features=20, n_informative=15, n_redundant=5,
    n_classes=2, random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Build and train the four base learners; fit() returns the fitted estimator.
model1 = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
model2 = GradientBoostingClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
model3 = LogisticRegression(random_state=42).fit(X_train, y_train)
# probability=True so the SVM exposes predict_proba for the soft-voting ensembles.
model4 = SVC(random_state=42, probability=True).fit(X_train, y_train)

base_models = (model1, model2, model3, model4)

# Hard labels (for max voting) and class probabilities (for the averaging variants).
pred1, pred2, pred3, pred4 = (clf.predict(X_test) for clf in base_models)
probs1, probs2, probs3, probs4 = (clf.predict_proba(X_test) for clf in base_models)

hard_votes = [pred1, pred2, pred3, pred4]
soft_votes = [probs1, probs2, probs3, probs4]

# Max Voting
final_pred_voting = max_voting(hard_votes)
voting_acc = accuracy_score(y_test, final_pred_voting)

# Averaging
final_pred_avg = averaging(soft_votes)
avg_acc = accuracy_score(y_test, final_pred_avg)

# Weighted Averaging. These example weights sum to 1.1, not 1; np.average
# divides by the weight total, so only their ratios matter.
weights = [0.5, 0.3, 0.2, 0.1]
final_pred_weighted_avg = weighted_averaging(soft_votes, weights)
weighted_avg_acc = accuracy_score(y_test, final_pred_weighted_avg)

# Print updated results
results = [
    ("Accuracy of Model 1 (Random Forest):", accuracy_score(y_test, pred1)),
    ("Accuracy of Model 2 (Gradient Boosting):", accuracy_score(y_test, pred2)),
    ("Accuracy of Model 3 (Logistic Regression):", accuracy_score(y_test, pred3)),
    ("Accuracy of Model 4 (SVM):", accuracy_score(y_test, pred4)),
    ("Ensemble Accuracy (Max Voting):", voting_acc),
    ("Ensemble Accuracy (Averaging):", avg_acc),
    ("Ensemble Accuracy (Weighted Averaging):", weighted_avg_acc),
]
for label, score in results:
    print(label, score)


Accuracy of Model 1 (Random Forest): 0.8866666666666667
Accuracy of Model 2 (Gradient Boosting): 0.8866666666666667
Accuracy of Model 3 (Logistic Regression): 0.82
Accuracy of Model 4 (SVM): 0.93
Ensemble Accuracy (Max Voting): 0.8966666666666666
Ensemble Accuracy (Averaging): 0.9066666666666666
Ensemble Accuracy (Weighted Averaging): 0.9033333333333333


In [None]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Synthetic binary problem: 1000 samples, 20 features (15 informative, 5 redundant).
X, y = make_classification(
    n_samples=1000, n_features=20, n_informative=15, n_redundant=5,
    n_classes=2, random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Base models, created and trained in one step (fit() returns the estimator).
model1 = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
model2 = GradientBoostingClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
model3 = LogisticRegression(random_state=42).fit(X_train, y_train)
# probability=True enables predict_proba, which the weighted average needs.
model4 = SVC(probability=True, random_state=42).fit(X_train, y_train)

# Class-probability estimates on the held-out split, one array per model.
probs1, probs2, probs3, probs4 = (
    clf.predict_proba(X_test) for clf in (model1, model2, model3, model4)
)

# Function for weighted averaging
def weighted_averaging(pred_probs, weights):
    """Return the per-sample argmax of the weighted mean class probabilities.

    ``pred_probs`` holds one ``(n_samples, n_classes)`` array per model and
    ``weights`` holds one number per model. ``np.average`` normalises by the
    sum of the weights, so they need not add up to 1.
    """
    model_axis = 0  # models are stacked along the first axis
    return np.average(pred_probs, weights=weights, axis=model_axis).argmax(axis=1)


# Candidate weightings, one weight per model in the order
# (Random Forest, Gradient Boosting, Logistic Regression, SVM).
# The sets are not normalised (the first sums to 0.8); np.average divides by
# the weight total, so only the ratios within a set matter.
weight_sets = [
    [0.2, 0.2, 0.2, 0.2],  # equal influence
    [0.4, 0.3, 0.2, 0.1],  # emphasis on Random Forest
    [0.1, 0.1, 0.4, 0.4],  # emphasis on Logistic Regression and SVM
    [0.3, 0.3, 0.2, 0.2],  # emphasis on the two tree ensembles
]

# The probability inputs are the same for every weighting, so build the list once.
all_probs = [probs1, probs2, probs3, probs4]

# Evaluate weighted averaging for each weight set
for weights in weight_sets:
    final_pred_weighted_avg = weighted_averaging(all_probs, weights)
    weighted_avg_acc = accuracy_score(y_test, final_pred_weighted_avg)
    print(f"Weighted Averaging Accuracy (Weights {weights}): {weighted_avg_acc:.4f}")


Weighted Averaging Accuracy (Weights [0.2, 0.2, 0.2, 0.2]): 0.9067
Weighted Averaging Accuracy (Weights [0.4, 0.3, 0.2, 0.1]): 0.9000
Weighted Averaging Accuracy (Weights [0.1, 0.1, 0.4, 0.4]): 0.9267
Weighted Averaging Accuracy (Weights [0.3, 0.3, 0.2, 0.2]): 0.9033


In [11]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Same 1000-sample, 20-feature synthetic problem, regenerated from a fixed seed.
dataset_params = dict(
    n_samples=1000,
    n_features=20,
    n_informative=15,
    n_redundant=5,
    n_classes=2,
    random_state=42,
)
X, y = make_classification(**dataset_params)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Base models
model1 = RandomForestClassifier(n_estimators=100, random_state=42)
model2 = GradientBoostingClassifier(n_estimators=100, random_state=42)
model3 = LogisticRegression(random_state=42)
model4 = SVC(probability=True, random_state=42)  # probability estimates needed for averaging
base_models = (model1, model2, model3, model4)

# Train every model on the same training split.
for estimator in base_models:
    estimator.fit(X_train, y_train)

# Class probabilities on the test split (inputs to the weighted average).
probs1, probs2, probs3, probs4 = (
    estimator.predict_proba(X_test) for estimator in base_models
)

# Function for weighted averaging
def weighted_averaging(pred_probs, weights):
    """Blend per-model class probabilities with the given weights and pick a class.

    Takes one ``(n_samples, n_classes)`` probability array per model in
    ``pred_probs`` and one weight per model in ``weights``. The weights are
    normalised by ``np.average``. Returns a 1-D array with the index of the
    highest weighted mean probability for each sample.
    """
    per_model = np.asarray(pred_probs)
    combined = np.average(per_model, axis=0, weights=weights)
    winners = combined.argmax(axis=1)
    return winners

def _evaluate_weighted_average(weights):
    """Score one weighting of the four base models on the test split.

    Runs ``weighted_averaging`` over ``probs1`` to ``probs4`` with the given
    per-model ``weights``, prints the resulting accuracy against ``y_test``
    to four decimals, and returns ``(predictions, accuracy)``.
    """
    predictions = weighted_averaging([probs1, probs2, probs3, probs4], weights)
    accuracy = accuracy_score(y_test, predictions)
    print(f"Weighted Averaging Accuracy (Weights {weights}): {accuracy:.4f}")
    return predictions, accuracy


# Weights are ordered (Random Forest, Gradient Boosting, Logistic Regression, SVM).
# They are not normalised (weights1 sums to 0.8); np.average divides by the
# weight total, so only the ratios within a set matter.
weights1 = [0.2, 0.2, 0.2, 0.2]
final_pred_weighted_avg1, weighted_avg_acc1 = _evaluate_weighted_average(weights1)

weights2 = [0.4, 0.3, 0.2, 0.1]
final_pred_weighted_avg2, weighted_avg_acc2 = _evaluate_weighted_average(weights2)

weights3 = [0.1, 0.1, 0.4, 0.4]
final_pred_weighted_avg3, weighted_avg_acc3 = _evaluate_weighted_average(weights3)

weights4 = [0.3, 0.3, 0.2, 0.2]
final_pred_weighted_avg4, weighted_avg_acc4 = _evaluate_weighted_average(weights4)


Weighted Averaging Accuracy (Weights [0.2, 0.2, 0.2, 0.2]): 0.9067
Weighted Averaging Accuracy (Weights [0.4, 0.3, 0.2, 0.1]): 0.9000
Weighted Averaging Accuracy (Weights [0.1, 0.1, 0.4, 0.4]): 0.9267
Weighted Averaging Accuracy (Weights [0.3, 0.3, 0.2, 0.2]): 0.9033


In [12]:
from sklearn.metrics import accuracy_score

# Hard predictions of each fitted base model
# (Random Forest, Gradient Boosting, Logistic Regression, SVM).
pred1, pred2, pred3, pred4 = (
    clf.predict(X_test) for clf in (model1, model2, model3, model4)
)

# Test-set accuracy of each base model, in the same order.
acc1, acc2, acc3, acc4 = (
    accuracy_score(y_test, pred) for pred in (pred1, pred2, pred3, pred4)
)

# Majority vote over the four hard predictions.
final_pred_voting = max_voting([pred1, pred2, pred3, pred4])
voting_acc = accuracy_score(y_test, final_pred_voting)

# Weighted averaging is recomputed here from the stored probabilities
# (not read back from earlier results), once per weighting.
stacked_probs = [probs1, probs2, probs3, probs4]
weighted_avg_acc1 = accuracy_score(
    y_test, weighted_averaging(stacked_probs, [0.25, 0.25, 0.25, 0.25])
)
weighted_avg_acc2 = accuracy_score(
    y_test, weighted_averaging(stacked_probs, [0.4, 0.3, 0.2, 0.1])
)
weighted_avg_acc3 = accuracy_score(
    y_test, weighted_averaging(stacked_probs, [0.1, 0.1, 0.4, 0.4])
)
weighted_avg_acc4 = accuracy_score(
    y_test, weighted_averaging(stacked_probs, [0.3, 0.3, 0.2, 0.2])
)

# Print all results; the leading newline separates base models from ensembles.
summary = [
    ("Accuracy of Model 1 (Random Forest):", acc1),
    ("Accuracy of Model 2 (Gradient Boosting):", acc2),
    ("Accuracy of Model 3 (Logistic Regression):", acc3),
    ("Accuracy of Model 4 (SVM):", acc4),
    ("\nEnsemble Accuracy (Max Voting):", voting_acc),
    ("Ensemble Accuracy (Weighted Averaging - Equal Weights):", weighted_avg_acc1),
    ("Ensemble Accuracy (Weighted Averaging - RF Focus):", weighted_avg_acc2),
    ("Ensemble Accuracy (Weighted Averaging - LR & SVM Focus):", weighted_avg_acc3),
    ("Ensemble Accuracy (Weighted Averaging - RF & GB Focus):", weighted_avg_acc4),
]
for label, value in summary:
    print(label, value)


Accuracy of Model 1 (Random Forest): 0.8866666666666667
Accuracy of Model 2 (Gradient Boosting): 0.8866666666666667
Accuracy of Model 3 (Logistic Regression): 0.82
Accuracy of Model 4 (SVM): 0.93

Ensemble Accuracy (Max Voting): 0.8966666666666666
Ensemble Accuracy (Weighted Averaging - Equal Weights): 0.9066666666666666
Ensemble Accuracy (Weighted Averaging - RF Focus): 0.9
Ensemble Accuracy (Weighted Averaging - LR & SVM Focus): 0.9266666666666666
Ensemble Accuracy (Weighted Averaging - RF & GB Focus): 0.9033333333333333
