In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_predict
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import VotingClassifier

# Read and preprocess the data
df = pd.read_csv('C4T-cormmscaled.csv')
# Round everything except the first column to two decimals.
df.iloc[:, 1:] = np.round(df.iloc[:, 1:], 2)
# Binary target from ThermalConductivity: class 0 for [0, 2], class 1 for (2, 5000].
# pd.cut bins are right-closed, so without include_lowest=True a value of
# exactly 0 (e.g. a small value rounded down by the step above) would fall
# outside the first bin and become a NaN label.
df['Class'] = pd.cut(
    df['ThermalConductivity'],
    bins=[0, 2, 5000],
    labels=[0, 1],
    include_lowest=True,
)
# Shuffle the rows. The fixed seed keeps the row order -- and therefore the
# cross-validation fold membership -- identical from run to run.
df = df.sample(frac=1, random_state=42)

# Define features and target
X = df.drop(columns=['Formula', 'ThermalConductivity', 'composition', 'Class'])
y = df['Class']

# Random Forest Classifier
# The forest draws bootstrap samples (max_samples=0.8) and random feature
# subsets (max_features='log2'); a fixed random_state makes those draws, and
# hence the predictions, repeatable between runs.
rf = RandomForestClassifier(
    n_estimators=100,
    criterion='gini',
    max_depth=15,
    min_samples_split=12,
    min_samples_leaf=5,
    max_features='log2',
    max_samples=0.8,
    class_weight='balanced',
    ccp_alpha=0.0015,
    random_state=42,
)
# Out-of-fold predictions from 6-fold cross-validation.
y_pred1 = cross_val_predict(rf, X, y, cv=6)

# Support Vector Classifier (SVC)
# Sigmoid-kernel SVM; class 0 is given twice the weight of class 1.
svc = SVC(
    kernel='sigmoid',
    C=100,
    gamma=0.1,
    coef0=2,
    class_weight={0: 2, 1: 1},
)
# Out-of-fold predictions from 5-fold cross-validation.
y_pred2 = cross_val_predict(estimator=svc, X=X, y=y, cv=5)

# Gradient Boosting Classifier
# subsample=0.9 and max_features='sqrt' make the boosting stochastic; a fixed
# random_state makes the fitted model, and hence the predictions, repeatable.
gb = GradientBoostingClassifier(
    learning_rate=0.1,
    n_estimators=100,
    subsample=0.9,
    min_samples_split=10,
    min_samples_leaf=5,
    max_depth=8,
    max_features='sqrt',
    ccp_alpha=0.001,
    random_state=42,
)
# Out-of-fold predictions from 8-fold cross-validation.
y_pred3 = cross_val_predict(gb, X, y, cv=8)

# Ensemble Model - Voting Classifier
# VotingClassifier defaults to hard (majority) voting. With only two
# estimators weighted 60/40, RF outvotes GB on every disagreement, so the
# ensemble would simply reproduce RF's predictions. Soft voting instead takes
# the weighted average of the predicted class probabilities, so GB actually
# contributes in proportion to its weight.
estimators = [('RF', rf), ('GB', gb)]
vote = VotingClassifier(estimators=estimators, voting='soft', weights=[60, 40])
# Out-of-fold ensemble predictions from 8-fold cross-validation.
y_pred = cross_val_predict(vote, X, y, cv=8)

# Add Predictions to the DataFrame
# cross_val_predict returns one prediction per row of X in X's row order,
# which is also df's row order, so this positional assignment lines up.
df['pred2'] = y_pred
