# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score, cross_val_predict, GridSearchCV, train_test_split
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier, VotingClassifier
from sklearn.svm import SVC, SVR
from sklearn.metrics import confusion_matrix, classification_report, f1_score, r2_score, mean_squared_error, explained_variance_score
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet, RidgeClassifier
from sklearn.neighbors import KNeighborsRegressor, KNeighborsClassifier
from sklearn.kernel_ridge import KernelRidge
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier

# Load the feature table and round every column after the first to two decimals.
df = pd.read_csv('C4T-cormmscaled.csv')
df.iloc[:, 1:] = np.round(df.iloc[:, 1:], 2)

# Binary label derived from thermal conductivity: 0 for [0, 5], 1 for (5, 5000].
# include_lowest=True closes the left edge of the first bin, so a conductivity of
# exactly 0 is labelled 0 instead of becoming NaN (a NaN label would make the
# classifiers below reject y).
df['Target'] = pd.cut(df['ThermalConductivity'], [0, 5, 5000], labels=[0, 1], include_lowest=True)

# Class balance and column listing, kept as bare expressions for notebook display.
df['Target'].value_counts()

df.columns.values

# Shuffle the rows. cross_val_predict with an integer cv splits without shuffling,
# so row order decides fold membership; the fixed seed keeps that repeatable.
df = df.sample(frac=1, random_state=42)

# Features are everything except the identifiers, the raw target and the label.
X = df.drop(columns=['Formula', 'ThermalConductivity', 'composition', 'Target'])
y = df['Target']

# Random forest with balanced class weights and cost-complexity pruning.
# random_state pins the bootstrap sampling and per-split feature selection so the
# report and confusion matrix below are the same on every run.
random_forest = RandomForestClassifier(n_estimators=100, criterion='gini', max_depth=15, min_samples_split=12,
                                       min_samples_leaf=5, max_features='log2', max_samples=0.8, class_weight='balanced',
                                       ccp_alpha=0.0015, random_state=42)

# Out-of-fold predictions from 6-fold cross-validation, scored against the true labels.
y_pred_rf = cross_val_predict(random_forest, X, y, cv=6)
pd.DataFrame(classification_report(y, y_pred_rf, output_dict=True))

# Rows are true classes, columns are predicted classes (label 0 = low, 1 = high).
pd.DataFrame(confusion_matrix(y, y_pred_rf), index=['Actual low', 'Actual high'], columns=['Predicted low', 'Predicted high'])

# Sigmoid-kernel support vector classifier; class 0 carries twice the weight of class 1.
svc_params = {
    'kernel': 'sigmoid',
    'C': 100,
    'gamma': 0.1,
    'coef0': 2,
    'class_weight': {0: 2, 1: 1},
}
svc = SVC(**svc_params)

# Out-of-fold predictions from 5-fold cross-validation.
y_pred_svc = cross_val_predict(svc, X, y, cv=5)

# Per-class precision/recall/F1 as a table.
svc_report = classification_report(y, y_pred_svc, output_dict=True)
pd.DataFrame(svc_report)

# Rows are true classes, columns are predicted classes.
svc_confusion = confusion_matrix(y, y_pred_svc)
pd.DataFrame(
    svc_confusion,
    index=['Actual low', 'Actual high'],
    columns=['Predicted low', 'Predicted high'],
)

# 4-nearest-neighbour classifier with distance-weighted votes and p=1 (Manhattan) distance.
knn_params = {'n_neighbors': 4, 'weights': 'distance', 'p': 1}
knn = KNeighborsClassifier(**knn_params)

# Out-of-fold predictions from 8-fold cross-validation.
y_pred_knn = cross_val_predict(knn, X, y, cv=8)

# Per-class precision/recall/F1 as a table.
knn_report = classification_report(y, y_pred_knn, output_dict=True)
pd.DataFrame(knn_report)

# Rows are true classes, columns are predicted classes.
knn_confusion = confusion_matrix(y, y_pred_knn)
pd.DataFrame(
    knn_confusion,
    index=['Actual low', 'Actual high'],
    columns=['Predicted low', 'Predicted high'],
)

# Gradient-boosted trees with row subsampling and sqrt feature subsampling.
# random_state pins both sources of randomness so the metrics below are repeatable.
gradient_boosting = GradientBoostingClassifier(learning_rate=0.1, n_estimators=100, subsample=0.9, min_samples_split=10,
                                               min_samples_leaf=5, max_depth=8, max_features='sqrt', ccp_alpha=0.001,
                                               random_state=42)

# Out-of-fold predictions from 8-fold cross-validation, scored against the true labels.
y_pred_gb = cross_val_predict(gradient_boosting, X, y, cv=8)
pd.DataFrame(classification_report(y, y_pred_gb, output_dict=True))

# Rows are true classes, columns are predicted classes (label 0 = low, 1 = high).
pd.DataFrame(confusion_matrix(y, y_pred_gb), index=['Actual low', 'Actual high'], columns=['Predicted low', 'Predicted high'])

# Members of the ensemble. The SVC is deliberately left out (it was already
# disabled here); as configured above it does not expose predict_proba.
estimators = [
    ('RF', random_forest),
    ('KNN', knn),
    ('GB', gradient_boosting),
]

# Soft voting averages the members' predicted class probabilities using the weights.
# Under the default hard voting these weights made the ensemble a copy of the random
# forest: its vote (55) always outweighs the other two combined (15 + 30 = 45).
voting_classifier = VotingClassifier(estimators=estimators, voting='soft', weights=[55, 15, 30])

# Out-of-fold predictions from 8-fold cross-validation, scored against the true labels.
y_pred_voting = cross_val_predict(voting_classifier, X, y, cv=8)
pd.DataFrame(classification_report(y, y_pred_voting, output_dict=True))

# Rows are true classes, columns are predicted classes (label 0 = low, 1 = high).
pd.DataFrame(confusion_matrix(y, y_pred_voting), index=['Actual low', 'Actual high'], columns=['Predicted low', 'Predicted high'])

# Predictions come back in the row order of X, which is the row order of df,
# so the positional assignment lines up.
df['Predicted_Target'] = y_pred_voting
