In [24]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv1D, MaxPooling1D
from sklearn.preprocessing import LabelEncoder
import joblib
import tensorflow as tf

# Set random seed for reproducibility
np.random.seed(42)

# Load the dataset
data = pd.read_excel("C:\\Users\\91934\\Desktop\\Aug Hackfest\\Genetic and clinical data.xlsx")

# Encode categorical variables
label_encoder = LabelEncoder()
data['genetic_changes_brca'] = label_encoder.fit_transform(data['genetic_changes_brca'])
data['genetic_changes_rad51'] = label_encoder.fit_transform(data['genetic_changes_rad51'])
data['sub_type'] = label_encoder.fit_transform(data['sub_type'])
data['stage'] = label_encoder.fit_transform(data['stage'])
data['family_history'] = label_encoder.fit_transform(data['family_history'])
# Encode other categorical columns in a similar way...

# Split the dataset into features and target
X = data[['genetic_changes_brca', 'genetic_changes_rad51', 'sub_type', 'stage', 'family_history']]  # Add other features as needed
y = data['outlier']

# Convert labels to binary format
y_binary = y.replace({"Yes": 1, "No": 0})

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y_binary, test_size=0.3, random_state=42)

# Train a CNN model
X_train_reshaped = np.array(X_train).reshape(X_train.shape[0], X_train.shape[1], 1)
X_test_reshaped = np.array(X_test).reshape(X_test.shape[0], X_test.shape[1], 1)

cnn_model = Sequential()
cnn_model.add(Conv1D(32, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)))
cnn_model.add(MaxPooling1D(pool_size=2))
cnn_model.add(Flatten())
cnn_model.add(Dense(1, activation='sigmoid'))

cnn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
cnn_model.fit(X_train_reshaped, y_train, epochs=20, batch_size=32, validation_data=(X_test_reshaped, y_test))

# Save the CNN model
cnn_model.save('cnn_model.h5')

# Train a decision tree model
dt_model = DecisionTreeClassifier()
dt_model.fit(X_train, y_train)

# Save the decision tree model
joblib.dump(dt_model, 'dt_model.joblib')

# Integration of decision tree and CNN for prediction
def predict_outlier_ensemble(input_data):
    dt_model = joblib.load('dt_model.joblib')
    cnn_model = tf.keras.models.load_model('cnn_model.h5')
    dt_prediction = dt_model.predict(input_data)[0]
    cnn_input = input_data.reshape(1, input_data.shape[1], 1)
    cnn_prediction = np.round(cnn_model.predict(cnn_input))[0][0]
    ensemble_prediction = np.mean([dt_prediction, cnn_prediction])
    return int(ensemble_prediction)

# Example prediction using ensemble
input_data = np.array([1, 0, 1, 0, 1]).reshape(1, 5)
ensemble_prediction = predict_outlier_ensemble(input_data)
print("\033[1m" + f"Ensemble Prediction among Decision Tree and CNN: {ensemble_prediction}" + "\033[0m")


Epoch 1/20


  y_binary = y.replace({"Yes": 1, "No": 0})
  super().__init__(


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - accuracy: 0.4881 - loss: 0.7091 - val_accuracy: 0.4867 - val_loss: 0.6969
Epoch 2/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.4862 - loss: 0.7037 - val_accuracy: 0.4800 - val_loss: 0.6917
Epoch 3/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.4851 - loss: 0.6981 - val_accuracy: 0.5267 - val_loss: 0.6901
Epoch 4/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.4949 - loss: 0.6938 - val_accuracy: 0.5400 - val_loss: 0.6903
Epoch 5/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.4528 - loss: 0.6957 - val_accuracy: 0.5733 - val_loss: 0.6905
Epoch 6/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5237 - loss: 0.6942 - val_accuracy: 0.5400 - val_loss: 0.6902
Epoch 7/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
[1mEnsemble Prediction among Decision Tree and CNN: 0[0m
