In [60]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [61]:
def load_data(file_path):
    """Read a CSV source into a pandas DataFrame.

    Args:
        file_path: Location of the CSV data; it is handed to
            ``pd.read_csv`` unchanged.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    return pd.read_csv(file_path)

In [62]:
def preprocess_data(df, label_column='class', test_size=0.2, random_state=42):
    """Split a labelled DataFrame into train/test feature and label arrays.

    Args:
        df: DataFrame containing the feature columns plus one label column.
        label_column: Name of the label column. Defaults to ``'class'``.
        test_size: Value forwarded to ``train_test_split`` as ``test_size``.
            Defaults to ``0.2``.
        random_state: Seed forwarded to ``train_test_split`` so the split is
            repeatable. Defaults to ``42``.

    Returns:
        The four values returned by ``train_test_split``, in the order
        ``X_train, X_test, y_train, y_test``.

    Raises:
        KeyError: If ``label_column`` is not a column of ``df``.
    """
    # Separate features and labels (as plain arrays, not pandas objects).
    X = df.drop(columns=[label_column]).values
    y = df[label_column].values

    # Split the data into training and testing sets.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Features are returned unscaled: no StandardScaler is applied here.
    return X_train, X_test, y_train, y_test

In [63]:
def build_model(input_shape):
    """Build and compile a small fully connected binary classifier.

    The network is ``Dense(64, relu) -> Dense(32, relu) -> Dense(1, sigmoid)``
    and is compiled with the Adam optimizer, binary cross-entropy loss and
    the accuracy metric.

    Args:
        input_shape: Shape of a single sample, without the batch dimension,
            e.g. ``(n_features,)``.

    Returns:
        The compiled ``tf.keras`` Sequential model.
    """
    model = tf.keras.models.Sequential([
        # Declare the input with an explicit Input object instead of passing
        # `input_shape` to the first Dense layer; the layer-level argument is
        # deprecated for Sequential models in Keras 3.
        tf.keras.Input(shape=input_shape),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(32, activation='relu'),
        # Single sigmoid unit to pair with the binary cross-entropy loss.
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

In [64]:
def train_model(model, X_train, y_train, epochs=10, batch_size=32):
    """Fit ``model`` on the training data and return the result of ``fit``.

    Args:
        model: Object exposing a Keras-style ``fit`` method.
        X_train: Training features, passed to ``fit`` unchanged.
        y_train: Training labels, passed to ``fit`` unchanged.
        epochs: Number of training epochs. Defaults to ``10``.
        batch_size: Mini-batch size. Defaults to ``32``.

    Returns:
        Whatever ``model.fit`` returns (for a Keras model, the training
        ``History`` object), so per-epoch metrics can be inspected afterwards.
        Callers that ignore the return value are unaffected.
    """
    return model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size)

In [65]:
def evaluate_model(model, X_test, y_test):
    """Evaluate ``model`` on the held-out data and print loss and accuracy.

    Args:
        model: Object whose ``evaluate(X_test, y_test)`` call yields exactly
            two values: the loss followed by the accuracy.
        X_test: Test features, passed to ``evaluate`` unchanged.
        y_test: Test labels, passed to ``evaluate`` unchanged.

    Returns:
        None. The two metrics are only printed.
    """
    test_loss, test_accuracy = model.evaluate(X_test, y_test)
    report = (("Test Loss:", test_loss), ("Test Accuracy:", test_accuracy))
    for label, value in report:
        print(label, value)

In [66]:

    # End-to-end run: load the CSV, encode the labels, split, train, evaluate.

    # Path to the CSV file. This variable is what load_data actually receives,
    # so editing it is enough to point the run at another dataset.
    csv_file_path = 'malgenome.csv'  # Change this to your CSV file path

    # Load the data and keep the untouched frame under its own name so this
    # cell can be re-run without re-encoding an already encoded frame.
    raw_df = load_data(csv_file_path)

    # Encode the string markers numerically: every "S" becomes 1 and every
    # "B" becomes 0, wherever they occur in the frame.
    df = raw_df.replace("S", 1).replace("B", 0)

    # Preprocess the data
    X_train, X_test, y_train, y_test = preprocess_data(df)

    print(X_train)

    # Build the model; each sample is a flat vector with one entry per feature.
    input_shape = (X_train.shape[1],)
    model = build_model(input_shape)

    # Train the model
    train_model(model, X_train, y_train)

    # Evaluate the model
    evaluate_model(model, X_test, y_test)

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [1 1 1 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [1 1 1 ... 0 0 0]]
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 0.03129643574357033
Test Accuracy: 0.9947368502616882


In [67]:
from google.colab import data_table
from vega_datasets import data

In [68]:
# Turn off Colab's data_table formatter for pandas DataFrames.
# NOTE(review): presumably DataFrames then fall back to the standard pandas
# rendering; confirm against the Colab docs.
data_table.disable_dataframe_formatter()

In [69]:
# Persist the trained model to 'my_model.h5'; the same filename is used by the
# download cell further down.
# NOTE(review): the truncated output under this cell looks like Keras's
# warning that HDF5 is a legacy format and '.keras' is preferred; confirm. If
# the filename is changed here, the download cell must be changed to match.
model.save('my_model.h5')

  saving_api.save_model(


In [70]:
# Colab-only helper module; this cell will not run outside Google Colab.
from google.colab import files
# Send the model file written by the earlier model.save(...) cell to the
# user's browser as a download.
files.download('my_model.h5')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>