In [1]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
import keras
import numpy as np

## Load Data

In [3]:
df = pd.read_csv("malgenome.csv")

## Clean Data

In [4]:
# Replace NaN Values
df = df.fillna(0)

# Convert float columns to int
int_columns = df.select_dtypes(include=['int']).columns
df[int_columns] = df[int_columns].astype(float)

In [6]:
X = df.drop(['class'], axis=1)
search_string = 'B'
result = X[X.eq(search_string).any(axis=1)]
result

Unnamed: 0,transact,bindService,onServiceConnected,ServiceConnection,android.os.Binder,READ_SMS,attachInterface,WRITE_SMS,TelephonyManager.getSubscriberId,Ljava.lang.Class.getCanonicalName,...,READ_EXTERNAL_STORAGE,Ljava.lang.Object.getClass,SET_ORIENTATION,DEVICE_POWER,EXPAND_STATUS_BAR,GET_TASKS,GLOBAL_SEARCH,GET_PACKAGE_SIZE,SET_PREFERRED_APPLICATIONS,android.intent.action.PACKAGE_CHANGED


In [7]:
search_string = 'B'

# Check if the search string exists in each cell
result = X.applymap(lambda x: search_string in str(x))

# Identify the columns where the search string exists
columns_with_string = result.any()

# Get the names of the columns
columns_containing_string = columns_with_string[columns_with_string].index.tolist()

print("Columns containing the string '{}': {}".format(search_string, columns_containing_string))

Columns containing the string 'B': []


  result = X.applymap(lambda x: search_string in str(x))


In [8]:
X['android.intent.action.NEW_OUTGOING_CALL'][3799] = 0.0

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  X['android.intent.action.NEW_OUTGOING_CALL'][3799] = 0.0


In [11]:
y = df['class']
y = y.replace({'S': 1, 'B': 0})

  y = y.replace({'S': 1, 'B': 0})


## Split Data into Train and Test

In [12]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [13]:
y_train = keras.utils.to_categorical(y_train,num_classes=2)
y_test = keras.utils.to_categorical(y_test,num_classes=2)

## Build the Neural Network

In [14]:
# Build the model
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(215,)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(2, activation='sigmoid')
])

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [15]:
model.compile(optimizer='adam',
                loss='binary_crossentropy',
                metrics=['accuracy'])

## Train the Neural Network

In [16]:
X_train = X_train.astype('float32')
y_train = y_train.astype('float32')
# Train the model
model.fit(X_train, y_train, epochs=10)

Epoch 1/10
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 665us/step - accuracy: 0.8448 - loss: 0.4072
Epoch 2/10
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 527us/step - accuracy: 0.9807 - loss: 0.0689
Epoch 3/10
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 591us/step - accuracy: 0.9913 - loss: 0.0402
Epoch 4/10
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 478us/step - accuracy: 0.9933 - loss: 0.0224
Epoch 5/10
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 471us/step - accuracy: 0.9978 - loss: 0.0127
Epoch 6/10
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 475us/step - accuracy: 0.9994 - loss: 0.0076
Epoch 7/10
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 548us/step - accuracy: 0.9998 - loss: 0.0045 
Epoch 8/10
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 599us/step - accuracy: 0.9998 - loss: 0.0040
Epoch 9/10
[1m95/95[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x34fb68b50>

## Evaluate the Neural Network

In [17]:
X_test = X_test.astype('float32')
y_test = y_test.astype('float32')

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 440us/step - accuracy: 0.9863 - loss: 0.0573
Test Loss: 0.03735211119055748
Test Accuracy: 0.99210524559021


## Test the Neural Network

In [18]:
predictions = model.predict(X_test[0:5])
predictions

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step


array([[6.9181630e-07, 9.9999923e-01],
       [4.4909378e-08, 9.9999994e-01],
       [3.5008411e-06, 9.9999696e-01],
       [1.9941821e-04, 9.9988681e-01],
       [9.8901409e-01, 9.3080467e-03]], dtype=float32)

In [19]:
true_labels = y_test[0:5]
true_labels

array([[0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.]], dtype=float32)

In [20]:
arr = np.array(["B", "S"])

In [21]:
arr[0]

'B'

In [22]:
for i in range(len(predictions)):
  print("Prediction:", arr[predictions[i].argmax()], ",",
        "True Label:", arr[true_labels[i].argmax()])

Prediction: S , True Label: S
Prediction: S , True Label: S
Prediction: S , True Label: S
Prediction: S , True Label: S
Prediction: B , True Label: B


## Save  and Convert the Neural Network to a tflite model

In [25]:
model.save("mymodel.keras")

In [None]:
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

In [None]:
with open('model.tflite', 'wb') as f:
  f.write(tflite_model)