In [None]:
!pip install tensorflow
!pip install pandas
!pip install matplotlib

In [1]:
import pandas as pd
import numpy as np
import zipfile
import requests
import io
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Dense


In [None]:
url = 'https://archive.ics.uci.edu/static/public/508/qsar+oral+toxicity.zip'

# Download the archive. The timeout keeps the cell from hanging indefinitely
# when the server does not answer.
respuesta = requests.get(url, timeout=60)

# Fail loudly on a bad response: merely printing would leave `df` undefined
# (or stale from an earlier run) and the following cells would break or
# silently use old data.
if respuesta.status_code != 200:
    raise RuntimeError(f"Download ERROR {respuesta.status_code}")

# Open the ZIP straight from memory, without writing it to disk.
with zipfile.ZipFile(io.BytesIO(respuesta.content)) as archivo_zip:
    nombres = archivo_zip.namelist()
    print("Archivos en el ZIP:", nombres)

    # Prefer an entry that is actually a CSV; fall back to the first entry
    # (the previous behaviour) when no name ends in ".csv".
    nombre_csv = next(
        (nombre for nombre in nombres if nombre.lower().endswith('.csv')),
        nombres[0],
    )

    with archivo_zip.open(nombre_csv) as archivo_csv:
        # NOTE(review): read_csv treats the first line as the header. A later
        # cell addresses the label column as 'negative', which is also a label
        # value, so the file presumably has no header row and its first record
        # is being consumed as column names. If confirmed, switch to
        # header=None together with the cell that reads the label column.
        df = pd.read_csv(archivo_csv, sep=';')

# Show the first rows of the loaded frame.
print(df.head())


In [None]:
# Work on a copy so the downloaded frame `df` is not mutated and this cell
# gives the same result every time it is re-run.
datos = df.copy()

# The class label lives in the last column. Address it by position so the
# cell does not depend on how the header row was parsed.
columna_etiqueta = datos.columns[-1]

# Encode "positive" as 1 and "negative" as -1. `map` turns any unexpected
# label into NaN, which is checked below instead of leaving stray strings in y.
etiquetas = datos[columna_etiqueta].map({'positive': 1, 'negative': -1})
if etiquetas.isna().any():
    desconocidas = sorted(datos.loc[etiquetas.isna(), columna_etiqueta].astype(str).unique())
    raise ValueError(f"Unexpected label values in column {columna_etiqueta!r}: {desconocidas}")
datos[columna_etiqueta] = etiquetas.astype(int)

# Separate features (every column except the last) from the labels (last column).
X = datos.iloc[:, :-1].values
y = datos.iloc[:, -1].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
def reglas_if_then(x):
    """Classify a sample with two hand-written rules on its first two features.

    Args:
        x: Indexable sample; only x[0] and x[1] are inspected.

    Returns:
        1 when x[0] == 1 and x[1] == 0,
        -1 when x[0] == 0 and x[1] == 1,
        0 when neither rule applies (no defined classification).
    """
    primera = x[0]
    if primera == 1:
        # Positive rule: first feature set, second feature clear.
        return 1 if x[1] == 0 else 0
    if primera == 0:
        # Negative rule: first feature clear, second feature set.
        return -1 if x[1] == 1 else 0
    # The first feature is neither 0 nor 1: no rule matches.
    return 0

# Run every training row through the rule function to build the rule-derived
# label vector (values 1, -1 or 0) used as the initial training target.
y_train_reglas = np.array(list(map(reglas_if_then, X_train)))


In [None]:
# Build the notebook's "KBANN" network with Keras: one hidden layer of 10 ReLU
# units and a single tanh output unit (output range -1..1).
# Imported here so this cell works on its own; `Input` replaces the deprecated
# `input_dim` argument on the first Dense layer.
from keras.layers import Input

modelo = Sequential()
modelo.add(Input(shape=(X_train.shape[1],)))  # one input per feature column
modelo.add(Dense(10, activation='relu'))  # hidden layer: 10 neurons
modelo.add(Dense(1, activation='tanh'))  # output layer

# Compile the model. The targets take the values -1, 0 and 1, and Keras resolves
# the 'accuracy' string to a 0.5-threshold binary accuracy for a single-unit
# output, which is meaningless here; report the mean absolute error instead.
modelo.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_absolute_error'])

# Train on the rule-derived labels.
# NOTE(review): the real labels (y_train) and the held-out split (X_test, y_test)
# are never used in this notebook, so the network only learns to imitate
# reglas_if_then -- confirm that this is intended.
modelo.fit(X_train, y_train_reglas, epochs=100, batch_size=10)


In [None]:
import matplotlib.pyplot as plt

def data_view(X, y):
    """Scatter-plot the first two feature columns of X, coloured by y.

    Args:
        X: 2-D array; column 0 is drawn on the x-axis and column 1 on the y-axis.
        y: One colour value per row of X, passed to scatter as `c`.
    """
    plt.scatter(X[:, 0], X[:, 1], c=y)
    # The labels follow the plotted columns: column 0 is the first feature (x-axis)
    # and column 1 is the second feature (y-axis).
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.title('Training Data')
    plt.show()

# Plot the first two feature columns of the training set, coloured by the
# rule-derived labels.
data_view(X_train, y_train_reglas)


In [None]:
def test_learning(new_data):
    predictions = modelo.predict(new_data)
    return np.where(predictions > 0, 1, -1)  # clasification: positive or negative

# Smoke-test the trained model on synthetic binary samples.
# The column count is taken from the training matrix instead of a hard-coded
# 1024, so it always matches the model's input width. The seeded generator
# makes the printed result reproducible across runs.
new_data = np.random.default_rng(42).integers(0, 2, size=(30, X_train.shape[1]))
results = test_learning(new_data)
print("Clasification Results:\n", results)
