In [4]:
import tensorflow as tf
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
import pandas as pd

# Read data
data = pd.read_parquet('../data/input/training.parquet')
data_input = pd.read_parquet('../data/input/input00.parquet')

# Define features function
features = lambda d: d['HEADING'] + ' ' + d['SECTION']

# Prepare data
X = data.apply(features, axis=1)
y = data['CATEGORY']

# Convert string labels to numerical labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Vectorize text data
vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_val_vec = vectorizer.transform(X_val)

# Define neural network model
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the model
model.fit(X_train_vec, y_train, epochs=5, batch_size=32, validation_data=(X_val_vec, y_val))

# Make predictions
predictions = model.predict(X_val_vec)
predicted_classes = (predictions > 0.5).astype('int32')

# Inverse transform the predicted classes to get the original labels
predicted_labels = label_encoder.inverse_transform(predicted_classes.reshape(-1))

# Print the original labels and predicted labels
for true_label, predicted_label in zip(y_val, predicted_labels):
    print(f"True Label: {label_encoder.classes_[true_label]}, Predicted Label: {predicted_label}")

# Calculate and print accuracy
accuracy = accuracy_score(y_val, predicted_classes)
print(f"Validation Accuracy: {accuracy}")

Epoch 1/5
[1m506/506[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.0730 - loss: -6370.1782 - val_accuracy: 0.0655 - val_loss: -114991.5391
Epoch 2/5
[1m506/506[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.0734 - loss: -247119.7031 - val_accuracy: 0.0655 - val_loss: -920550.8125
Epoch 3/5
[1m506/506[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.0717 - loss: -1309717.3750 - val_accuracy: 0.0655 - val_loss: -2888319.5000
Epoch 4/5
[1m506/506[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.0702 - loss: -3634286.2500 - val_accuracy: 0.0655 - val_loss: -6290135.5000
Epoch 5/5
[1m506/506[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.0756 - loss: -7330296.5000 - val_accuracy: 0.0655 - val_loss: -11282408.0000
[1m127/127[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 482us/step
True Label: cell-phones, Predicted Label: appliances


In [5]:
# Print only the predicted labels
for predicted_label in predicted_labels:
    print(f"Predicted Label: {predicted_label}")


Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: appliances
Predicted Label: app