In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Collect the full path of every file found under the Kaggle input directory
# (walk order is preserved), then print one path per line.
input_paths = [
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
]
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/amazon-reviews/amazon_review_polarity_csv.tgz
/kaggle/input/amazon-reviews/train.csv
/kaggle/input/amazon-reviews/test.csv


In [2]:
import tensorflow as tf
import io
from tensorflow import keras
from tensorflow.keras.layers import TextVectorization,Dense,Embedding,GlobalAveragePooling1D,LSTM,Dropout
from tensorflow.keras import Sequential
from tensorflow.data import Dataset,AUTOTUNE
from keras.callbacks import EarlyStopping

In [3]:
# Vocabulary cap handed to TextVectorization (max_tokens) and used as the
# Embedding input dimension.
VOCAB_SIZE = 5_000
# Every review is padded/truncated to this many tokens by TextVectorization.
# NOTE(review): the original line carried a trailing "#258" — presumably an
# alternative length the author tried; confirm before changing.
SEQUENCE_LENGTH = 50

In [4]:
class Amazon_reviews:
    """Review CSV exposed as batched ``tf.data`` training/validation splits.

    The CSV is read without a header row. Column 0 is the label (1 is
    subtracted from it), and columns 1 and 2 are joined with a single space to
    form the input text. The first ``train_test_split`` fraction of rows forms
    the training split; the remaining rows form the validation split.
    """

    def __init__(self,path,batch=512,train_test_split=0.8,shuffle_buffer=None)->None:
        """Load the CSV at ``path`` and build the underlying dataset.

        Args:
            path: Location of the header-less CSV file.
            batch: Number of examples per batch; must be positive.
            train_test_split: Fraction of rows (from the top of the file) used
                for training; must lie in [0, 1].
            shuffle_buffer: Number of individual examples held in the training
                shuffle buffer. ``None`` (default) uses the whole training
                split, i.e. a full shuffle; pass a smaller value to trade
                shuffle quality for memory.

        Raises:
            ValueError: If ``batch`` is not positive or ``train_test_split``
                is outside [0, 1].
        """
        if batch <= 0:
            raise ValueError(f"batch must be positive, got {batch!r}")
        if not 0.0 <= train_test_split <= 1.0:
            raise ValueError(
                f"train_test_split must be within [0, 1], got {train_test_split!r}")
        self.ratio = train_test_split
        self.batch = batch
        self.shuffle_buffer = shuffle_buffer
        data = pd.read_csv(path,header=None)
        self._process(data)

    def _process(self,data):
        """Turn the raw frame into a ``(text, label)`` dataset stored on ``self``."""
        # Columns 1 and 2 are concatenated into one string per row.
        X = data[1].astype(str) + " " + data[2].astype(str)
        # Shift labels down by one.
        # NOTE(review): assumes the file encodes the two classes as 1/2 so the
        # result is 0/1 — confirm against the dataset description.
        Y = data[0].astype(np.float32) - 1
        self.len = len(X)
        self.dataset = Dataset.from_tensor_slices((X,Y))

    def _train_size(self)->int:
        """Number of leading rows that belong to the training split."""
        return int(self.len*self.ratio)

    def get_train_dataset(self)->Dataset:
        """Return the training split, shuffled per example and then batched.

        Shuffling happens BEFORE batching so that each epoch sees batches with
        a different composition; shuffling after batching would only reorder
        fixed batches.
        """
        train_size = self._train_size()
        buffer_size = train_size if self.shuffle_buffer is None else self.shuffle_buffer
        return (
            self.dataset.take(train_size)
            # shuffle() rejects a non-positive buffer, so clamp to at least 1.
            .shuffle(max(int(buffer_size), 1))
            .batch(self.batch)
            .prefetch(AUTOTUNE)
        )

    def get_val_dataset(self)->Dataset:
        """Return the validation split (rows after the training rows), batched, unshuffled."""
        return (
            self.dataset.skip(self._train_size())
            .batch(self.batch)
            .prefetch(AUTOTUNE)
        )
    

In [5]:
# Load the training CSV; batch size and split ratio use the class defaults.
amazon_reviews = Amazon_reviews(path="/kaggle/input/amazon-reviews/train.csv")

2023-02-11 08:54:56.683788: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-02-11 08:54:56.780074: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-02-11 08:54:56.780881: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-02-11 08:54:56.783342: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

In [6]:
def _text_only(text, label):
    """Drop the label so that only the review text reaches ``adapt``."""
    return text


# Integer-token vectorizer: vocabulary capped at VOCAB_SIZE, output
# padded/truncated to SEQUENCE_LENGTH tokens.
vectorize_layer = TextVectorization(
    output_sequence_length=SEQUENCE_LENGTH,
    output_mode="int",
    max_tokens=VOCAB_SIZE,
)
# Build the vocabulary from the training split only.
train_text = amazon_reviews.get_train_dataset().map(_text_only)
vectorize_layer.adapt(train_text)

2023-02-11 08:55:01.578802: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


In [7]:
# Binary classifier over raw review strings:
# text -> token ids -> 8-d embeddings -> bidirectional LSTM -> dense head.
model = Sequential([
    vectorize_layer,
    Embedding(VOCAB_SIZE,8),
    keras.layers.Bidirectional(LSTM(8)),
    Dropout(0.2),
    Dense(16,activation='relu'),
    # The model is trained with BinaryCrossentropy(), which by default
    # (from_logits=False) expects a probability in [0, 1]. A tanh output can
    # be negative, which makes the loss ill-defined, so use sigmoid.
    Dense(1,activation='sigmoid')
])

In [8]:
# Stop training once val_loss has not improved for 2 consecutive epochs.
# restore_best_weights=True rolls the model back to the epoch with the best
# val_loss, so a model saved after fit() is the best one, not the last one.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
    restore_best_weights=True,
)


In [9]:
# Binary cross-entropy on the single-unit output, optimised with Adam;
# accuracy is tracked as the reporting metric.
bce_loss = tf.keras.losses.BinaryCrossentropy()
model.compile(
    loss=bce_loss,
    optimizer='adam',
    metrics=['accuracy'],
)

In [10]:
# Build both splits up front, then train for at most 5 epochs with early
# stopping; validation runs at the end of every epoch.
train_ds = amazon_reviews.get_train_dataset()
val_ds = amazon_reviews.get_val_dataset()
model.fit(
    train_ds,
    epochs=5,
    callbacks=[early_stop],
    validation_data=val_ds,
)

Epoch 1/5


2023-02-11 08:56:20.031329: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f76b11ddf50>

In [11]:
# Persist the trained model under the relative path "sentiment".
tf.keras.models.save_model(model, filepath="sentiment")

2023-02-11 09:07:27.089345: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
