In [6]:
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import re
from sklearn.preprocessing import LabelEncoder
from keras.models import load_model

# Load data
# NOTE: this is a machine-specific absolute path (kept unchanged); point
# DATA_PATH at your local copy of the CSV before running.
DATA_PATH = '/Users/enochjoy/Desktop/UCM/Masters/Spring 2024/Neural Networks/Assignment 9/Sentiment (3).csv'
# Keep only the two columns used below. The explicit .copy() makes it clear
# that the column assignment further down operates on an independent frame.
data = pd.read_csv(DATA_PATH)[['text', 'sentiment']].copy()

# Preprocess data: lower-case each text, then strip every character that is
# not an ASCII letter, a digit or whitespace.
# The pattern is a raw string so that `\s` reaches the `re` module as written
# instead of being an invalid escape sequence in a normal string literal.
data['text'] = data['text'].apply(lambda x: re.sub(r'[^a-zA-Z0-9\s]', '', x.lower()))

max_features = 2000

# Tokenize text: only the `max_features` most frequent words are kept when
# texts are converted to integer sequences.
tokenizer = Tokenizer(num_words=max_features, split=' ')
tokenizer.fit_on_texts(data['text'].values)
X = tokenizer.texts_to_sequences(data['text'].values)
# Pad every sequence to a common length; X.shape[1] is reused at prediction time.
X = pad_sequences(X)

# Encode labels: strings -> integer ids -> one-hot rows
# (one-hot targets are what 'categorical_crossentropy' expects).
labelencoder = LabelEncoder()
integer_encoded = labelencoder.fit_transform(data['sentiment'])
y = to_categorical(integer_encoded)

# Split data into train and test sets (fixed random_state for a repeatable split)
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Define model architecture
def create_model():
    """Build and compile the LSTM classifier used in this cell.

    The network is an Embedding layer (vocabulary size ``max_features``,
    128-dimensional vectors), one LSTM layer with 196 units and 0.2
    input/recurrent dropout, and a 3-way softmax output layer.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    network = Sequential([
        Embedding(max_features, 128),
        LSTM(196, dropout=0.2, recurrent_dropout=0.2),
        Dense(3, activation='softmax'),
    ])
    network.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

# Create and train the model
batch_size = 32
model = create_model()
model.fit(X_train, Y_train, epochs=1, batch_size=batch_size, verbose=2)

# Save the model
model.save('model.keras')

# Load the model (round-trips through disk so the prediction below uses the saved artifact)
loaded_model = load_model('model.keras')

# Predict on new data
new_text = "A lot of good things are happening. We are respected again throughout the world, and that's a great thing. @realDonaldTrump"
# Apply the same cleaning as the training text. The pattern is a raw string so
# that `\s` is not an invalid escape sequence in the Python string literal.
new_text = re.sub(r'[^a-zA-Z0-9\s]', '', new_text.lower())
new_seq = tokenizer.texts_to_sequences([new_text])
# Pad to the same width as the training matrix so the input shape matches.
new_pad_seq = pad_sequences(new_seq, maxlen=X.shape[1])
predicted_probabilities = loaded_model.predict(new_pad_seq)
# Highest-probability class for the single input row, mapped back to its label string.
predicted_class_index = predicted_probabilities.argmax(axis=-1)[0]
predicted_sentiment = labelencoder.inverse_transform([predicted_class_index])[0]
print('Predicted sentiment:', predicted_sentiment)

291/291 - 5s - 18ms/step - accuracy: 0.6419 - loss: 0.8302
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
Predicted sentiment: Negative


In [8]:
pip install --upgrade tensorflow


Collecting tensorflow
  Obtaining dependency information for tensorflow from https://files.pythonhosted.org/packages/f9/14/67e9b2b2379cb530c0412123a674d045eca387dfcfa7db1c0028857b0a66/tensorflow-2.16.1-cp311-cp311-macosx_12_0_arm64.whl.metadata
  Downloading tensorflow-2.16.1-cp311-cp311-macosx_12_0_arm64.whl.metadata (4.1 kB)
Collecting h5py>=3.10.0 (from tensorflow)
  Obtaining dependency information for h5py>=3.10.0 from https://files.pythonhosted.org/packages/8d/70/2b0b99507287f66e71a6b2e66c5ad2ec2461ef2c534668eef96c3b48eb6d/h5py-3.10.0-cp311-cp311-macosx_11_0_arm64.whl.metadata
  Downloading h5py-3.10.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (2.5 kB)
Collecting ml-dtypes~=0.3.1 (from tensorflow)
  Obtaining dependency information for ml-dtypes~=0.3.1 from https://files.pythonhosted.org/packages/6e/a4/6aabb78f1569550fd77c74d2c1d008b502c8ce72776bd88b14ea6c182c9e/ml_dtypes-0.3.2-cp311-cp311-macosx_10_9_universal2.whl.metadata
  Using cached ml_dtypes-0.3.2-cp311-cp311-macosx_10_9

In [2]:
import pandas as pd
import re
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from keras.callbacks import EarlyStopping
from keras.models import load_model

# Load data
# NOTE: this is a machine-specific absolute path (kept unchanged); point
# DATA_PATH at your local copy of the CSV before running.
DATA_PATH = '/Users/enochjoy/Desktop/UCM/Masters/Spring 2024/Neural Networks/Assignment 9/Sentiment (3).csv'
# Keep only the two columns used below. The explicit .copy() makes it clear
# that the column assignment further down operates on an independent frame.
data = pd.read_csv(DATA_PATH)[['text', 'sentiment']].copy()

# Preprocess data: lower-case each text, then strip every character that is
# not an ASCII letter, a digit or whitespace.
# The pattern is a raw string so that `\s` reaches the `re` module as written
# instead of being an invalid escape sequence in a normal string literal.
data['text'] = data['text'].apply(lambda x: re.sub(r'[^a-zA-Z0-9\s]', '', x.lower()))

max_features = 2000

# Tokenize text: only the `max_features` most frequent words are kept when
# texts are converted to integer sequences.
tokenizer = Tokenizer(num_words=max_features, split=' ')
tokenizer.fit_on_texts(data['text'].values)
X = tokenizer.texts_to_sequences(data['text'].values)
# Pad every sequence to a common length; X.shape[1] is reused at prediction time.
X = pad_sequences(X)

# Encode labels as integer class ids (no one-hot step here: the model in this
# cell is compiled with 'sparse_categorical_crossentropy').
labelencoder = LabelEncoder()
y = labelencoder.fit_transform(data['sentiment'])

# Split data into train and test sets (fixed random_state for a repeatable split)
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Define model architecture
def create_model(dropout_rate=0.2):
    """Build and compile the LSTM classifier for integer-encoded labels.

    Args:
        dropout_rate: Value used for both the input ``dropout`` and the
            ``recurrent_dropout`` of the LSTM layer.

    Returns:
        A compiled ``Sequential`` model: Embedding (``max_features`` x 128),
        LSTM with 196 units, and a 3-way softmax layer, trained with Adam on
        sparse categorical cross-entropy and reporting accuracy.
    """
    stack = (
        Embedding(max_features, 128),
        LSTM(196, dropout=dropout_rate, recurrent_dropout=dropout_rate),
        Dense(3, activation='softmax'),
    )
    net = Sequential()
    for layer in stack:
        net.add(layer)
    net.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return net

# Custom wrapper class
class KerasClassifierWrapper(BaseEstimator, ClassifierMixin):
    """Minimal scikit-learn style wrapper around the model from ``create_model``.

    It exposes ``fit`` / ``predict`` / ``score`` / ``get_params`` /
    ``set_params`` so the Keras network can be tuned with ``GridSearchCV``.

    Parameters:
        dropout_rate: Forwarded to ``create_model`` when the network is built.

    Attributes:
        model: The Keras model trained by the most recent ``fit`` call;
            ``None`` until ``fit`` has been called.
    """

    def __init__(self, dropout_rate=0.2):
        # Only store the hyper-parameter here. The network itself is built in
        # fit(), so constructing or cloning the estimator stays cheap.
        self.dropout_rate = dropout_rate
        self.model = None

    def fit(self, X, y):
        """Train a freshly built network on ``X`` / ``y`` and return ``self``."""
        # Always start from new weights so a second fit() on the same object
        # does not continue training the previous network.
        self.model = create_model(dropout_rate=self.dropout_rate)
        # With epochs=1 the patience of 3 can never be exhausted, so this
        # callback only takes effect if the epoch count is raised.
        early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
        self.model.fit(X, y, callbacks=[early_stopping], epochs=1, batch_size=32, validation_split=0.2)
        return self

    def predict(self, X):
        """Return the predicted integer class id for every row of ``X``."""
        # `predict_classes` is not available on current Keras models; take the
        # arg-max over the softmax outputs instead.
        probabilities = self.model.predict(X, verbose=0)
        return probabilities.argmax(axis=-1)

    def set_params(self, **params):
        """Update ``dropout_rate`` (other keys are ignored) and return ``self``."""
        self.dropout_rate = params.get('dropout_rate', self.dropout_rate)
        # Any previously trained network no longer matches the parameters;
        # drop it so the next fit() builds a matching one.
        self.model = None
        return self

    def score(self, X, y):
        """Return the accuracy reported by ``model.evaluate`` on ``X`` / ``y``."""
        # evaluate() returns [loss, accuracy] because the model is compiled
        # with a single 'accuracy' metric.
        _, accuracy = self.model.evaluate(X, y, verbose=0)
        return accuracy

    def get_params(self, deep=True):
        """Return the tunable parameters as a dict (``deep`` is accepted but unused)."""
        return {'dropout_rate': self.dropout_rate}

# Define grid search parameters
param_grid = {
    'dropout_rate': [0.2, 0.3],
}

# Perform grid search (3-fold CV; scoring falls back to the estimator's own
# score() method because no `scoring` argument is given)
grid = GridSearchCV(estimator=KerasClassifierWrapper(), param_grid=param_grid, cv=3)
grid_result = grid.fit(X_train, Y_train)

# Print best parameters and best score
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Save the model (the Keras network held by the refitted best estimator)
grid_result.best_estimator_.model.save('model.keras')

# Load the model
loaded_model = load_model('model.keras')

# Predict on new data
new_text = "A lot of good things are happening. We are respected again throughout the world, and that's a great thing. @realDonaldTrump"
# Apply the same cleaning as the training text. The pattern is a raw string so
# that `\s` is not an invalid escape sequence in the Python string literal.
new_text = re.sub(r'[^a-zA-Z0-9\s]', '', new_text.lower())
new_seq = tokenizer.texts_to_sequences([new_text])
# Pad to the same width as the training matrix so the input shape matches.
new_pad_seq = pad_sequences(new_seq, maxlen=X.shape[1])
predicted_probabilities = loaded_model.predict(new_pad_seq)
# Highest-probability class for the single input row, mapped back to its label string.
predicted_class_index = predicted_probabilities.argmax(axis=-1)[0]
predicted_sentiment = labelencoder.inverse_transform([predicted_class_index])[0]
print('Predicted sentiment:', predicted_sentiment)

[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 20ms/step - accuracy: 0.6027 - loss: 0.9447 - val_accuracy: 0.6634 - val_loss: 0.8126
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 19ms/step - accuracy: 0.5962 - loss: 0.9359 - val_accuracy: 0.6594 - val_loss: 0.8150
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 20ms/step - accuracy: 0.5986 - loss: 0.9383 - val_accuracy: 0.6573 - val_loss: 0.8117
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 20ms/step - accuracy: 0.6105 - loss: 0.9362 - val_accuracy: 0.6586 - val_loss: 0.7961
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - accuracy: 0.5976 - loss: 0.9332 - val_accuracy: 0.6529 - val_loss: 0.8030
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 19ms/step - accuracy: 0.6053 - loss: 0.9371 - val_accuracy: 0.6589 - val_loss: 0.7991
[1m233/233[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 18ms/step - accuracy: 