Load Data

In [None]:
import pandas as pd
from google.cloud import storage

# Where the review dataset lives in Cloud Storage
bucket_name = 'storageforreview'
file_name = 'gs://storageforreview/company_review.csv'

# GCS client handle (created here; the read below does not go through it)
storage_client = storage.Client()

# Read the CSV at the gs:// URI into a DataFrame
df = pd.read_csv(file_name)

print("Data loaded successfully.")


Explore Data

In [None]:
# Display basic information about the dataset
# Column names of the dataset
print(df.columns)

# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
df.info()

# First few rows as a sanity check on the parsed values
print(df.head())


Explore Data

In [None]:
# Display basic information about the dataset.
# DataFrame.info() prints its own report and returns None, so call it directly
# instead of passing the result to print() (which would also print "None").
df.info()
print(df.head())


Clean Data

In [None]:
# Example cleaning pass: discard every row that has a missing value in any
# column, then collapse rows that are exact duplicates of an earlier row.
df = df.dropna().drop_duplicates()
print("Data cleaned successfully")


Preprocess Data

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Handles on the review-text and rating columns
texts = df['reviewText']
labels = df['overall']

# Ratings 4 and 5 are treated as positive sentiment, ratings 1-3 as negative
sentiment_map = {
    1: 'negative',
    2: 'negative',
    3: 'negative',
    4: 'positive',
    5: 'positive',
}
df['sentiment'] = df['overall'].map(sentiment_map)

# Turn the sentiment strings into integer class ids
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['sentiment'])

# Hold out 20% of the rows for validation; the seed is fixed for repeatability
X_train, X_val, y_train, y_val = train_test_split(
    df['reviewText'],
    df['label'],
    test_size=0.2,
    random_state=42,
)

# Quick look at the frame now that 'sentiment' and 'label' exist
print(df.head())

# Vocabulary is built from the training split only; '<OOV>' is the
# out-of-vocabulary token
tokenizer = Tokenizer(oov_token='<OOV>')
tokenizer.fit_on_texts(X_train)

# Text -> integer sequences for both splits
X_train_sequences = tokenizer.texts_to_sequences(X_train)
X_val_sequences = tokenizer.texts_to_sequences(X_val)

# Padded width = length of the longest sequence across both splits
max_length = max(
    max(map(len, X_train_sequences)),
    max(map(len, X_val_sequences)),
)

# Pad (and, if ever needed, truncate) at the end of each sequence
X_train_padded = pad_sequences(
    X_train_sequences, maxlen=max_length, padding='post', truncating='post'
)
X_val_padded = pad_sequences(
    X_val_sequences, maxlen=max_length, padding='post', truncating='post'
)

# Report the resulting array shapes
print(f"Training data shape: {X_train_padded.shape}, Validation data shape: {X_val_padded.shape}")

print("Data preprocessed successfully")


Train and Evaluate the Sentiment Analysis Model

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

# Extract text and rating columns
texts = df['reviewText']
labels = df['overall']

# Convert ratings to sentiment labels
# Assuming a rating of 4 or 5 is positive sentiment, and 1 to 3 is negative sentiment
sentiment_map = {1: 'negative', 2: 'negative', 3: 'negative', 4: 'positive', 5: 'positive'}
df['sentiment'] = df['overall'].map(sentiment_map)

# Encode the sentiment strings as integer class ids
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['sentiment'])

# Single source of truth for the vocabulary size: the Tokenizer's num_words and
# the Embedding's input_dim below must agree, so both read this value.
vocab_size = 5000

# Tokenize text data
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Fixed padded sequence length.
# NOTE(review): 1579 is hard-coded; presumably the longest review observed
# during preprocessing -- confirm against the dataset.
maxlen = 1579
X = pad_sequences(sequences, maxlen=maxlen)

# Split data into training and validation sets (80/20, fixed seed)
X_train, X_val, y_train, y_val = train_test_split(X, df['label'], test_size=0.2, random_state=42)

# Print first few rows to verify
print(df.head())
print("Data preprocessed successfully")

# Build the model: embedding -> LSTM -> single sigmoid unit (binary output)
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=128))
model.add(LSTM(units=128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(units=1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=5, batch_size=64, validation_data=(X_val, y_val))

# Evaluate the model
val_loss, val_accuracy = model.evaluate(X_val, y_val)
# f-string so the measured accuracy is interpolated; without the prefix the
# literal text "{val_accuracy:.2f}" is printed.
print(f"Validation Accuracy: {val_accuracy:.2f}")

# Save the model in the native Keras format
model.save('sentiment_model.keras')
print("Model saved successfully")


Upload the model

In [None]:
from google.cloud import storage
import tensorflow as tf
import os

# Initialize the GCS client and resolve the destination bucket
storage_client = storage.Client()
bucket_name = 'storageforreview'
bucket = storage_client.bucket(bucket_name)

# Define paths
local_model_path = 'sentiment_model.keras'    # file written by the training cell
saved_model_path = 'sentiment_model_saved'    # local SavedModel directory
blob_path = 'models/sentiment_model_saved'    # GCS prefix for the SavedModel files

# Upload the Keras model file to GCS
blob = bucket.blob(f'models/{local_model_path}')
blob.upload_from_filename(local_model_path)
print("Keras model uploaded to GCS successfully.")

# Load the Keras model
model = tf.keras.models.load_model(local_model_path)
print("Keras model loaded.")

# Write a TensorFlow SavedModel directory.
# Newer Keras releases reject an extension-less path in model.save() and
# provide model.export() for SavedModel output; older releases without
# export() still accept save(..., save_format='tf').
if hasattr(model, 'export'):
    model.export(saved_model_path)
else:
    model.save(saved_model_path, save_format='tf')
print("Model converted to SavedModel format and saved.")

# Upload the SavedModel to GCS, mirroring the local directory layout
for root, dirs, files in os.walk(saved_model_path):
    for file in files:
        local_file_path = os.path.join(root, file)
        # Object names use '/' separators regardless of the local OS
        relative_path = os.path.relpath(local_file_path, saved_model_path).replace(os.sep, '/')
        blob = bucket.blob(f'{blob_path}/{relative_path}')
        blob.upload_from_filename(local_file_path)
        print(f'Uploaded {local_file_path} to gs://{bucket_name}/{blob.name}.')

print("SavedModel uploaded to GCS successfully.")


In [None]:
from google.cloud import aiplatform

# Initialize the Vertex AI client
aiplatform.init(project='sentimentanalysis-429522', location='us-central1')

# List all models registered in this project/location
models = aiplatform.Model.list()

# Print model details.
# The loop variable is deliberately not called `model`: that name holds the
# Keras model in this notebook, and a loop variable of the same name would
# silently replace it with the last Vertex AI model listed.
for registered_model in models:
    print(f"Model ID: {registered_model.name}, Model Display Name: {registered_model.display_name}")

Create endpoint

In [None]:
from google.cloud import aiplatform

# Initialize the Vertex AI client
aiplatform.init(project='sentimentanalysis-429522', location='us-central1')

# Reuse an endpoint with this display name if one already exists, so that
# re-running the cell does not create a new duplicate endpoint every time.
endpoint_display_name = 'sentiment-analysis-endpoint'
existing_endpoints = aiplatform.Endpoint.list(
    filter=f'display_name="{endpoint_display_name}"'
)

if existing_endpoints:
    endpoint = existing_endpoints[0]
    print(f"Reusing existing endpoint: {endpoint.name}")
else:
    # Create an endpoint
    endpoint = aiplatform.Endpoint.create(display_name=endpoint_display_name)
    print(f"Endpoint created: {endpoint.name}")


Deploy the model to the endpoint

In [None]:
# Look up the registered Vertex AI model by its hard-coded identifier
model = aiplatform.Model('7473346449133010944')

# Deployment settings: target endpoint and serving machine type
# (adjust machine_type as needed)
deploy_options = {
    'endpoint': endpoint,
    'machine_type': 'n1-standard-4',
}

# Deploy the model to the endpoint and report the result
deployed_model = model.deploy(**deploy_options)
print(f"Model deployed to endpoint: {deployed_model}")


Prepare a sample request

In [None]:

from google.cloud import aiplatform

# Initialize the Vertex AI Endpoint (uncomment when `endpoint` has not already
# been set by the endpoint-creation cell)
#endpoint = aiplatform.Endpoint(endpoint_name="YOUR_ENDPOINT_NAME")

# Raw review text to score
sample_reviews = ['The product is excellent and I am very satisfied.']

# The trained model starts with an Embedding layer and was fitted on padded
# integer sequences, so raw text cannot be sent as-is. Apply the same
# tokenizer and padding length used for training, then convert to plain
# Python lists so the payload is JSON-serializable.
sample_sequences = tokenizer.texts_to_sequences(sample_reviews)
instances = pad_sequences(sample_sequences, maxlen=maxlen).tolist()

# Make a prediction request
try:
    predictions = endpoint.predict(instances=instances)
    print(f"Predictions: {predictions}")
except Exception as e:
    print(f"An error occurred: {e}")

