In [None]:
# Colab-only step: attach Google Drive so files under MyDrive can be read.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


# Python and the Natural Language Toolkit (NLTK) library

In [None]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
# Fetch the NLTK resources the preprocessing step relies on:
# the punkt tokenizer models, the stop-word lists, and WordNet.
for resource_name in ("punkt", "stopwords", "wordnet"):
    nltk.download(resource_name)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [None]:
# Load the review dataset from the mounted Drive folder.
DATA_PATH = "/content/drive/MyDrive/amazon_product_data.csv"
df = pd.read_csv(DATA_PATH)

In [None]:
# Features are the raw review texts; targets are the sentiment labels.
X, y = df['review_body'], df['Sentiment_ebook']

In [None]:
# Data Preprocessing
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    """Normalize one review into a space-separated string of kept tokens.

    The text is lower-cased and tokenized with NLTK; tokens that are not
    purely alphanumeric are discarded, the remaining tokens are lemmatized,
    and lemmas found in the English stop-word list are removed.

    Args:
        text: Raw review text. Missing values (None/NaN) are accepted and any
            other non-string cell is converted with str() first.

    Returns:
        The cleaned tokens joined by single spaces, or '' for a missing value.
    """
    if not isinstance(text, str):
        # Empty CSV cells arrive as float NaN, which has no .lower().
        if pd.isna(text):
            return ''
        text = str(text)
    tokens = word_tokenize(text.lower())
    # Stop words are filtered after lemmatization, on the lemma itself.
    lemmas = (lemmatizer.lemmatize(token) for token in tokens if token.isalnum())
    return ' '.join(lemma for lemma in lemmas if lemma not in stop_words)

X = X.apply(preprocess_text)

In [None]:
# Split the cleaned texts first so the test reviews stay unseen while the
# vocabulary and IDF weights are learned (fitting TF-IDF on the full corpus
# leaks test-set statistics into training).
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# TF-IDF Vectorization
vectorizer = TfidfVectorizer(max_features=5000)  # You can adjust the max_features as needed
X_train = vectorizer.fit_transform(X_train_text)  # learn vocabulary/IDF on train only
X_test = vectorizer.transform(X_test_text)        # reuse the fitted vocabulary

# X itself is left untouched (still the cleaned text), so this cell can be
# re-run without feeding an already-vectorized matrix back into TF-IDF.

# Train a Naive Bayes classifier
classifier = MultinomialNB()
classifier.fit(X_train, y_train)

In [None]:
# Make predictions
y_pred = classifier.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
# zero_division=0 reports precision/F1 as 0 for a class that is never
# predicted, explicitly and without the undefined-metric warnings.
report = classification_report(y_test, y_pred, zero_division=0)

print("Accuracy:", accuracy)
print("\nConfusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", report)


Accuracy: 0.7

Confusion Matrix:
 [[ 0  6]
 [ 0 14]]

Classification Report:
               precision    recall  f1-score   support

    negaitve       0.00      0.00      0.00         6
    positive       0.70      1.00      0.82        14

    accuracy                           0.70        20
   macro avg       0.35      0.50      0.41        20
weighted avg       0.49      0.70      0.58        20



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# Sentiment analysis on Amazon product reviews using a neural network in Python with the TensorFlow and Keras libraries

In [None]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# Re-read the review dataset so this section starts from the raw data.
df = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/amazon_product_data.csv",
)

In [None]:
# Review text is the model input; the sentiment column is the target.
X, y = df['review_body'], df['Sentiment_ebook']

In [None]:
# Data Preprocessing
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    """Clean a single review and return it as one space-separated string.

    Steps: lower-case, NLTK word tokenization, drop tokens that are not
    purely alphanumeric, lemmatize, then drop English stop words.

    Args:
        text: Raw review text. None/NaN is accepted; any other non-string
            value is converted with str() before cleaning.

    Returns:
        The surviving lemmas joined by single spaces ('' for missing input).
    """
    if not isinstance(text, str):
        # A blank CSV cell is read as float NaN and would break .lower().
        if pd.isna(text):
            return ''
        text = str(text)
    tokens = word_tokenize(text.lower())
    # The stop-word check runs on the lemma, matching the original order.
    lemmas = (lemmatizer.lemmatize(token) for token in tokens if token.isalnum())
    return ' '.join(lemma for lemma in lemmas if lemma not in stop_words)

X = X.apply(preprocess_text)

In [None]:
# Tokenization: build a word index capped at the 5000 most frequent words,
# map each review to integer ids, then pad/trim every review to 200 ids.
tokenizer = Tokenizer(num_words=5000)  # vocabulary cap; tune as needed
tokenizer.fit_on_texts(X)
sequences = tokenizer.texts_to_sequences(X)
X = pad_sequences(sequences, maxlen=200)  # sequence length; tune as needed

# Encode labels as integers
encoder = LabelEncoder().fit(y)
y = encoder.transform(y)

In [None]:
# Hold out 20% of the reviews for testing (fixed seed for a repeatable split).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Network: embedding -> LSTM -> dense ReLU -> dropout -> sigmoid output.
# input_dim/input_length mirror num_words=5000 and maxlen=200 used when the
# reviews were tokenized and padded.
model = Sequential([
    Embedding(input_dim=5000, output_dim=128, input_length=200),
    LSTM(128),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Fit for five epochs in mini-batches of 64 reviews.
model.fit(x=X_train, y=y_train, batch_size=64, epochs=5)

# Score the held-out split; the model was compiled with a single 'accuracy'
# metric, so evaluate() yields the loss followed by the accuracy.
loss, accuracy = model.evaluate(x=X_test, y=y_test)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Loss: 0.8826917409896851
Test Accuracy: 0.699999988079071


# Support Vector Machines (SVM) with TF-IDF vectorization for sentiment analysis

In [None]:
!pip install transformers
!pip install torch
!pip install pandas
!pip install nltk

Collecting transformers
  Downloading transformers-4.33.1-py3-none-any.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.15.1 (from transformers)
  Downloading huggingface_hub-0.17.1-py3-none-any.whl (294 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.8/294.8 kB[0m [31m15.1 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m36.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m45.7 MB/s[0m eta [36m0:00:0

In [None]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, get_linear_schedule_with_warmup
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import torch
import torch.nn as nn
from tqdm import tqdm

In [None]:
# Start this section from a fresh copy of the review dataset.
df = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/amazon_product_data.csv",
)

In [None]:
# Inputs: review text. Targets: sentiment label.
X, y = df['review_body'], df['Sentiment_ebook']

In [None]:
# Data Preprocessing
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    """Turn one raw review into a cleaned, space-separated token string.

    The review is lower-cased, tokenized with NLTK, stripped of tokens that
    are not purely alphanumeric, lemmatized, and filtered against the
    English stop-word list.

    Args:
        text: Raw review text. Missing values (None/NaN) are tolerated and
            other non-string values are converted with str().

    Returns:
        Cleaned lemmas joined by single spaces; '' when the value is missing.
    """
    if not isinstance(text, str):
        # Missing CSV cells come through as float NaN, which lacks .lower().
        if pd.isna(text):
            return ''
        text = str(text)
    tokens = word_tokenize(text.lower())
    # Filter stop words on the lemmatized form, as the original did.
    lemmas = (lemmatizer.lemmatize(token) for token in tokens if token.isalnum())
    return ' '.join(lemma for lemma in lemmas if lemma not in stop_words)

X = X.apply(preprocess_text)

In [None]:
# Label Encoding
from sklearn.preprocessing import LabelEncoder

# Learn the label -> integer mapping, then replace y with its integer codes.
encoder = LabelEncoder().fit(y)
y = encoder.transform(y)

In [None]:
# Tokenization and Encoding
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

# Encode all reviews in one batched call. padding='max_length' replaces the
# deprecated pad_to_max_length flag, and truncation=True makes the cut-off at
# max_length explicit instead of relying on the library's fallback.
encoded_batch = tokenizer(
    list(X),                        # one cleaned review string per row
    add_special_tokens=True,        # Add '[CLS]' and '[SEP]'
    max_length=128,                 # Pad & truncate all sentences
    padding='max_length',
    truncation=True,
    return_attention_mask=True,     # Construct attention masks
    return_tensors='pt',            # Return PyTorch tensors
)

# One row per review, each row max_length ids/flags wide.
# NOTE(review): neither tensor is used by the later cells visible in this
# notebook, which train the SVM on TF-IDF features - confirm they are needed.
input_ids = encoded_batch['input_ids']
attention_masks = encoded_batch['attention_mask']

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split


# Hold out 20% of the cleaned reviews; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the TF-IDF vocabulary and weights from the training reviews only
# (at most 5000 features; adjust 'max_features' as needed).
tfidf_vectorizer = TfidfVectorizer(max_features=5000).fit(X_train)

# Project both splits with that single fitted vectorizer.
X_train_tfidf = tfidf_vectorizer.transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)


In [None]:
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# SVM classifier. The liblinear solver behind LinearSVC uses a random number
# generator, so the seed is fixed to make repeated runs reproducible.
# NOTE(review): the recorded run predicted class 1 for every test review;
# class_weight='balanced' may be worth evaluating on this imbalanced data.
svm_classifier = LinearSVC(random_state=42)

# Train the classifier on the TF-IDF vectors
svm_classifier.fit(X_train_tfidf, y_train)

# Make predictions on the test set
y_pred = svm_classifier.predict(X_test_tfidf)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
# zero_division=0 reports precision/F1 as 0 for a class that is never
# predicted, explicitly and without the undefined-metric warnings.
report = classification_report(y_test, y_pred, zero_division=0)

print("Accuracy:", accuracy)
print("\nConfusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", report)


Accuracy: 0.7

Confusion Matrix:
 [[ 0  6]
 [ 0 14]]

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         6
           1       0.70      1.00      0.82        14

    accuracy                           0.70        20
   macro avg       0.35      0.50      0.41        20
weighted avg       0.49      0.70      0.58        20



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# COMPARATIVE ANALYSIS


# **NLTK Preprocessing with a Naive Bayes Classifier**
NLTK is a natural language processing library that provides tools for text analysis. In this approach, NLTK is used to tokenize, lemmatize, and remove stop words from each review; the cleaned text is then converted to TF-IDF features and classified with a Multinomial Naive Bayes model. It relies on manually engineered features rather than learned representations. Evaluation typically includes accuracy, confusion matrix, precision, recall, and F1-score.

**Computation Time:** Training and prediction times are relatively low compared to deep learning models.

**Recommendation:** This approach is straightforward and quick to implement but may not yield state-of-the-art results for sentiment analysis tasks with complex text data.

# **TensorFlow/Keras with a Neural Network**

Utilizes the TensorFlow and Keras libraries to build and train a neural network model. It learns word embeddings from the data through a trainable embedding layer (pretrained vectors such as Word2Vec or GloVe could be substituted) and uses an LSTM to learn features from the text automatically. Evaluation typically includes accuracy, loss, and additional metrics like precision, recall, F1-score, and possibly AUC-ROC.

**Computation Time:** Training deep learning models can be computationally expensive and time-consuming, especially on large datasets.

**Recommendation:** Deep learning models like this one have the potential to achieve state-of-the-art results but require more computational resources and data. They are suitable for complex text analysis tasks.

# **Support Vector Machines (SVM) with TF-IDF Vectorization**

Uses the TF-IDF (Term Frequency-Inverse Document Frequency) vectorization technique to convert text data into numerical features. Then, a linear SVM classifier is trained on these features to perform sentiment classification. Evaluation typically includes accuracy, confusion matrix, precision, recall, and F1-score; the margin (the distance between the decision boundary and the closest training points, the support vectors) can also be inspected.

**Computation Time:** Training and prediction times are relatively low compared to deep learning models, making SVM efficient for moderate to large datasets.

**Recommendation:** SVM with TF-IDF vectorization is a strong choice for text classification tasks like sentiment analysis. It offers good accuracy, interpretability, and efficiency, making it suitable for production deployment, especially with limited computational resources.

**Comparative Analysis**

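**Evaluation Metrics:** Deep learning models (TensorFlow/Keras) tend to offer slightly higher accuracy on large datasets, and are often evaluated with a richer set of metrics, due to their ability to capture intricate patterns in text data. SVM with TF-IDF also provides respectable accuracy and is easier to interpret. In this notebook, however, all three models scored the same 0.70 accuracy on the 20-review test set, and the Naive Bayes and SVM models predicted "positive" for every test review, so the dataset is too small and imbalanced to rank the models empirically.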

**For production with limited computational resources and where interpretability is important, SVM with TF-IDF vectorization is recommended. It provides a good balance between accuracy and efficiency and is easier to maintain.**

*If you have access to substantial computational resources, a large dataset, and achieving the highest possible accuracy is critical, consider the TensorFlow/Keras neural network approach. It has the potential to outperform the other models but at a higher computational cost.*

In [None]:
!pip install openai

Collecting openai
  Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: openai
Successfully installed openai-0.28.0


In [None]:
import openai
import os

# Read the key from the OPENAI_API_KEY environment variable so it never has
# to be typed into the notebook; the original placeholder is the fallback.
your_key = os.environ.get("OPENAI_API_KEY", "your_api")

In [None]:
def ask_gpt3(question, conversation=None):
    """Send a question to gpt-3.5-turbo and return the assistant's reply text.

    Args:
        question: The user's question.
        conversation: Optional list of chat messages (dicts with "role" and
            "content") carrying earlier turns. It is updated in place with
            the system prompt (only if it has none yet), the new user
            message, and the assistant's reply, so passing the same list
            again continues the dialogue. When omitted, a fresh history is
            used for this call only.

    Returns:
        The content string of the first choice in the API response.
    """
    # A mutable default ([]) would be shared by every call and silently
    # accumulate unrelated questions, so the list is created per call.
    if conversation is None:
        conversation = []

    # Add the system prompt once per conversation, not once per question.
    if not any(message.get("role") == "system" for message in conversation):
        conversation.insert(0, {"role": "system", "content": "You are a helpful assistant that provides information about products."})
    conversation.append({"role": "user", "content": question})

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
        api_key=your_key
    )

    assistant_reply = response['choices'][0]['message']['content']
    # Record the reply so a reused history contains both sides of the chat.
    conversation.append({"role": "assistant", "content": assistant_reply})
    return assistant_reply

In [None]:
# One-shot console chat: read a single question, ask the model, show the reply.
if __name__ == "__main__":
    conversation_history = []
    user_question = input("enter the query")
    assistant_reply = ask_gpt3(
        user_question,
        conversation_history,
    )
    print("Assistant:", assistant_reply)

In [None]:
!pip install streamlit


Collecting streamlit
  Downloading streamlit-1.26.0-py2.py3-none-any.whl (8.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.1/8.1 MB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
Collecting pympler<2,>=0.9 (from streamlit)
  Downloading Pympler-1.0.1-py3-none-any.whl (164 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m164.8/164.8 kB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0m
Collecting tzlocal<5,>=1.1 (from streamlit)
  Downloading tzlocal-4.3.1-py3-none-any.whl (20 kB)
Collecting validators<1,>=0.2 (from streamlit)
  Downloading validators-0.22.0-py3-none-any.whl (26 kB)
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Downloading GitPython-3.1.36-py3-none-any.whl (189 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m189.5/189.5 kB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pydeck<1,>=0.8 (from streamlit)
  Downloading pydeck-0.8.0-py2.py3-none-any.whl (4.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import streamlit as st

In [None]:
def main():
    """Render the Streamlit page with a sentiment box and a chatbot box.

    Each input is paired with a button. Nothing is sent to the sentiment
    model or the chat API until its button is pressed, and blank input is
    answered with a warning instead of being forwarded.
    """
    st.title("Sentiment Analysis and Chatbot")
    st.sidebar.header("Chatbot")

    # User input for sentiment analysis
    user_input = st.text_area("Enter a text for sentiment analysis:")
    if st.button("Analyze Sentiment"):
        if user_input.strip():
            # NOTE(review): perform_sentiment_analysis is not defined in the
            # visible notebook; it must be provided before this branch runs.
            sentiment_result = perform_sentiment_analysis(user_input)
            st.write(f"Sentiment: {sentiment_result}")
        else:
            st.warning("Please enter some text to analyze.")

    # Chatbot interaction
    user_question = st.text_input("Chatbot: Ask a question")
    if st.button("Ask GPT-3"):
        if user_question.strip():
            # A fresh history per click keeps questions independent of each other.
            assistant_reply = ask_gpt3(user_question, [])
            st.write(f"Chatbot: {assistant_reply}")
        else:
            st.warning("Please enter a question for the chatbot.")

if __name__ == "__main__":
    main()
Run your Streamlit app:
Open a terminal, navigate to the directory where app.py is located, and run the following command:

```bash
streamlit run app.py
```
This will start a local web server, and you can access your web application in a web browser.

You can further customize the Streamlit app by adding more features, improving the user interface, and enhancing the interaction with your sentiment analysis model and GPT-3 chatbot.

Make sure to replace the placeholder code in the perform_sentiment_analysis and ask_gpt3 functions with your actual model inference code and GPT-3 interaction code.

Remember to keep your API keys and data paths secure when deploying this application to a production environment.







In [None]:
def query_processor(Query):
  """Predict the sentiment label for a query with the trained SVM.

  Args:
    Query: Either a raw text string, which is cleaned with preprocess_text
      and vectorized with the fitted tfidf_vectorizer, or an
      already-vectorized feature matrix, which is passed to the classifier
      unchanged.

  Returns:
    Whatever svm_classifier.predict returns for the features; the same
    value is also printed.
  """
  if isinstance(Query, str):
    # The classifier was fitted on TF-IDF vectors of preprocessed text, so
    # raw text has to go through the same two steps before predict().
    features = tfidf_vectorizer.transform([preprocess_text(Query)])
  else:
    features = Query
  prediction = svm_classifier.predict(features)
  print(prediction)
  return prediction


In [None]:
def main():
  """Render the Streamlit prediction page.

  Shows a banner and a text box for the query. Pressing "Predict" runs
  query_processor on the entered text and displays its result; pressing
  "About" prints two short information lines.
  """
  st.title("Bank Authenticator")
  html_temp ="""
  <div style="background-color: tomato; padding:10px">
  <h2 style="color: white; text-align:center;">Streamlit Bank Authenticator ML App </h2>
  </div>"""
  st.markdown(html_temp, unsafe_allow_html=True)
  query = st.text_input("Query", "Type Here")
  if st.button("Predict"):
    # Use the local `query` (the capitalized `Query` was never defined) and
    # call .format() on the string rather than embedding it in the literal.
    result = query_processor(query)
    st.success('The output is {}'.format(result))
  if st.button("About"):
    st.text("Lets LEarn")
    st.text("Built with Streamlit")


In [None]:
# Launch the app only when this code runs as the entry-point module.
if __name__ == "__main__":
    main()

2023-09-15 09:18:03.571 
  command:

    streamlit run /usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py [ARGUMENTS]
