In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import load_model
import pickle
import re
import nltk
# Fetch the NLTK resources this notebook relies on. Already-installed
# packages are reported as up-to-date and skipped by nltk.download.
# 'punkt_tab' is included because recent NLTK releases (3.9+) make
# word_tokenize load it instead of the pickled 'punkt' models; on older
# releases the extra download is harmless.
for nltk_resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet', 'vader_lexicon'):
    nltk.download(nltk_resource)
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\p_k_s\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\p_k_s\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\p_k_s\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\p_k_s\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [2]:
# Define a function to clean tweets
def clean_tweet(tweet):
    """Normalise a tweet into a lowercase, space-separated string of lemmas.

    Steps, in order:
      1. Strip @mentions, bare '#' characters (the tag text itself is kept),
         and http/https URLs.
      2. Drop every remaining character that is neither a word character
         nor whitespace.
      3. Tokenise with NLTK's ``word_tokenize``.
      4. Discard tokens whose lowercase form is an English stop word.
      5. Lemmatise each surviving token with ``WordNetLemmatizer`` and
         lowercase the result.

    Args:
        tweet: The raw tweet text (a string).

    Returns:
        The cleaned tokens joined by single spaces; an empty string when
        no token survives.
    """
    # Applied sequentially: mentions, hash signs, URLs, then leftover
    # punctuation / special characters.
    text = tweet
    for pattern in (r'@[A-Za-z0-9_]+', r'#', r'https?:\/\/\S+', r'[^\w\s]'):
        text = re.sub(pattern, '', text)

    words = word_tokenize(text)
    english_stop_words = set(stopwords.words('english'))
    wordnet_lemmatizer = WordNetLemmatizer()

    cleaned_words = []
    for word in words:
        if word.lower() in english_stop_words:
            continue
        # The lemmatizer sees the token in its original case; lowercasing
        # is applied to the lemma afterwards.
        cleaned_words.append(wordnet_lemmatizer.lemmatize(word).lower())

    return " ".join(cleaned_words)

In [6]:
# Load the tokenizer and model from file.
# SECURITY NOTE: pickle.load can execute arbitrary code while unpickling;
# only load a tokenizer file that you created yourself or otherwise trust.
with open('tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

# Load the trained model
loaded_model = load_model('sentiment_analysis_model.h5')

# Get input from the user
input_text = input('Enter some text: ')

# Preprocess the input text with clean_tweet (defined in an earlier cell)
# before tokenizing it.
# NOTE(review): assumes the model was trained on clean_tweet output -- confirm
# against the training notebook.
cleaned_text = clean_tweet(input_text)
input_sequence = tokenizer.texts_to_sequences([cleaned_text])
if not input_sequence[0]:
    # Nothing survived cleaning/tokenizing, so the model would only see padding.
    print('Warning: no usable words found in the input; the prediction below is not meaningful.')

# Pad to the sequence length the model expects. Prefer the first layer's
# input_length when it exists; fall back to the model's input shape for
# Keras versions / layers that do not expose that attribute.
max_length = getattr(loaded_model.layers[0], 'input_length', None)
if max_length is None:
    max_length = loaded_model.input_shape[1]
input_data = pad_sequences(input_sequence, maxlen=max_length, padding='post')

# Make a prediction using the loaded model and map the score to a label.
# NOTE(review): the thresholds presume a score roughly in [-1, 1]; if the
# model ends in a sigmoid (0..1) the 'bad' / 'very bad' branches can never
# be reached -- confirm against the training setup.
score = loaded_model.predict(input_data)[0][0]
if score >= 0.6:
    print('very good')
elif score >= 0.2:
    print('good')
elif score > -0.2:
    print('neutral')
elif score > -0.6:
    print('bad')
else:
    print('very bad')


neutral
