In [25]:
import re
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import keras
from sklearn.metrics import accuracy_score

In [26]:
!pip install tweet-preprocessor

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [27]:
# Text-cleaning dependencies and the module-level NLTK objects shared by the
# preprocessing helpers defined in the next cell.
import preprocessor as p  # tweet-preprocessor package; p.clean() is called by preprocess_tweet
import nltk
from nltk.stem import PorterStemmer
from nltk.stem import SnowballStemmer
porter_stemmer = PorterStemmer()  # NOTE(review): not referenced by any visible cell
snowball_stemmer = SnowballStemmer(language='english')  # the stemmer lemme_stem_stop actually uses
nltk.download('punkt')  # NLTK resource fetched before word_tokenize is used
from nltk.tokenize import word_tokenize
nltk.download('wordnet')  # NLTK resource fetched for the WordNet lemmatizer below
from nltk.stem import WordNetLemmatizer
nltk.download('stopwords')  # must be present before stopwords.words() is called
from nltk.corpus import stopwords
stop_words = set(stopwords.words('english'))  # set for O(1) membership tests in lemme_stem_stop
ps = PorterStemmer()  # NOTE(review): second PorterStemmer instance; not referenced by any visible cell
lemmatizer = WordNetLemmatizer()  # NOTE(review): created but not referenced by any visible cell

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [28]:
def preprocess_tweet(text):
    """Return ``text`` after running it through tweet-preprocessor's ``p.clean``."""
    return p.clean(text)

def remove_urls(raw_text):
    """Delete URL-like tokens from ``raw_text``.

    Two passes are applied in order: first every run of non-whitespace
    starting at ``http``, then every run starting at ``www``. The order is
    significant, so the patterns are deliberately not merged into one.
    """
    cleaned = raw_text
    for pattern in (r'http\S+', r'www\S+'):
        cleaned = re.sub(pattern, '', cleaned)
    return cleaned

def remove_nonalpha(raw_text):
  """Drop every character that is neither an ASCII letter nor whitespace."""
  non_alpha = re.compile(r'[^a-zA-Z\s]')
  return non_alpha.sub('', raw_text)

def remove_lines(raw_text):
  """Replace each newline character in ``raw_text`` with a single space."""
  newline = re.compile(r'\n')
  flattened = newline.sub(' ', raw_text)
  return flattened

def lemme_stem_stop(raw_text):
  """Tokenize, lower-case, drop stop words and stem what remains.

  Tokens come from ``word_tokenize``; each is lower-cased before being
  checked against the module-level ``stop_words`` set, and survivors are
  stemmed with ``snowball_stemmer``. Despite the function name, no
  lemmatization step is applied.

  Returns the stems joined by single spaces.
  """
  lowered = (token.lower() for token in word_tokenize(raw_text))
  stems = [snowball_stemmer.stem(token) for token in lowered if token not in stop_words]
  return " ".join(stems)

def pre_processing(raw_text):
  """Run the full cleaning pipeline on one raw tweet and return the result.

  Steps, in order: ``preprocess_tweet`` -> ``remove_urls`` ->
  ``remove_nonalpha`` -> ``remove_lines`` -> ``lemme_stem_stop``.
  """
  text = preprocess_tweet(raw_text)
  text = remove_urls(text)
  text = remove_nonalpha(text)
  text = remove_lines(text)
  return lemme_stem_stop(text)

In [86]:
class UNet():
    """Binary tweet-sentiment classifier backed by a bidirectional LSTM.

    Despite the class name, the network assembled in ``build`` is an
    Embedding -> Bidirectional(LSTM) -> Dense(relu) -> Dense(sigmoid) stack.

    Expected call order: ``load_data()`` -> ``build()`` -> ``train()`` ->
    ``evaluate()`` / ``prediction(...)``.
    """

    def __init__(self,path):
        """Store the CSV location; nothing is read until ``load_data`` runs.

        Args:
            path: location of the CSV file handed to ``pd.read_csv``.
        """
        self.path=path

    def load_data(self):
        """Read the CSV, clean the text column, encode labels and split 80/20.

        Reads the ``text`` and ``airline_sentiment`` columns. Sets ``df``,
        ``length``, ``encoded_labels`` and the four ``train_*`` / ``test_*``
        attributes.
        """
        data = pd.read_csv(self.path)
        data['text'] = data['text'].apply(pre_processing)
        self.df = data
        # Per-row count of space-separated pieces in the cleaned text.
        self.length = self.df['text'].apply(lambda text: len(text.split(' ')))
        reviews = self.df["text"]
        labels = self.df["airline_sentiment"]
        encoder = LabelEncoder()
        # NOTE(review): the sigmoid output and binary cross-entropy loss in
        # build() assume exactly two distinct label values -- confirm against the CSV.
        self.encoded_labels = encoder.fit_transform(labels)
        (self.train_sentences, self.test_sentences,
         self.train_labels, self.test_labels) = train_test_split(
            reviews, self.encoded_labels, test_size=0.2, random_state=123)

    def _encode(self, texts):
        """Convert already-cleaned texts to padded integer sequences.

        Uses the fitted ``self.tokenizer`` and the configured ``max_length``,
        ``padding_type`` and ``trunc_type`` so that training, evaluation and
        inference inputs are all shaped the same way.
        """
        sequences = self.tokenizer.texts_to_sequences(texts)
        return pad_sequences(sequences, padding=self.padding_type,
                             truncating=self.trunc_type, maxlen=self.max_length)

    @staticmethod
    def _to_labels(probabilities):
        """Threshold model outputs at 0.5 and return a flat list of 0/1 ints."""
        flat = np.asarray(probabilities).reshape(-1)
        return (flat >= 0.5).astype(int).tolist()

    def _preprocess(self):
        """Set the text hyper-parameters, fit the tokenizer and pad both splits.

        The tokenizer is fitted on the training sentences only. Sets
        ``train_padded`` and ``test_padded``.
        """
        self.vocab_size = 3000
        self.oov_tok = '<OOK>'
        self.embedding_dim = 100
        self.max_length = 150
        self.padding_type='post'
        self.trunc_type='post'
        self.tokenizer = Tokenizer(num_words = self.vocab_size, oov_token=self.oov_tok)
        self.tokenizer.fit_on_texts(self.train_sentences)
        # padding_type / trunc_type are now actually forwarded to pad_sequences;
        # previously trunc_type was ignored and the library default was used.
        self.train_padded = self._encode(self.train_sentences)
        self.test_padded = self._encode(self.test_sentences)

    def build(self):
        """Prepare the padded inputs and compile the Keras model.

        Calls ``_preprocess`` first, so ``load_data`` must already have run.
        """
        self._preprocess()
        self.model =keras.Sequential([
            keras.layers.Embedding(self.vocab_size, self.embedding_dim, input_length=self.max_length),
            keras.layers.Bidirectional(keras.layers.LSTM(64)),
            keras.layers.Dense(24, activation='relu'),
            keras.layers.Dense(1, activation='sigmoid')])
        self.model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

    def train(self):
        """Fit the model for 5 epochs, holding out 10% of the training data.

        The Keras history object is stored on ``self.history``.
        """
        self.history = self.model.fit(self.train_padded, self.train_labels,
                    epochs=5, verbose=1,
                    validation_split=0.1)

    def evaluate(self):
        """Score the held-out test split.

        Returns:
            A human-readable string containing the accuracy as a percentage.
        """
        predicted = self._to_labels(self.model.predict(self.test_padded))
        # accuracy_score's signature is (y_true, y_pred).
        accuracy = accuracy_score(self.test_labels, predicted)
        return "Accuracy of the built model is "+str(accuracy*100)+" %"

    def prediction(self,sentences):
        """Classify raw sentences.

        Args:
            sentences: iterable of raw strings. A single bare string is
                treated as one sentence rather than iterated per character.

        Returns:
            A list with one 0/1 int per input sentence; ``[]`` for empty input.
        """
        if isinstance(sentences, str):
            sentences = [sentences]
        processed_sentences = [pre_processing(sent) for sent in sentences]
        if not processed_sentences:
            # Nothing to score; avoids calling the model on an empty batch.
            return []
        # Tokenize and pad once for the whole batch, not once per sentence.
        padded = self._encode(processed_sentences)
        return self._to_labels(self.model.predict(padded))

In [87]:
def run(path):
  """Load, build, train and evaluate a ``UNet`` for the CSV at ``path``.

  Prints the evaluation string and returns the trained instance.
  """
  net = UNet(path)
  for stage in (net.load_data, net.build, net.train):
    stage()
  print(net.evaluate())
  return net

In [88]:
# NOTE(review): hard-coded absolute path; the CSV must already exist at this location.
path = "/content/airline_sentiment_analysis.csv"
# Runs the whole load/build/train/evaluate pipeline; `model` is reused by later cells.
model = run(path)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Accuracy of the built model is 90.34213945430922 %


In [89]:
# Three sample tweets used as a quick manual check of the trained model.
sentences = ["@VirginAmerica yes, nearly every time I fly VX this ear worm wont go away :)", 
            "@VirginAmerica seriously would pay $30 a flight for seats that didn't have this playing. it's really the only bad thing about flying VA", 
            "@VirginAmerica it was amazing, and arrived an hour early. Youre too good to me."]

# Last expression of the cell, so the list of 0/1 labels is displayed as its output.
model.prediction(sentences)

[1, 0, 1]

In [90]:
!pip install fastapi nest-asyncio pyngrok uvicorn

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [96]:
def sentiment_analysis(sentence):
  """Classify one sentence with the global ``model``.

  Returns "Negative" when the predicted label is 0, otherwise "Positive".
  """
  label = model.prediction([sentence])[0]
  return "Negative" if label == 0 else "Positive"

In [97]:
from pydantic import BaseModel
from fastapi import FastAPI
import json

class senti(BaseModel):
  # Request-body schema used by the POST /predict route.
  sentence: str  # raw text to classify

# Application object; the route decorators below register handlers on it.
app = FastAPI()

@app.get('/')
def index():
    """Root route: return a short message describing the service."""
    payload = {'message': 'This is the service for performing sentiment analysis'}
    return payload

@app.post('/predict')
def predict_sentiment(data:senti):
    """Classify ``data.sentence`` and return it under the ``prediction`` key."""
    return {'prediction': sentiment_analysis(data.sentence)}

In [98]:
import nest_asyncio
from pyngrok import ngrok
import uvicorn

# Open an ngrok tunnel to local port 8000 and show its public address.
ngrok_tunnel = ngrok.connect(8000)
print('Public URL:', ngrok_tunnel.public_url)
# Applied before starting the server; presumably needed because the notebook
# kernel already runs an asyncio event loop -- TODO confirm.
nest_asyncio.apply()
# Serve on the same port the tunnel points at; this call occupies the cell
# until the server is shut down.
uvicorn.run(app, port=8000)

Public URL: http://1951-34-133-40-188.ngrok.io


INFO:     Started server process [54]
INFO:uvicorn.error:Started server process [54]
INFO:     Waiting for application startup.
INFO:uvicorn.error:Waiting for application startup.
INFO:     Application startup complete.
INFO:uvicorn.error:Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)
INFO:uvicorn.error:Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


INFO:     54.86.50.139:0 - "POST /predict HTTP/1.1" 200 OK
INFO:     54.86.50.139:0 - "POST /predict HTTP/1.1" 200 OK


INFO:     Shutting down
INFO:uvicorn.error:Shutting down
INFO:     Waiting for application shutdown.
INFO:uvicorn.error:Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:uvicorn.error:Application shutdown complete.
INFO:     Finished server process [54]
INFO:uvicorn.error:Finished server process [54]
