In [16]:
!pip install snscrape


Collecting snscrape
  Downloading snscrape-0.7.0.20230622-py3-none-any.whl (74 kB)
Installing collected packages: snscrape
Successfully installed snscrape-0.7.0.20230622


In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
import nltk
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
nltk.download('stopwords')
nltk.download('punkt')
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import snscrape.modules.twitter as sntwitter
import pandas as pd

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\cyberchef\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\cyberchef\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [21]:
# Load the airline tweets export (path is relative to the working directory).
tweets_raw = pd.read_csv('Tweets.csv')  # or the correct path if different

# Collapse the three sentiment classes into a binary target:
# positive = 1, neutral/negative = 0. A sentiment value outside this mapping
# would become NaN in `label`.
SENTIMENT_TO_LABEL = {'positive': 1, 'neutral': 0, 'negative': 0}

# Build the modelling frame in a single non-mutating chain. The earlier version
# called dropna(inplace=True) and assigned a new column on a column slice of the
# loaded frame, which is the pattern that triggers pandas' SettingWithCopyWarning.
# Steps: keep text + sentiment, drop rows missing either, add the binary label,
# then keep only the text and label columns.
df = (
    tweets_raw[['text', 'airline_sentiment']]
    .dropna()
    .assign(label=lambda frame: frame['airline_sentiment'].map(SENTIMENT_TO_LABEL))
    .loc[:, ['text', 'label']]
)

In [22]:
# Holds the English stopword set after its first use so it is loaded only once.
_STOPWORD_CACHE = {}


def _english_stopwords():
    """Return the NLTK English stopwords as a frozenset, loading them on first use."""
    if 'english' not in _STOPWORD_CACHE:
        _STOPWORD_CACHE['english'] = frozenset(stopwords.words('english'))
    return _STOPWORD_CACHE['english']


def clean_text(text):
    """Normalise a tweet before tokenisation.

    Steps, in order: strip URLs, strip @mentions and '#' characters, remove
    every character that is not an ASCII letter, digit or whitespace,
    lower-case the result, and drop English stopwords.

    Args:
        text: The raw tweet as a string.

    Returns:
        The cleaned text with the remaining words joined by single spaces.
    """
    text = re.sub(r"http\S+|www\S+|https\S+", '', text)
    text = re.sub(r"@\w+|\#", "", text)
    text = re.sub(r"[^A-Za-z0-9\s]", '', text)
    text = text.lower()
    # Look the stopword set up once per call instead of re-evaluating
    # stopwords.words('english') for every single word, and test membership
    # against a set rather than a list.
    stop_words = _english_stopwords()
    return " ".join(word for word in text.split() if word not in stop_words)

# Run every raw tweet through clean_text and keep the result next to the original.
df['clean_text'] = df['text'].map(clean_text)

In [23]:
# Fit the word index on the cleaned tweets; num_words=10000 caps the vocabulary
# used when encoding and "<OOV>" is the out-of-vocabulary token.
# NOTE(review): the tokenizer is fitted on all rows, before the train/test
# split in a later cell.
cleaned_tweets = df['clean_text']
tokenizer = Tokenizer(oov_token="<OOV>", num_words=10000)
tokenizer.fit_on_texts(cleaned_tweets)

# Encode each tweet as a list of word indices, then bring every sequence to
# a common length of 100.
sequences = tokenizer.texts_to_sequences(cleaned_tweets)
padded = pad_sequences(sequences, maxlen=100)


In [24]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    padded,
    df['label'],
    random_state=42,
    test_size=0.2,
)

In [25]:
# Build LSTM Model
# Uses the Sequential / Embedding / LSTM / Dense names imported at the top of
# the notebook. The previous `tf.keras...` spelling depended on a `tf` name that
# is never imported, so this cell raised NameError on a fresh kernel.
model = Sequential([
    # input_dim matches the tokenizer's num_words (10000) and input_length
    # matches the padding length (100) used when building `padded`.
    Embedding(input_dim=10000, output_dim=128, input_length=100),
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    # Single sigmoid unit: the target is the binary `label` column.
    Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 100, 128)          1280000   
                                                                 
 lstm_1 (LSTM)               (None, 128)               131584    
                                                                 
 dense_1 (Dense)             (None, 1)                 129       
                                                                 
Total params: 1,411,713
Trainable params: 1,411,713
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Train for 5 epochs with batches of 64, using a 0.2 validation split of the
# training data; the returned object is kept in `history`.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=64,
    epochs=5,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5

In [None]:
# Score the held-out test split; with the single 'accuracy' metric compiled
# above, evaluate() yields the loss followed by the accuracy.
test_metrics = model.evaluate(X_test, y_test)
loss, acc = test_metrics
print(f"Test Accuracy: {acc:.2f}")

In [None]:
def predict_sentiment(text):
    """Classify a raw tweet with the trained model.

    The text goes through the same pipeline as the training data: clean_text,
    the fitted tokenizer, then padding to length 100.

    Args:
        text: The raw tweet as a string.

    Returns:
        "Positive" when the model's score is at least 0.5, otherwise
        "Negative". Because the training labels map both neutral and negative
        tweets to 0, "Negative" here covers neutral tweets as well.
    """
    cleaned = clean_text(text)
    encoded = tokenizer.texts_to_sequences([cleaned])
    model_input = pad_sequences(encoded, maxlen=100)
    # predict() returns one row per input with a single sigmoid output each.
    score = model.predict(model_input)[0][0]
    if score >= 0.5:
        return "Positive"
    return "Negative"


In [None]:
# Two hand-written examples as a quick sanity check of the classifier.
tweet_1 = "I absolutely loved the service today! The flight attendants were so friendly and helpful."
tweet_2 = "Worst airline ever. Delayed for hours with no explanation. Never flying with them again."

# Expected output: "Positive" for tweet_1, then "Negative" for tweet_2.
for sample_tweet in (tweet_1, tweet_2):
    print(predict_sentiment(sample_tweet))
