In [None]:
pip install tensorflow

In [None]:
pip install keras

In [None]:
pip install textblob

In [None]:
pip install scikit-learn 

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from textblob import TextBlob
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, Conv1D, MaxPooling1D, Dropout
from keras.utils import to_categorical

# Read the raw dataset, then discard duplicate rows and rows with any missing value.
df = pd.read_csv('sentiment_data.csv').drop_duplicates().dropna()

# Attach the two TextBlob sentiment scores of each tweet as extra columns.
# NOTE(review): these columns are not passed to the model anywhere in this cell.
for score_name in ('polarity', 'subjectivity'):
    df[score_name] = df['Tweet'].apply(
        lambda text, attr=score_name: getattr(TextBlob(text).sentiment, attr)
    )

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)

# Fit the vocabulary on the training tweets only, so the test split stays unseen.
tokenizer = Tokenizer(num_words=10000, lower=True)
tokenizer.fit_on_texts(train_data['Tweet'])

# Turn each tweet into a sequence of word indices ...
train_sequences = tokenizer.texts_to_sequences(train_data['Tweet'])
test_sequences = tokenizer.texts_to_sequences(test_data['Tweet'])

# ... and bring every sequence to a fixed length of 100.
x_train = pad_sequences(train_sequences, maxlen=100)
x_test = pad_sequences(test_sequences, maxlen=100)

# Classification targets, taken from the 'label' column of each split.
y_train, y_test = train_data['label'].values, test_data['label'].values

# Network: embedding -> 1D convolution -> max pooling -> LSTM -> single sigmoid unit.
model = Sequential([
    Embedding(10000, 128, input_length=100),
    Conv1D(64, 5, activation='relu'),
    MaxPooling1D(pool_size=4),
    LSTM(128),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Fit for 10 epochs in batches of 64.
# NOTE(review): the test split doubles as validation data here, so the
# accuracy reported below is not measured on data unseen during training.
model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_data=(x_test, y_test),
)

# Score the fitted model on the test split without progress output.
loss, accuracy = model.evaluate(x_test, y_test, verbose=0)
print(f'Test accuracy: {accuracy*100}')

# Classify a few hand-written tweets with the trained model.
new_tweets = ['This is a Fake news tweet!', 'This is a Real news tweet', 'This is a Neutral news tweet.']
new_tweet_features = pd.DataFrame({'text': new_tweets})

# TextBlob scores are kept for inspection only; the model below consumes just
# the token sequences, not these two columns.
new_tweet_features['polarity'] = new_tweet_features['text'].apply(lambda x: TextBlob(x).sentiment.polarity)
new_tweet_features['subjectivity'] = new_tweet_features['text'].apply(lambda x: TextBlob(x).sentiment.subjectivity)

# Encode with the tokenizer fitted on the training tweets and pad to the
# sequence length the model was built with.
new_tweet_sequences = pad_sequences(tokenizer.texts_to_sequences(new_tweet_features['text']), maxlen=100)
new_tweet_probabilities = model.predict(new_tweet_sequences)

# The output layer is a single sigmoid unit, so each prediction lies in [0, 1].
# Comparing against 0 (as this code previously did) sends virtually every tweet
# to the REAL branch; the decision boundary for a sigmoid output is 0.5.
# NOTE(review): assumes label 1 means REAL and label 0 means FAKE - confirm
# against the encoding used in sentiment_data.csv.
DECISION_THRESHOLD = 0.5

# Print the predicted labels for the new tweets
for tweet, probability in zip(new_tweets, new_tweet_probabilities[:, 0]):
    if probability < DECISION_THRESHOLD:
        print(f'Tweet "{tweet}" is classified as FAKE')
    elif probability > DECISION_THRESHOLD:
        print(f'Tweet "{tweet}" is classified as REAL')
    else:
        # Only reached when the probability equals the threshold exactly.
        print(f'Tweet "{tweet}" is classified as Neutral')



In [None]:
# Load the old tweets dataset
old_tweet_features = pd.read_csv('after_pre.csv')

# Reuse the `tokenizer` fitted on the training tweets in the training cell.
# Fitting a fresh Tokenizer on this file would assign different integer ids to
# the words, so the sequences would no longer match the vocabulary the model's
# Embedding layer was trained on (and would overwrite the training tokenizer).

# Draw a sample of up to 25 tweets for testing. Rows without tweet text are
# skipped because they cannot be tokenized. The fixed seed makes the sample
# repeatable; drop `random_state` to get a different sample on each run.
tweets_with_text = old_tweet_features.dropna(subset=['Tweet'])
test_sample = tweets_with_text.sample(n=min(25, len(tweets_with_text)), random_state=42)

# Encode the sampled tweets exactly like the training data and predict once.
test_sample_sequences = pad_sequences(tokenizer.texts_to_sequences(test_sample['Tweet']), maxlen=100)
test_sample_probabilities = model.predict(test_sample_sequences)

# The model ends in a single sigmoid unit, so predictions lie in [0, 1] and the
# decision boundary is 0.5, not 0.
# NOTE(review): assumes label 1 means REAL and label 0 means FAKE - confirm
# against the encoding used for training.
DECISION_THRESHOLD = 0.5

# predict() returns one row per tweet with a single column; take that column
# so each element is a scalar probability.
sample_scores = test_sample_probabilities[:, 0]

# Create a new column for predicted labels in the test sample DataFrame
test_sample['label'] = [
    'REAL' if score > DECISION_THRESHOLD
    else ('FAKE' if score < DECISION_THRESHOLD else 'NEUTRAL')
    for score in sample_scores
]

# Print the predicted labels for the sample tweets
for tweet, score in zip(test_sample['Tweet'], sample_scores):
    if score < DECISION_THRESHOLD:
        print(f'Tweet "{tweet}" is classified as FAKE')
    elif score > DECISION_THRESHOLD:
        print(f'Tweet "{tweet}" is classified as REAL')
    else:
        # Only reached when the probability equals the threshold exactly.
        print(f'Tweet "{tweet}" is classified as Neutral')