In [60]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
from sklearn.pipeline import make_pipeline

# Load the datasets
fake_df = pd.read_csv('/kaggle/input/fake-news-dataset/Fake.csv')#[['text', 'label']]
true_df = pd.read_csv('/kaggle/input/fake-news-dataset/True.csv')#[['text', 'label']]

# Add 'label' column and assign values
fake_df['label'] = 0  # 0 for fake news
true_df['label'] = 1  # 1 for true news

# Select only the 'text' and 'label' columns
fake_df = fake_df[['text', 'label']]
true_df = true_df[['text', 'label']]

# Combine the datasets
combined_df = pd.concat([fake_df, true_df], ignore_index=True)

# Shuffle the combined dataset
combined_df = combined_df.sample(frac=1).reset_index(drop=True)

In [61]:
combined_df.shape

(44898, 2)

In [34]:


# Shuffle the combined dataset
combined_df = combined_df.sample(frac=1).reset_index(drop=True)
combined_df.head()


Unnamed: 0,text,label
0,WASHINGTON (Reuters) - U.S. President-elect Do...,1
1,WASHINGTON (Reuters) - Congress should launch ...,1
2,As Barack Hussein Obama tours around the count...,0
3,John Oliver absolutely humiliated Mitch McConn...,0
4,It s no secret Americans have been demonstrati...,0


In [37]:
# Check for null values
null_counts = combined_df.isnull().sum()
print("Null Value Counts:")
print(null_counts)

Null Value Counts:
text     0
label    0
dtype: int64


In [38]:
# Remove rows with null values
combined_df.dropna(inplace=True)

In [39]:
# Check if null values are removed
null_counts_after = combined_df.isnull().sum()
print("Null Value Counts After Removing:")
print(null_counts_after)

Null Value Counts After Removing:
text     0
label    0
dtype: int64


In [41]:
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Download NLTK resources
import nltk
nltk.download('punkt')
nltk.download('stopwords')

# Define function for text preprocessing
def preprocess_text(text):
    # Tokenization
    tokens = word_tokenize(text)
    
    # Lowercasing
    tokens = [token.lower() for token in tokens]
    
    # Remove punctuation and special characters
    tokens = [re.sub(r'[^\w\s]', '', token) for token in tokens]
    
    # Remove stop words
    stop_words = set(stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]
    
    # Join tokens back
    preprocessed_text = ' '.join(tokens)
    
    return preprocessed_text

# Apply preprocessing function to the 'text' column
combined_df=combined_df.head(50)
combined_df['text'] = combined_df['text'].apply(preprocess_text)

# Print sample preprocessed text
print("Sample Preprocessed Text:")
print(combined_df['text'].head())


[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /usr/share/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
Sample Preprocessed Text:
0    washington  reuters   us presidentelect donald...
1    washington  reuters   congress launch bipartis...
2    barack hussein obama tours around country tryi...
3    john oliver absolutely humiliated mitch mcconn...
4    secret americans demonstrating complete lack i...
Name: text, dtype: object


In [44]:
combined_df.shape

(50, 2)

In [47]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder

# Drop blank rows if any
combined_df.dropna(inplace=True)

# Select a desired number of rows
desired_rows = 1000  # Change this to your desired number of rows
combined_df_subset = combined_df.head(desired_rows)

# Feature extraction using TF-IDF
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(combined_df_subset['text']).toarray()

# Encode labels from combined_df_subset
encoder = LabelEncoder()
y = encoder.fit_transform(combined_df_subset['label'])

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [48]:
 # adaboost
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import classification_report

# Initialize and train AdaBoost classifier
ada = AdaBoostClassifier(n_estimators=100)
ada.fit(X_train, y_train)

# Predictions and evaluation
y_pred = ada.predict(X_test)
print("AdaBoost Classification Report:\n", classification_report(y_test, y_pred))


AdaBoost Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         6
           1       1.00      1.00      1.00         4

    accuracy                           1.00        10
   macro avg       1.00      1.00      1.00        10
weighted avg       1.00      1.00      1.00        10



In [49]:
# GBM
from sklearn.ensemble import GradientBoostingClassifier

# Initialize and train GBM classifier
gbm = GradientBoostingClassifier(n_estimators=100)
gbm.fit(X_train, y_train)

# Predictions and evaluation
y_pred = gbm.predict(X_test)
print("GBM Classification Report:\n", classification_report(y_test, y_pred))


GBM Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         6
           1       1.00      1.00      1.00         4

    accuracy                           1.00        10
   macro avg       1.00      1.00      1.00        10
weighted avg       1.00      1.00      1.00        10



In [50]:
# XGBM
import xgboost as xgb

# Initialize and train XGBoost classifier
xgb_model = xgb.XGBClassifier(n_estimators=100)
xgb_model.fit(X_train, y_train)

# Predictions and evaluation
y_pred = xgb_model.predict(X_test)
print("XGBoost Classification Report:\n", classification_report(y_test, y_pred))


XGBoost Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         6
           1       1.00      1.00      1.00         4

    accuracy                           1.00        10
   macro avg       1.00      1.00      1.00        10
weighted avg       1.00      1.00      1.00        10



In [52]:
# CNN 4 layers
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, Flatten, Dense, Dropout

# Tokenizing and padding sequences
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(combined_df['text'])
X_seq = tokenizer.texts_to_sequences(combined_df['text'])
X_pad = pad_sequences(X_seq, maxlen=100)

# Train-test split for CNN
X_train, X_test, y_train, y_test = train_test_split(X_pad, y, test_size=0.2, random_state=42)

# Build 4-layer CNN model
model_4 = Sequential([
    Embedding(input_dim=5000, output_dim=128, input_length=100),
    Conv1D(128, 5, activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid')
])

model_4.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model_4.summary()

# Train the model
model_4.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=0.2)

# Evaluate the model
loss, accuracy = model_4.evaluate(X_test, y_test)
print(f'4-Layer CNN Accuracy: {accuracy}')




Epoch 1/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4s/step - accuracy: 0.4375 - loss: 0.6942

I0000 00:00:1718039305.345994     544 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step - accuracy: 0.4375 - loss: 0.6942 - val_accuracy: 0.7500 - val_loss: 0.6055
Epoch 2/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.5625 - loss: 0.6116 - val_accuracy: 0.7500 - val_loss: 0.5978
Epoch 3/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.9688 - loss: 0.4983 - val_accuracy: 0.7500 - val_loss: 0.6037
Epoch 4/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - accuracy: 0.9688 - loss: 0.3828 - val_accuracy: 0.7500 - val_loss: 0.5990
Epoch 5/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.9688 - loss: 0.2802 - val_accuracy: 0.7500 - val_loss: 0.5786
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 487ms/step - accuracy: 0.5000 - loss: 0.6719
4-Layer CNN Accuracy: 0.5


In [53]:
# CNN 6 layers
# Build 6-layer CNN model
model_6 = Sequential([
    Embedding(input_dim=5000, output_dim=128, input_length=100),
    Conv1D(128, 5, activation='relu'),
    MaxPooling1D(pool_size=2),
    Conv1D(64, 5, activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

model_6.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model_6.summary()

# Train the model
model_6.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=0.2)

# Evaluate the model
loss, accuracy = model_6.evaluate(X_test, y_test)
print(f'6-Layer CNN Accuracy: {accuracy}')


Epoch 1/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step - accuracy: 0.5000 - loss: 0.6935 - val_accuracy: 0.7500 - val_loss: 0.6838
Epoch 2/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - accuracy: 0.8125 - loss: 0.6719 - val_accuracy: 0.7500 - val_loss: 0.6682
Epoch 3/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.6875 - loss: 0.6594 - val_accuracy: 0.7500 - val_loss: 0.6547
Epoch 4/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.7500 - loss: 0.6363 - val_accuracy: 0.7500 - val_loss: 0.6384
Epoch 5/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.6875 - loss: 0.5899 - val_accuracy: 0.7500 - val_loss: 0.6256
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 541ms/step - accuracy: 0.6000 - loss: 0.6595
6-Layer CNN Accuracy: 0.6000000238418579


# Text GAN


In [75]:
import os
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from concurrent.futures import ThreadPoolExecutor, as_completed
from IPython.display import display



class TextDataset(Dataset):
    def __init__(self, texts, tokenizer, max_length):
        self.texts = texts
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten()
        }

def load_datasets(true_path, fake_path):
    texts =combined_df['text']
    labels = combined_df['label']
    return texts, labels

def create_dataloader(texts, tokenizer, max_length, batch_size):
    dataset = TextDataset(texts, tokenizer, max_length)
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)

def generate_text_gpt2(model, tokenizer, prompt, max_new_tokens=50):
    try:
        inputs = tokenizer.encode(prompt, return_tensors='pt', add_special_tokens=True, max_length=1024, truncation=True)
        attention_mask = torch.ones(inputs.shape)
        
        # Check tensor dimensions and values before moving to device
        if inputs.shape[1] > 1024:
            raise ValueError(f"Input sequence length exceeds the maximum length of 1024 tokens: {inputs.shape[1]}")
        
        inputs = inputs.to(device)
        attention_mask = attention_mask.to(device)
        
        outputs = model.generate(inputs, attention_mask=attention_mask, max_new_tokens=max_new_tokens, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id)
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        print(f"Error generating text for input: {prompt[:50]}...: {e}")
        return None

def generate_and_add_texts_to_dataset(model, tokenizer, texts, labels, label, max_new_tokens=50, subset_size=100):
    new_texts = []
    with ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(generate_text_gpt2, model, tokenizer, text, max_new_tokens)
            for text in texts[:subset_size]
        ]
        for idx, future in enumerate(as_completed(futures)):
            try:
                generated_text = future.result()
                if generated_text:
                    new_texts.append(generated_text)
                    labels.append(label)
                else:
                    print(f"Skipped text at index {idx} due to generation error.")
            except Exception as e:
                print(f"Error processing text at index {idx}: {e}")
    texts.extend(new_texts)
    return texts, labels

if __name__ == "__main__":
    try:
        # Paths to the datasets
        true_path = '/kaggle/input/fake-news-dataset/True.csv'
        fake_path = '/kaggle/input/fake-news-dataset/Fake.csv'

        # Load datasets
        texts, labels = load_datasets(true_path, fake_path)

        # Initialize tokenizer
        tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
        max_length = 128

        # Create DataLoader
        batch_size = 16
        dataloader = create_dataloader(texts, tokenizer, max_length, batch_size)

        # Ensure using CPU for debugging
        device = torch.device('cpu')
        print(f"Using device: {device}")

        # Initialize the model
        model = GPT2LMHeadModel.from_pretrained('gpt2').to(device)
        model.eval()

        # Process a controlled subset of data
        subset_size =  100 # Process only 100 records for faster execution

        # Generate new texts based on original texts and add them to the dataset
        texts, labels = generate_and_add_texts_to_dataset(model, tokenizer, texts, labels, label=1, max_new_tokens=50, subset_size=subset_size)
        texts, labels = generate_and_add_texts_to_dataset(model, tokenizer, texts, labels, label=0, max_new_tokens=50, subset_size=subset_size)

        # Verify the new dataset size
        print(f"Total texts: {len(texts)}")
        print(f"Total labels: {len(labels)}")

        # Create DataLoader with updated dataset
        dataloader = create_dataloader(texts, tokenizer, max_length, batch_size)

        # Convert to DataFrame to display
        df = pd.DataFrame({'text': texts, 'label': labels})
        display(df.head())  # Display the first few rows of the updated dataset
    except Exception as e:
        print(f"An error occurred: {e}")


Using device: cpu
Error processing text at index 0: 'Series' object has no attribute 'append'
Error processing text at index 1: 'Series' object has no attribute 'append'
Error processing text at index 2: 'Series' object has no attribute 'append'
Error processing text at index 3: 'Series' object has no attribute 'append'
Error generating text for input: NAIROBI/MOGADISHU (Reuters) - The size and methods...: index out of range in self


KeyboardInterrupt: 

Error generating text for input: Will this FINALLY be the straw that breaks the cam...: index out of range in self
Error generating text for input: THIS IS SO IMPORTANT! The transcript and video  be...: index out of range in self
Error generating text for input:  In response to the establishment media s contrive...: index out of range in self
Error generating text for input: ANKARA/BEIRUT (Reuters) - Syrian rebel fighters ar...: index out of range in self
Error generating text for input: How is a man with ties to a US based terror organi...: index out of range in self
Error generating text for input: BRUSSELS (Reuters) - When Theresa May visits Bruss...: index out of range in self
Error generating text for input: (Reuters) - Following is the full text of former F...: index out of range in self
Error generating text for input: Everyone suspected the sketchy Steele Dossier was ...: index out of range in self


In [74]:
# newly generated data

df = df.sample(frac=1).reset_index(drop=True)
df['label'].unique()

array([1])

In [72]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder

# Drop blank rows if any
df.dropna(inplace=True)

# Select a desired number of rows
desired_rows = 1000  # Change this to your desired number of rows
df = df.head(desired_rows)

# Feature extraction using TF-IDF
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(df['text']).toarray()

# Encode labels from combined_df_subset
encoder = LabelEncoder()
y = encoder.fit_transform(df['label'])

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [73]:
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [68]:
# adaboost
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import classification_report

# Initialize and train AdaBoost classifier
ada = AdaBoostClassifier(n_estimators=100)
ada.fit(X_train, y_train)

# Predictions and evaluation
y_pred = ada.predict(X_test)
print("AdaBoost Classification Report:\n", classification_report(y_test, y_pred))


AdaBoost Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       200

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200



In [69]:
# GBM
from sklearn.ensemble import GradientBoostingClassifier

# Initialize and train GBM classifier
gbm = GradientBoostingClassifier(n_estimators=100)
gbm.fit(X_train, y_train)

# Predictions and evaluation
y_pred = gbm.predict(X_test)
print("GBM Classification Report:\n", classification_report(y_test, y_pred))


ValueError: y contains 1 class after sample_weight trimmed classes with zero weights, while a minimum of 2 classes are required.

In [None]:
# XGBM
import xgboost as xgb

# Initialize and train XGBoost classifier
xgb_model = xgb.XGBClassifier(n_estimators=100)
xgb_model.fit(X_train, y_train)

# Predictions and evaluation
y_pred = xgb_model.predict(X_test)
print("XGBoost Classification Report:\n", classification_report(y_test, y_pred))


In [None]:
# CNN 4 layers
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, Flatten, Dense, Dropout

# Tokenizing and padding sequences
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(df['text'])
X_seq = tokenizer.texts_to_sequences(df['text'])
X_pad = pad_sequences(X_seq, maxlen=100)

# Train-test split for CNN
X_train, X_test, y_train, y_test = train_test_split(X_pad, y, test_size=0.2, random_state=42)

# Build 4-layer CNN model
model_4 = Sequential([
    Embedding(input_dim=5000, output_dim=128, input_length=100),
    Conv1D(128, 5, activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid')
])

model_4.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model_4.summary()

# Train the model
model_4.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=0.2)

# Evaluate the model
loss, accuracy = model_4.evaluate(X_test, y_test)
print(f'4-Layer CNN Accuracy: {accuracy}')


In [None]:
# CNN 6 layers
# Build 6-layer CNN model
model_6 = Sequential([
    Embedding(input_dim=5000, output_dim=128, input_length=100),
    Conv1D(128, 5, activation='relu'),
    MaxPooling1D(pool_size=2),
    Conv1D(64, 5, activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

model_6.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model_6.summary()

# Train the model
model_6.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=0.2)

# Evaluate the model
loss, accuracy = model_6.evaluate(X_test, y_test)
print(f'6-Layer CNN Accuracy: {accuracy}')


Error generating text for input: WASHINGTON (Reuters) - The U.S. Supreme Court on M...: index out of range in self
Error generating text for input: WASHINGTON/NEW YORK (Reuters) - A series of tweets...: index out of range in self
