In [None]:
import re
import pandas as pd
import tweepy
import configparser

# Read the configuration file.
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty result means config.ini is missing
# or unreadable. Fail here with a clear message instead of letting the
# section lookup below raise an opaque KeyError.
config = configparser.ConfigParser()
if not config.read("config.ini"):
    raise FileNotFoundError(
        "config.ini not found or unreadable; it must provide a [twitter] section"
    )

# Get the API keys and tokens from the [twitter] section of the configuration
# file. A missing section or option raises KeyError.
API_KEY = config["twitter"]["API_KEY"]
API_SECRET_KEY = config["twitter"]["API_SECRET_KEY"]
ACCESS_TOKEN = config["twitter"]["ACCESS_TOKEN"]
ACCESS_TOKEN_SECRET = config["twitter"]["ACCESS_TOKEN_SECRET"]

# Build the OAuth 1.0a user-context handler and the API client from it.
auth = tweepy.OAuth1UserHandler(
    API_KEY,
    API_SECRET_KEY,
    ACCESS_TOKEN,
    ACCESS_TOKEN_SECRET
)

api = tweepy.API(auth)

# Search terms; each one is sent as a separate search request.
keywords = ['#news']

# Collect the results of every search into a single flat list.
tweets = []
for keyword in keywords:
    # tweet_mode="extended" is requested because the code that consumes
    # `tweets` reads the `full_text` attribute of each status.
    results = api.search_tweets(keyword, tweet_mode="extended")
    tweets.extend(results)


# Compiled once at import time rather than on every call.
#
# Every range below covers symbol/pictograph code points only. A single
# catch-all range from U+24C2 to U+1F251 would also swallow CJK ideographs,
# Hangul, Kana and other ordinary letters, deleting real text.
_EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F1E0-\U0001F1FF"  # regional indicators (flags)
    "\U0001F000-\U0001F251"  # mahjong/cards, enclosed alphanumerics & ideographs
    "\U00002600-\U000027BF"  # miscellaneous symbols & dingbats
    "\U00002B00-\U00002BFF"  # miscellaneous symbols and arrows (stars, etc.)
    "\U000024C2"  # circled latin capital letter M
    "\U0000FE0F"  # variation selector-16 (emoji presentation)
    "]+"
)


def remove_emoji(string: str) -> str:
    """Return *string* with emoji and pictographic symbols removed.

    Only code points in the symbol ranges of ``_EMOJI_PATTERN`` are stripped;
    letters of any script (Latin, CJK, Hangul, ...) are left untouched.

    Args:
        string: The text to clean.

    Returns:
        A copy of ``string`` without the matched emoji characters. Text
        surrounding a removed emoji is not re-spaced.
    """
    return _EMOJI_PATTERN.sub("", string)


def preprocess_text(text):
    """Normalize raw tweet text for storage.

    The text is lower-cased, passed through ``remove_emoji``, and then every
    character that is not an ASCII letter, a digit, or whitespace is dropped
    (this includes punctuation such as ``#``, ``@``, ``:`` and ``/``).

    Args:
        text: The raw text to clean.

    Returns:
        The cleaned, lower-cased text.
    """
    without_emoji = remove_emoji(text.lower())
    # Strip special symbols and punctuation, keeping letters, digits, whitespace.
    return re.sub(r"[^a-zA-Z0-9\s]", "", without_emoji)

def _tweet_full_text(tweet):
    """Return the retweeted status's full text if present, else the tweet's own."""
    try:
        return tweet.retweeted_status.full_text
    except AttributeError:
        # No usable retweeted_status on this tweet: fall back to its own text.
        return tweet.full_text


# One entry per collected tweet, in the same order as `tweets`.
texts = [preprocess_text(_tweet_full_text(tweet)) for tweet in tweets]
tweet_ids = [tweet.id for tweet in tweets]
usernames = [tweet.user.screen_name for tweet in tweets]

# Assemble the columns into a table and write it out without the index column.
data = pd.DataFrame(
    {
        "tweet_id": tweet_ids,
        "username": usernames,
        "text": texts,
    }
)
data.to_csv("fake_news_data.csv", index=False)
