In [1]:
import os
import pandas as pd
import tweepy
from datetime import datetime
import tkinter as tk
from tkinter import ttk

# Função para buscar tweets usando a API v2
def search_twitter_tweets(api_key, api_secret_key, access_token, access_token_secret, query, since_date, until_date, max_tweets=50):
    client = tweepy.Client(bearer_token=access_token)
    
    all_tweets = []

    # Formatação da data
    since_date = datetime.strptime(since_date, '%Y-%m-%d').date()
    until_date = datetime.strptime(until_date, '%Y-%m-%d').date()

    query += f" lang:pt -is:retweet -is:reply -is:quote"

    try:
        for tweet in tweepy.Paginator(client.search_recent_tweets, query=query, tweet_fields=['id', 'text', 'author_id', 'created_at', 'lang', 'public_metrics', 'source', 'geo'], start_time=since_date, end_time=until_date, max_results=50).flatten(limit=max_tweets):
            tweet_data = {
                'TWEETID': tweet.id,
                'CREATEDAT': tweet.created_at,
                'TEXT': tweet.text,
                'AUTHORID': tweet.author_id,
                'AUTHORNAME': "",  # A API v2 não retorna diretamente o nome do autor, será necessário outro endpoint
                'AUTHORUSERNAME': "",  # A API v2 não retorna diretamente o nome do autor, será necessário outro endpoint
                'INREPLYTOSTATUSID': None,  # A API v2 não retorna diretamente
                'INREPLYTOUSERID': None,  # A API v2 não retorna diretamente
                'INREPLYTOUSERNAME': None,  # A API v2 não retorna diretamente
                'RETWEETCOUNT': tweet.public_metrics['retweet_count'],
                'LIKECOUNT': tweet.public_metrics['like_count'],
                'QUOTECOUNT': tweet.public_metrics['quote_count'],
                'REPLYCOUNT': tweet.public_metrics['reply_count'],
                'LANG': tweet.lang,
                'SOURCE': tweet.source,
                'GEO': tweet.geo,
                'COORDINATES': None,  # A API v2 não retorna diretamente
                'PLACE': None,  # A API v2 não retorna diretamente
                'HASHTAGS': [],  # A API v2 não retorna diretamente
                'USERMENTIONS': [],  # A API v2 não retorna diretamente
                'URLS': []  # A API v2 não retorna diretamente
            }
            all_tweets.append(tweet_data)
    except tweepy.TweepyException as e:
        print(f"Erro ao buscar tweets: {e}")
        raise
    
    return all_tweets

# Função para organizar os dados dos tweets
def organize_tweet_data(tweets):
    df = pd.DataFrame(tweets)
    df.columns = df.columns.str.upper()
    return df

# Função para salvar os dados em arquivos CSV individuais
def save_tweets_to_csv(organized_tweets, output_directory, query_date):
    output_directory = os.path.join(output_directory, query_date)
    os.makedirs(output_directory, exist_ok=True)
    
    for index, tweet in organized_tweets.iterrows():
        tweet_id = tweet['TWEETID']
        service = 'Twitter'
        date = tweet['CREATEDAT'].strftime('%Y%m%d%H%M')
        sentiment = 'NEUTRAL'  # Considerar como você deseja definir o sentimento
        index_formatted = str(index).zfill(7)
    
        filename = f"{service}_{tweet_id}_{date}_{sentiment}_{index_formatted}.csv"
        filepath = os.path.join(output_directory, filename)
    
        df_single_row = pd.DataFrame([tweet])
        df_single_row.to_csv(filepath, index=False)

# Função para rodar o código a partir do formulário
def run_code(api_key, api_secret_key, access_token, access_token_secret, query, since_date, until_date):
    max_tweets = 1000  # Ajuste o limite de tweets desejado
    tweets_per_batch = 50
    total_tweets = []
    
    while len(total_tweets) < max_tweets:
        remaining_tweets = max_tweets - len(total_tweets)
        batch_size = min(tweets_per_batch, remaining_tweets)
        
        tweets = search_twitter_tweets(api_key, api_secret_key, access_token, access_token_secret, query, since_date, until_date, max_tweets=batch_size)
        total_tweets.extend(tweets)
        
        if len(tweets) < batch_size:
            break  # Sai do loop se não houver mais tweets a serem recuperados
    
    organized_tweets = organize_tweet_data(total_tweets)
    
    # Obtém a data da consulta para criar o diretório de saída
    query_date = datetime.now().strftime('%Y%m%d')
    save_tweets_to_csv(organized_tweets, output_directory, query_date)
    print("Dados salvos com sucesso.")

# Configuração do formulário
def create_form():
    def on_submit():
        api_key = api_key_entry.get()
        api_secret_key = api_secret_key_entry.get()
        access_token = access_token_entry.get()
        access_token_secret = access_token_secret_entry.get()
        query = query_entry.get()
        since_date = since_date_entry.get()
        until_date = until_date_entry.get()
        root.destroy()
        run_code(api_key, api_secret_key, access_token, access_token_secret, query, since_date, until_date)

    root = tk.Tk()
    root.title("Twitter Data Fetcher")

    frame = ttk.Frame(root, padding="10")
    frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))

    api_key_label = ttk.Label(frame, text="API Key:")
    api_key_label.grid(row=0, column=0, sticky=tk.W)
    api_key_entry = ttk.Entry(frame, width=30)
    api_key_entry.grid(row=0, column=1, sticky=(tk.W, tk.E))

    api_secret_key_label = ttk.Label(frame, text="API Secret Key:")
    api_secret_key_label.grid(row=1, column=0, sticky=tk.W)
    api_secret_key_entry = ttk.Entry(frame, width=30, show='*')
    api_secret_key_entry.grid(row=1, column=1, sticky=(tk.W, tk.E))

    access_token_label = ttk.Label(frame, text="Access Token:")
    access_token_label.grid(row=2, column=0, sticky=tk.W)
    access_token_entry = ttk.Entry(frame, width=30)
    access_token_entry.grid(row=2, column=1, sticky=(tk.W, tk.E))

    access_token_secret_label = ttk.Label(frame, text="Access Token Secret:")
    access_token_secret_label.grid(row=3, column=0, sticky=tk.W)
    access_token_secret_entry = ttk.Entry(frame, width=30, show='*')
    access_token_secret_entry.grid(row=3, column=1, sticky=(tk.W, tk.E))

    query_label = ttk.Label(frame, text="Query:")
    query_label.grid(row=4, column=0, sticky=tk.W)
    query_entry = ttk.Entry(frame, width=30)
    query_entry.grid(row=4, column=1, sticky=(tk.W, tk.E))

    since_date_label = ttk.Label(frame, text="Since (YYYY-MM-DD):")
    since_date_label.grid(row=5, column=0, sticky=tk.W)
    since_date_entry = ttk.Entry(frame, width=30)
    since_date_entry.grid(row=5, column=1, sticky=(tk.W, tk.E))

    until_date_label = ttk.Label(frame, text="Until (YYYY-MM-DD):")
    until_date_label.grid(row=6, column=0, sticky=tk.W)
    until_date_entry = ttk.Entry(frame, width=30)
    until_date_entry.grid(row=6, column=1, sticky=(tk.W, tk.E))

    submit_button = ttk.Button(frame, text="Submit", command=on_submit)
    submit_button.grid(row=7, column=0, columnspan=2)

    root.mainloop()

# Definir o diretório de saída
output_directory = r'C:\Redes_Sociais\X\Publicacoes'

# Criar e exibir o formulário
create_form()


Erro ao buscar tweets: 401 Unauthorized
Unauthorized


Exception in Tkinter callback
Traceback (most recent call last):
  File "c:\Python312\Lib\tkinter\__init__.py", line 1967, in __call__
    return self.func(*args)
           ^^^^^^^^^^^^^^^^
  File "C:\Users\alex_\AppData\Local\Temp\ipykernel_11108\4193845720.py", line 110, in on_submit
    run_code(api_key, api_secret_key, access_token, access_token_secret, query, since_date, until_date)
  File "C:\Users\alex_\AppData\Local\Temp\ipykernel_11108\4193845720.py", line 86, in run_code
    tweets = search_twitter_tweets(api_key, api_secret_key, access_token, access_token_secret, query, since_date, until_date, max_tweets=batch_size)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\alex_\AppData\Local\Temp\ipykernel_11108\4193845720.py", line 21, in search_twitter_tweets
    for tweet in tweepy.Paginator(client.search_recent_tweets, query=query, tweet_fields=['id', 'text', 'au