In [1]:
import tweepy
import pandas as pd
import nltk
import fasttext as ft
import json
import datetime

In [2]:
# Load the Twitter API access tokens from a local JSON file.
with open('credentials.json') as f:
    credentials = json.load(f)

# The file is only needed for parsing; the individual keys are read
# from the in-memory dict once it has been closed.
CONSUMER_KEY = credentials['CONSUMER_KEY']
CONSUMER_SECRET = credentials['CONSUMER_SECRET']
ACCESS_TOKEN = credentials['ACCESS_TOKEN']
ACCESS_SECRET = credentials['ACCESS_SECRET']

In [3]:
# Helper that configures access to the API.
def connect_to_twitter_OAuth():
    """Build a tweepy API client authenticated with the module-level tokens.

    Returns:
        The ``tweepy.API`` object created from an ``OAuthHandler`` that was
        given the consumer key/secret and the access token/secret.
    """
    handler = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    handler.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)
    return tweepy.API(handler)

# Create the API object used by the rest of the notebook.
api = connect_to_twitter_OAuth()

In [4]:
def extract_tweet_attributes(tweet_object):
    """Flatten an iterable of tweet objects into a pandas DataFrame.

    Args:
        tweet_object: Iterable of objects exposing the attributes ``id``,
            ``text``, ``favorite_count``, ``retweet_count``, ``created_at``,
            ``source``, ``in_reply_to_status_id`` and
            ``in_reply_to_screen_name``.

    Returns:
        A DataFrame with one row per tweet and a fixed column order. The
        ``retweets`` and ``favorites`` columns carry the same values as
        ``retweet_count`` and ``favorite_count`` respectively.
    """
    column_order = [
        'tweet_id',
        'text',
        'favorite_count',
        'retweet_count',
        'created_at',
        'source',
        'reply_to_status',
        'reply_to_user',
        'retweets',
        'favorites',
    ]
    # One record per tweet; keys mirror ``column_order``.
    records = [
        {
            'tweet_id': status.id,
            'text': status.text,
            'favorite_count': status.favorite_count,
            'retweet_count': status.retweet_count,
            'created_at': status.created_at,
            'source': status.source,
            'reply_to_status': status.in_reply_to_status_id,
            'reply_to_user': status.in_reply_to_screen_name,
            'retweets': status.retweet_count,
            'favorites': status.favorite_count,
        }
        for status in tweet_object
    ]
    # Passing ``columns`` keeps the schema stable even when there are no records.
    return pd.DataFrame(records, columns=column_order)

In [5]:
# Read tweets (by default 100, in Spanish) matching a search keyword.
def get_tweets(keyword, lang="es", count=100):
    """Search tweets matching ``keyword`` and return them as a DataFrame.

    Args:
        keyword: Search query passed to ``api.search_tweets``.
        lang: Language filter forwarded to the search call. Defaults to
            ``"es"`` (the value previously hard-coded).
        count: ``count`` argument forwarded to the search call. Defaults to
            ``100`` (the value previously hard-coded).

    Returns:
        The DataFrame produced by ``extract_tweet_attributes`` for the search
        results, with an extra ``keyword`` column holding the query on every
        row.
    """
    public_tweets = api.search_tweets(keyword, lang=lang, count=count)
    # The earlier version also concatenated every tweet text into a string
    # that was never read; that dead work has been removed.
    df = extract_tweet_attributes(public_tweets)
    df['keyword'] = keyword
    return df

In [6]:
# Fetch the tweets for a single keyword as a DataFrame.
df = get_tweets("SectorMovilidad")

In [7]:
# Preview the first five rows of the fetched tweets.
df.head(5)

Unnamed: 0,tweet_id,text,favorite_count,retweet_count,created_at,source,reply_to_status,reply_to_user,retweets,favorites,keyword
0,1526037834552983552,@MadameSiimone @SectorMovilidad @EEopinion @el...,1,0,2022-05-16 03:12:26+00:00,Twitter for Android,,MadameSiimone,0,1,SectorMovilidad
1,1526033330352754689,RT @luciabastidasu: #VentanasRotas\n\n#Ventana...,0,21,2022-05-16 02:54:32+00:00,Twitter for iPhone,,,21,0,SectorMovilidad
2,1526032999342608384,RT @luciabastidasu: #VentanasRotas\n\n#Ventana...,0,21,2022-05-16 02:53:14+00:00,Twitter for Android,,,21,0,SectorMovilidad
3,1526032439461785603,@CPorcolombia @luciabastidasu @Fontibon_Bogota...,0,0,2022-05-16 02:51:00+00:00,Twitter for Android,1.525856e+18,CPorcolombia,0,0,SectorMovilidad
4,1526031055026782208,Sin berma! Sin distancia ni respeto por vida d...,0,0,2022-05-16 02:45:30+00:00,Twitter for Android,,,0,0,SectorMovilidad


In [8]:
def extract_data(keywords, output_dir='./data'):
    """Fetch tweets for each keyword and write each result set to a CSV file.

    For every keyword the tweets are retrieved with ``get_tweets``, commas are
    stripped from the ``text`` column, and the frame is saved as
    ``df_<keyword>_<day>_<month>_<year>_<hour>..<minute>.csv`` inside
    ``output_dir``.

    Args:
        keywords: Iterable of search keywords.
        output_dir: Directory that receives the CSV files. Defaults to
            ``'./data'`` (the location previously hard-coded); it is created
            if it does not exist.

    Returns:
        None. The function prints a progress line per keyword and writes
        files as a side effect.
    """
    import os  # local import: only needed here for directory/path handling

    # Without this, ``to_csv`` fails when the output directory is missing.
    os.makedirs(output_dir, exist_ok=True)

    for keyword in keywords:
        print('Extracting:', keyword, '...')
        df = get_tweets(keyword)
        # regex=False makes the literal replacement explicit instead of
        # relying on the pandas-version-dependent default.
        df['text'] = df['text'].str.replace(',', '', regex=False)
        # Take a single timestamp so every part of the file name refers to the
        # same instant (separate now() calls could straddle a minute/hour/day
        # boundary).
        now = datetime.datetime.now()
        filename = 'df_{}_{}_{}_{}_{}..{}.csv'.format(
            keyword, now.day, now.month, now.year, now.hour, now.minute
        )
        df.to_csv(os.path.join(output_dir, filename), index=False)

In [9]:
# Search terms to download; each one produces its own CSV file.
keywords = [
    "SectorMovilidad",
    "ciclorruta",
    "pico y placa",
    "ciclo ruta",
]
extract_data(keywords)

Extracting: SectorMovilidad ...
Extracting: ciclorruta ...
Extracting: pico y placa ...
Extracting: ciclo ruta ...
