# Retrieving trending tweets - Colombia

In [1]:
import pandas as pd
import datetime

In [2]:
import os

import twitter

# Go to https://developer.twitter.com/en/apps to create an app and obtain the
# four credentials used below.
# See https://developer.twitter.com/en/docs/basics/authentication/overview/oauth
# for more information on Twitter's OAuth implementation.
#
# The credentials are read from environment variables so that real keys never
# have to be typed into (and accidentally shared with) the notebook. Set
# TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET, TWITTER_OAUTH_TOKEN and
# TWITTER_OAUTH_TOKEN_SECRET before starting Jupyter. Each value falls back to
# an empty placeholder string when its variable is not set.
CONSUMER_KEY = os.environ.get('TWITTER_CONSUMER_KEY', '')  # Key associated with the application
CONSUMER_SECRET = os.environ.get('TWITTER_CONSUMER_SECRET', '')  # Password used to authenticate with the authentication server
OAUTH_TOKEN = os.environ.get('TWITTER_OAUTH_TOKEN', '')  # Key given to the client after successful authentication of above keys
OAUTH_TOKEN_SECRET = os.environ.get('TWITTER_OAUTH_TOKEN_SECRET', '')  # Password for the access key

# Point out missing credentials here rather than leaving it to the first API call.
if not all((CONSUMER_KEY, CONSUMER_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET)):
    print('Warning: one or more Twitter credentials are empty; '
          'set the TWITTER_* environment variables before querying the API.')

# Create an object called auth that represents the OAuth authorization.
auth = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET,
                           CONSUMER_KEY, CONSUMER_SECRET)

# The auth object is passed to the Twitter class, which issues the queries to Twitter's API.
twitter_api = twitter.Twitter(auth=auth)

# Printing twitter_api only shows the object's repr; it confirms that the
# variable is now defined, nothing more.
print(twitter_api)

<twitter.api.Twitter object at 0x000001B45BB3F8B0>


In [None]:
# Retrieve the current trends for Colombia through the Twitter API's trends/place endpoint.

# See https://dev.twitter.com/docs/api/1.1/get/trends/place and
# http://developer.yahoo.com/geo/geoplanet/

CO_WOE_ID = 23424787 # Where On Earth ID (WOEID) used to select Colombia in the trends query


# Prefix ID with the underscore for query string parameterization.
# Without the underscore, the twitter package appends the ID value
# to the URL itself as a special case keyword argument.

# The response is read further down as co_trends[0]['trends'], a sequence of
# trend entries that each carry a 'name'.
co_trends = twitter_api.trends.place(_id=CO_WOE_ID)

In [None]:
# Baseline trend names.
# At the beginning of the project some topics were hard-coded (for example the
# names of politicians) to check whether interesting tweets existed that would
# not show up in the API trends; the first retrieved trends were piled on top
# of them. Those names live in trend_names_base_col.xlsx, and the trends of
# the current day are added to that list below. Per the author's notes, the
# hard-coded names are removed again later, when the trends are processed for
# relevance and frequency, keeping only the top trends retrieved from the API.
stored_trend_names = (
    pd.read_excel('trend_names_base_col.xlsx')
    .drop(columns=['Unnamed: 0'])  # discard the index column stored in the spreadsheet
)
stored_trend_names.head()

Unnamed: 0,trend_names
0,Gustavo Petro
1,Francia Marquez
2,Maria Fernanda Cabal
3,Medelín
4,Bogotá


In [7]:
# Baseline trend names as a plain Python list, so new trends can be appended to it
previous_trend_names = stored_trend_names['trend_names'].values.tolist()

In [8]:
# Names of all trends in the API response, in the order they were returned
trend_names = [trend['name'] for trend in co_trends[0]['trends']]

# Append only the trends that the baseline list does not contain yet
for trend in trend_names:
    if trend in previous_trend_names:
        continue
    print('added', trend, 'to trend_names to mine tweets')
    previous_trend_names.append(trend)

# Independent copy of the combined list: these are the trend names saved to a file
trends_to_save = list(previous_trend_names)

In [11]:
# Write the combined trend names to a spreadsheet whose file name carries the
# month.day.year and hour at which this cell was run
now = datetime.datetime.now().strftime("%m.%d.%Y_%Hh")
name = 'Trend_names_col_{}.xlsx'.format(now)
trend_names_frame = pd.DataFrame({'trend_names':trends_to_save})
trend_names_frame.to_excel(name)

### Retrieving tweets for each trending topic and relevant information for each of those tweets

In [13]:
import json
from urllib.parse import unquote

# See https://dev.twitter.com/rest/reference/get/search/tweets

# Number of tweets requested per search call.
count = 100

# Maximum number of additional result batches followed for each topic.
MAX_EXTRA_BATCHES = 5


def _next_results_to_kwargs(next_results):
    """Turn the ``next_results`` query string into keyword arguments.

    ``next_results`` is taken from ``search_metadata`` and is assumed to be a
    URL query string with one leading character (presumably ``?``) in front
    of ``key=value`` pairs joined by ``&``.

    The string is split on ``&`` and on the first ``=`` *before* the parts are
    URL-decoded. Decoding first would break any query whose decoded text
    contains ``&`` or ``=``, because the decoded characters would be mistaken
    for separators.

    Returns a dict mapping each decoded parameter name to its decoded value.
    """
    kwargs = {}
    for pair in next_results[1:].split('&'):
        if not pair:
            continue  # ignore empty fragments such as a trailing "&"
        key, _sep, value = pair.partition('=')
        kwargs[unquote(key)] = unquote(value)
    return kwargs


# Information on each trending topic. The keys are the trends themselves and
# the values are sub-dictionaries with the tweet texts, the mentioned screen
# names, the hashtags and the tokenized texts (lists of words in the text).
tweets = {}

for trending_topic in trends_to_save:

    try:
        # The trend name itself is used as the search query.
        q = trending_topic

        search_results = twitter_api.search.tweets(q=q, count=count, lang="es")
        statuses = search_results['statuses']

        # Follow the cursor through up to MAX_EXTRA_BATCHES more batches of results.
        for _batch in range(MAX_EXTRA_BATCHES):
            try:
                next_results = search_results['search_metadata']['next_results']
            except KeyError:
                # No more results when next_results doesn't exist.
                break

            # The keyword arguments of the next request come from next_results.
            kwargs = _next_results_to_kwargs(next_results)
            search_results = twitter_api.search.tweets(**kwargs)
            statuses += search_results['statuses']

        # Text of every retrieved tweet.
        status_texts = [status['text'] for status in statuses]

        # Screen names (Twitter user names) of the accounts mentioned in the tweets.
        screen_names = [user_mention['screen_name']
                        for status in statuses
                        for user_mention in status['entities']['user_mentions']]

        # Hashtags used in the tweets.
        hashtags = [hashtag['text']
                    for status in statuses
                    for hashtag in status['entities']['hashtags']]

        # All whitespace-separated words from all tweets.
        words = [w
                 for t in status_texts
                 for w in t.split()]

        # NOTE: the key 'hastags' (sic) is kept as is so that the resulting
        # table and saved files keep the labels they have always had.
        tweets[trending_topic] = {'text': status_texts,
                                  'screen_names': screen_names,
                                  'hastags': hashtags,
                                  'words': words}

    except Exception as exc:
        # Best effort: a failing topic is skipped, but the failure is reported
        # instead of being silently swallowed.
        print('skipped', trending_topic, '-', type(exc).__name__, exc)

In [None]:
# Tabulate the collected tweet information: one column per trending topic,
# one row per inner key of the tweets dictionary
tweets_df = pd.DataFrame.from_dict(tweets, orient='columns')
tweets_df.head()

Unnamed: 0,Gustavo Petro,Francia Marquez,Maria Fernanda Cabal,Medelín,Bogotá,Cartagena,Cali,#LaCatedralPetrista,#FiscalEnApuros,#LosDanieles,...,CAIs,#losmalosson,#Marcha26S,Italia,Venezuela,Toto Vega,Medellín,fuera petro,Maguire,plaza de Bolívar
text,[RT @AlbertoRodNews: ÚLTIMA HORA | Estallan la...,[RT @RicardoMalagonS: “Francia Márquez es un s...,[RT @MariaFdaCabal: Qué mal periodismo el de ⁦...,[@petrogustavo Cargue la foto de Medelín @petr...,[RT @WilliamDelgadoG: Cientos de ciudadanos ma...,"[Mereció mucho más el Cartagena, ante un Alavé...","[RT @MHEO_: ¡Alcalde de Cali @JorgeIvanOspina,...",[RT @jarizabaletaf: -No erradicación \n-No b...,[RT @ColSigloXXl: #fiscalenapuros por corrupto...,[RT @DCoronell: #LaCaídaDelRectorMagnífico es ...,...,[RT @AlbertoBernalLe: Esteban nunca entendió n...,[RT @GustavoBolivar: #LosMalosSon\nEl CD repit...,[RT @Dulcinela0907: Jajajajajaja dicen “fracas...,[RT @jpurias: En Italia los votos de la izquie...,[RT @ElNacionalWeb: #26Sep | Reabren oficialme...,[RT @DCoronell: Me despierto con la triste not...,[RT @Tolaymaruja: Un gracioso se puso de charr...,[RT @bettycriticonaa: Fuera Petro !!\nSe le vo...,[RT @LaScaloneta: Que humo que es este Maguire...,[RT @guenmecu: Los fascistas del uribismo son ...
screen_names,"[AlbertoRodNews, IvanCepedaCast, RanCarcris, J...","[RicardoMalagonS, RicardoMalagonS, RicardoMala...","[MariaFdaCabal, infobae, MariaFdaCabal, infoba...","[petrogustavo, petrogustavo, osesneyder1210, J...","[WilliamDelgadoG, RevistaSemana, elespectador,...","[TuriCartagenaES, Alaves, nanoxve, Pablo_Norte...","[MHEO_, JorgeIvanOspina, elpaiscali, DiegoASan...","[jarizabaletaf, luzmabe1959, ValenLafaurie_, j...","[ColSigloXXl, FiscaliaCol, FiscaliaCol, ismago...","[DCoronell, Jalvasa00, shakira, MarthaV2539792...",...,"[AlbertoBernalLe, ArielAnaliza, ArielAnaliza, ...","[GustavoBolivar, CamiloG2022, Enrique_GomezM, ...","[Dulcinela0907, Pais_Hermoso, MargaritaRepo, P...","[jpurias, DonMitxel_I, PeladodeCronica, viquir...","[ElNacionalWeb, UN_HRC, PartidoPSUV, FraseSimp...","[DCoronell, Toto_Vega, Noridaoficial, JuanCami...","[Tolaymaruja, Citytv, QuinteroCalle, DanielSam...","[bettycriticonaa, everstrongever, Julian186881...","[LaScaloneta, ManuHeredia21, MrZizou16, Invict...","[guenmecu, petrogustavo, williechirinos, WRadi..."
hastags,"[VideoBlu, AHORA, 26Sep, Colombia, Protesta, A...","[TopBlu, TopBlu, MananasBLU, FeminismoSelectiv...","[EleccionesEnItalia, TodosALaCalle26S, YoNoPar...","[ACTUALIDAD, DeInterés, ACTUALIDAD, DeInterés,...","[VIDEO, YoNoMarchoYoProduzco, SeguridadAlGaret...","[TalDíaComoHoy, DíaInternacionaldelTurismo, me...","[Mheo, LigaBetPlay, PetroHamponElPuebloNoEsHue...","[LaCatedralPetrista, LaCatedralPetrista, lacat...","[fiscalenapuros, fiscalenapuros, FiscalEnApuro...","[LaCaídaDelRectorMagnífico, LosDanieles, EnElA...",...,"[TodosALaCalle26S, TodosALaCalle26S, petristas...","[LosMalosSon, TodosALaCalle26S, LosMalosSon, L...","[MarchaDelSalchichon, Marcha26S, MarchaDelSalc...","[ÚltimaHora, FelizMartes, Italia, FelizMartes,...","[26Sep, HRC51, VIDEO, 26Sep, Venezuela, EsNoti...","[VeaLoMásLeído, otoVega, TotoVega, Barichara, ...","[26Sep, Colombia, Protesta, colombiano, Medell...","[TodosALaCalle26S, LosMalosSon, 26Sep, TodosAL...","[Automotive, NationsLeague, Maguire, Marvel, M...","[NoticiaW, NoticiaW, NoticiaW, Bogotá, AHORA, ..."
words,"[RT, @AlbertoRodNews:, ÚLTIMA, HORA, |, Estall...","[RT, @RicardoMalagonS:, “Francia, Márquez, es,...","[RT, @MariaFdaCabal:, Qué, mal, periodismo, el...","[@petrogustavo, Cargue, la, foto, de, Medelín,...","[RT, @WilliamDelgadoG:, Cientos, de, ciudadano...","[Mereció, mucho, más, el, Cartagena,, ante, un...","[RT, @MHEO_:, ¡Alcalde, de, Cali, @JorgeIvanOs...","[RT, @jarizabaletaf:, -No, erradicación, -No, ...","[RT, @ColSigloXXl:, #fiscalenapuros, por, corr...","[RT, @DCoronell:, #LaCaídaDelRectorMagnífico, ...",...,"[RT, @AlbertoBernalLe:, Esteban, nunca, entend...","[RT, @GustavoBolivar:, #LosMalosSon, El, CD, r...","[RT, @Dulcinela0907:, Jajajajajaja, dicen, “fr...","[RT, @jpurias:, En, Italia, los, votos, de, la...","[RT, @ElNacionalWeb:, #26Sep, |, Reabren, ofic...","[RT, @DCoronell:, Me, despierto, con, la, tris...","[RT, @Tolaymaruja:, Un, gracioso, se, puso, de...","[RT, @bettycriticonaa:, Fuera, Petro, !!, Se, ...","[RT, @LaScaloneta:, Que, humo, que, es, este, ...","[RT, @guenmecu:, Los, fascistas, del, uribismo..."


In [None]:
# Most frequent words across the tweets of all trending topics
from collections import Counter

# Pool the word lists of every topic into a single flat list
all_words = []
for topic_words in tweets_df.loc['words']:
    all_words.extend(topic_words)

# Tally the occurrences of each word
word_counts = Counter(all_words)

# Show the ten words with the highest counts
print(word_counts.most_common(10))

In [None]:
# Save the retrieved tweets to an Excel file whose name carries the
# month.day.year and hour at which this cell is run.
# NOTE(review): every cell of tweets_df holds a whole list; Excel limits the
# length of a single cell, so very long lists may not be stored in full -
# confirm by re-reading a saved file.
now = datetime.datetime.now().strftime("%m.%d.%Y_%Hh")
name = 'ColombiaTrends_{}.xlsx'.format(now)
tweets_df.to_excel(name)