# Retrieving trending tweets - Germany

In [1]:
import pandas as pd
import datetime

In [2]:
import os

import twitter

# Go to https://developer.twitter.com/en/apps to create an app and get values
# for the four credentials below.
# See https://developer.twitter.com/en/docs/basics/authentication/overview/oauth
# for more information on Twitter's OAuth implementation.

# The credentials are read from environment variables so that they never have to be
# typed into (and saved together with) the notebook. Each value falls back to an
# empty-string placeholder when its environment variable is not set.
CONSUMER_KEY = os.environ.get('TWITTER_CONSUMER_KEY', '')   # Key associated with the application
CONSUMER_SECRET = os.environ.get('TWITTER_CONSUMER_SECRET', '') # Password used to authenticate with the authentication server
OAUTH_TOKEN = os.environ.get('TWITTER_OAUTH_TOKEN', '') # Key given to the client after successful authentication of above keys
OAUTH_TOKEN_SECRET = os.environ.get('TWITTER_OAUTH_TOKEN_SECRET', '') # Password for the access key

# create an object called auth that represents your OAuth authorization
auth = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET,
                           CONSUMER_KEY, CONSUMER_SECRET)

# auth object is passed to a class called Twitter that is capable of issuing queries to Twitter's API.
twitter_api = twitter.Twitter(auth=auth)

# Nothing to see by displaying twitter_api object except that it's now a
# defined variable.
# NOTE(review): presumably the credentials are only checked by Twitter once a request
# is actually issued, so this output alone does not prove they are valid - confirm.

print(twitter_api)

<twitter.api.Twitter object at 0x00000281BE4C13A0>


In [3]:
# Ask the Twitter API for the topics that are currently trending in Germany.
#
# References:
#   https://dev.twitter.com/docs/api/1.1/get/trends/place
#   http://developer.yahoo.com/geo/geoplanet/

# WOE ID used by the trends endpoint to identify Germany (see the GeoPlanet link above)
DE_WOE_ID = 23424829

# The keyword is spelled `_id` (with a leading underscore) so that the value is sent
# as a query-string parameter. Without the underscore, the twitter package treats it
# as a special-case keyword argument and appends the ID value to the URL itself.
de_trends = twitter_api.trends.place(_id=DE_WOE_ID)


In [6]:
# The base file 'trend_names_base_ger.xlsx' exists so that hard-coded trends can be added to it;
# those trends would then be searched in addition to the ones found through the API
# (unlike the case for Colombia).
# In practice, only the API-retrieved trends were considered, so this base file is an
# empty Excel file containing only the trend_names header.

stored_trend_names = (
    pd.read_excel('trend_names_base_ger.xlsx')
    # 'Unnamed: 0' is presumably the index column left behind by an earlier to_excel call.
    # errors='ignore' keeps the cell working when the base file has no such column,
    # e.g. when it really contains nothing but the trend_names header.
    .drop(columns=['Unnamed: 0'], errors='ignore')
)
stored_trend_names.head()

Unnamed: 0,trend_names


In [7]:
# Plain Python list of the trend names that were already present in the base file
previous_trend_names = stored_trend_names['trend_names'].values.tolist()

In [8]:
# Names of all trends returned by the API, in the order in which they were returned
trend_names = [entry['name'] for entry in de_trends[0]['trends']]

# Extend the stored names with every retrieved trend that is not known yet,
# so that each trend name ends up in the list only once
for trend in trend_names:
    if trend in previous_trend_names:
        continue
    print('added', trend, 'to trend_names to mine tweets')
    previous_trend_names.append(trend)

# Independent copy of the merged list: these are the trend names that get saved to a file
trends_to_save = list(previous_trend_names)

In [10]:
# Write the trend names to an Excel file whose name carries the date and hour of retrieval
now = datetime.datetime.now().strftime("%m.%d.%Y_%Hh")
name = 'Trend_names_germany_{}.xlsx'.format(now)
trend_names_frame = pd.DataFrame({'trend_names':trends_to_save})
trend_names_frame.to_excel(name)

### Retrieving tweets for each trending topic and relevant information for each of those tweets

**Important note:** The following two blocks of code generate a dictionary with data from the tweets corresponding to each trending topic. This data is stored in files named with the date and time of retrieval, for later processing in the "Tweet_analysis.ipynb" notebook. For Germany, tweets in both English and German were considered. However, because the Twitter API is rate-limited, only one language was retrieved at a time: one of the lines `search_results = twitter_api.search.tweets(q=q, count=count, lang="de")` and `search_results = twitter_api.search.tweets(q=q, count=count, lang="en")` is commented out at the moment of execution. One hour later, once the API limit has been reset, the code is run again with the commented and uncommented lines swapped.

In [None]:
import json
# Imported once for the whole cell (instead of on every loop iteration);
# used to decode the URL-encoded next_results string.
from urllib.parse import unquote

# Each trending topic is used as the search query. A query is a UTF-8, URL-encoded
# string of 500 characters maximum, including operators.
# URL encoding is used when placing text in a query string
# to avoid it being confused with the URL itself.

# number of returned tweets per request
count = 100

# number of additional batches of results followed for each trending topic
MAX_EXTRA_BATCHES = 5


def _next_results_kwargs(next_results):
    """Convert a 'next_results' query string into keyword arguments for the next search.

    The string is assumed to look like '?key1=value1&key2=value2' with URL-encoded
    values. It is split on '&' and on the first '=' BEFORE being decoded, so that an
    encoded '&' or '=' inside a value (for example a trend such as 'H&M') stays part
    of that value instead of producing a malformed key/value pair.

    Returns a dict that maps each decoded parameter name to its decoded value.
    """
    kwargs = {}
    for pair in next_results[1:].split('&'):  # [1:] drops the leading '?'
        key, _sep, value = pair.partition('=')
        if key:
            kwargs[unquote(key)] = unquote(value)
    return kwargs


# This dictionary will have information on each trending topic. The keys are the trends
# themselves, and the values are sub-dictionaries with the tweet texts, the mentioned
# screen names, the hashtags and the tokenized texts (lists of the words in the texts).
tweets = {}

for trending_topic in trends_to_save:

    try:
        q = trending_topic

        # See https://dev.twitter.com/rest/reference/get/search/tweets
        # Only one language is retrieved per run; swap the commented line to change it.
        #search_results = twitter_api.search.tweets(q=q, count=count, lang="en")
        search_results = twitter_api.search.tweets(q=q, count=count, lang="de")
        statuses = search_results['statuses']

        # Iterate through up to MAX_EXTRA_BATCHES more batches of results by following the cursor.
        # The loop variable is not named `_`, which IPython uses for the last cell output.
        for _batch in range(MAX_EXTRA_BATCHES):
            try:
                next_results = search_results['search_metadata']['next_results']
            except KeyError:
                # No more results when next_results doesn't exist
                break

            # The decoded parameters are passed on as keyword arguments of the next request
            search_results = twitter_api.search.tweets(**_next_results_kwargs(next_results))
            statuses += search_results['statuses']

        # Save the data
        status_texts = [status['text']
                        for status in statuses]

        # screen name is the twitter user name of an account; collected from the user mentions
        screen_names = [user_mention['screen_name']
                        for status in statuses
                        for user_mention in status['entities']['user_mentions']]

        hashtags = [hashtag['text']
                    for status in statuses
                    for hashtag in status['entities']['hashtags']]

        # Compute a collection of all words from all tweets
        words = [w
                 for t in status_texts
                 for w in t.split()]

        # NOTE: the key 'hastags' (sic) is kept as it is because the saved output uses
        # this spelling; renaming it would change the format of the stored data.
        tweets[trending_topic] = {'text': status_texts, 'screen_names': screen_names,
                                  'hastags': hashtags, 'words': words}

    except Exception as error:
        # Best effort: a topic that fails (e.g. API error, unexpected response) is skipped
        # so that the remaining topics are still processed, but the failure is reported
        # instead of being silently swallowed. KeyboardInterrupt is no longer caught.
        print('skipped', trending_topic, 'because of', type(error).__name__, ':', error)

In [12]:
# Arrange the collected tweet information as a table:
# one column per trending topic, one row per stored field
tweets_df = pd.DataFrame(data=tweets)
tweets_df.head()

Unnamed: 0,#NordStream2,#Merz,#Sozialtourismus,Pipelines,Sabotage,Wochenteiler,#ENGGER,Ostsee,Kai Pflaume,Entschuldigung,...,Biden,Faschismus,Wortwahl,Bergfest,Methan,Röhren,explosionen,Faschisten,Ariel,Amerikaner
text,[RT @eventhoryzen: Amerikanische Geheimdienste...,[RT @heuteshow: Strack-Zimmermann über #Merz: ...,[RT @shengfui: Richtigstellung: In einem vorhe...,[RT @JueKarl: Was aus #Nordstream1 und #Nordst...,[RT @winkelsdorf: Mal etwas Hintergrund zur Sa...,[RT @kristina7968: Guten Morgen Ihr Lieben! Ei...,[RT @NrwRecherche: Deutsche Neonazis bei Lände...,[RT @Dt_Pl_Inst: Neue Ostsee-Pipeline: „Die Är...,[Tim Apfel und Kai Pflaume dass ich das noch e...,[RT @nouripour: Ihre Entschuldigung in allen E...,...,"[@pkbrln @AliBengali15 Ich glaube, Sie machen ...","[RT @DennisKBerlin: Kann ""wenn der Faschismus ...","[RT @astefanowitsch: Es geht nicht darum, ob d...",[Bergfest mit Dinos! 🎮Ohne💩hier gehts voll ab ...,[@L_Bednarz Es wird in die Atmosphäre entweich...,"[@AlexWallasch Vor Bornholm, weit weg von der ...",[RT @Anna_Lena2022: Explosionen von Nord Strea...,"[@focusonline Schamlos ist, dass der Focus die...",[RT @happilyintheam: erst Bibi &amp; Julian un...,[RT @MarkusL32078761: Politik und Medien schei...
screen_names,"[eventhoryzen, nikitheblogger, SteffenKotre, P...","[heuteshow, heuteshow, heuteshow, heuteshow, e...","[shengfui, E_Beiersdorfer, heuteshow, heutesho...","[JueKarl, rebew_lexa, DrLuetke, theotherphilip...","[winkelsdorf, harzhorn, rebew_lexa, DrLuetke, ...","[kristina7968, KWunsam, derleugner, Gertie8002...","[NrwRecherche, Tiefimwesten77, RubenGerczi, fc...","[Dt_Pl_Inst, AlexWallasch, PeterBorbe, welt, B...","[applefan81, NiklasHennings, vondrueben_, b00g...","[nouripour, BuchheitMarkus, Johann_v_d_Bron, J...",...,"[pkbrln, AliBengali15, DerOhneNAMEN3, Handlest...","[DennisKBerlin, GeorgDiez1, StrickSimon, elhot...","[astefanowitsch, Freddi_DE, nikitheblogger, Ba...","[ClancysSon, BOO_ZerO1, Sanny11468, CharlyM201...","[L_Bednarz, Leelah1, florianaigner, bund_net, ...","[AlexWallasch, grndmstrfesxh, MuellerTadzio, w...","[Anna_Lena2022, Anna_Lena2022, sonneundmars, B...","[focusonline, derspiegel, peteralthaus, Lam3th...","[happilyintheam, dviVerpackung, Team_Luftwaffe...","[MarkusL32078761, docknack, MarkusL32078761, w..."
hastags,"[NordStream2, NordStream2, Druckabfall, NordSt...","[Merz, Merz, Merz, Merz, Sozialtourismus, Merz...","[Merz, Merz, Merz, Correctiv, Sozialtourismus,...","[Nordstream1, Nordstream2, Nordstream, BalticP...","[Nordstream, Nordstream, BalticPipe, VonDerLey...","[StarWars, Andor, Wochenteiler, putin, krieg, ...","[ENGGER, ENGGER, ENGGER, NationsLeague, Kane, ...","[Nordstream1, NordStream2, Sabotage, NordStrea...","[U, Baltic, Weihnachtsbeleuchtung, Ende, NordS...","[VonDerLeyen, Italien, U, Baltic, Weihnachtsbe...",...,"[Druckabfall, Scholz, Nordstream2, Biden, Bide...","[Faschismus, CDU, CSU, Italien, Faschisten, Me...","[Erdogan, Bundestag, Merz, Erdogan, Bundestag,...","[StarCitizen, StarCitizen, bergfest, Bergfest,...","[Methan, CO2, Klimawandel, NS1, Methan, CO2, M...","[NordStream2, Nordstream, Putin, NordStream2, ...","[NordStream2, PKGr, Sabotage, Kriegserklärung,...","[Friedensdemo, Friedensdemo, Feminism, Feminis...","[Ariel, Lenor, FCBayern, Ariel, Nowplaying, MO...","[Amerikaner, Nordstream, Sanktionen, pipelines..."
words,"[RT, @eventhoryzen:, Amerikanische, Geheimdien...","[RT, @heuteshow:, Strack-Zimmermann, über, #Me...","[RT, @shengfui:, Richtigstellung:, In, einem, ...","[RT, @JueKarl:, Was, aus, #Nordstream1, und, #...","[RT, @winkelsdorf:, Mal, etwas, Hintergrund, z...","[RT, @kristina7968:, Guten, Morgen, Ihr, Liebe...","[RT, @NrwRecherche:, Deutsche, Neonazis, bei, ...","[RT, @Dt_Pl_Inst:, Neue, Ostsee-Pipeline:, „Di...","[Tim, Apfel, und, Kai, Pflaume, dass, ich, das...","[RT, @nouripour:, Ihre, Entschuldigung, in, al...",...,"[@pkbrln, @AliBengali15, Ich, glaube,, Sie, ma...","[RT, @DennisKBerlin:, Kann, ""wenn, der, Faschi...","[RT, @astefanowitsch:, Es, geht, nicht, darum,...","[Bergfest, mit, Dinos!, 🎮Ohne💩hier, gehts, vol...","[@L_Bednarz, Es, wird, in, die, Atmosphäre, en...","[@AlexWallasch, Vor, Bornholm,, weit, weg, von...","[RT, @Anna_Lena2022:, Explosionen, von, Nord, ...","[@focusonline, Schamlos, ist,, dass, der, Focu...","[RT, @happilyintheam:, erst, Bibi, &amp;, Juli...","[RT, @MarkusL32078761:, Politik, und, Medien, ..."


In [13]:
# Store the retrieved tweets in an Excel file whose name carries the current date and hour
now = datetime.datetime.now().strftime("%m.%d.%Y_%Hh")
name = 'GermanyTrends_{}.xlsx'.format(now)
tweets_df.to_excel(excel_writer=name)