# Scraper for Twitter using GetOldTweets3

Package: https://github.com/Mottl/GetOldTweets3

### Notebook Author: Martin Beck

In [1]:
# Pip install GetOldTweets3 if you don't already have the package
# !pip install GetOldTweets3

# Imports
import GetOldTweets3 as got
import pandas as pd

## Query by Username
Creation of queries using GetOldTweets3

The function below runs the query and then writes the results to a CSV file using pandas.

In [2]:
# Function that pulls tweets from a specific username and writes them to a csv file

# Parameters: (twitter username to pull tweets from), (max number of most recent tweets to pull)
def username_tweets_to_csv(username, count):
    """Fetch tweets for ``username`` and save them as a CSV file.

    ``username`` is handed to ``setUsername`` and ``count`` to ``setMaxTweets``.
    One row is written per returned tweet, with the columns Username, Datetime,
    Text, retweets, favorites and hashtags. The file is named
    ``<username>-<int(count/1000)>k-tweets.csv`` and is written to a path
    relative to the current working directory. Nothing is returned.
    """
    # Build the search criteria one setting at a time
    criteria = got.manager.TweetCriteria()
    criteria = criteria.setUsername(username)
    criteria = criteria.setMaxTweets(count)

    # Collect one row of selected fields per tweet returned by the manager
    rows = []
    for item in got.manager.TweetManager.getTweets(criteria):
        rows.append([item.username, item.date, item.text,
                     item.retweets, item.favorites, item.hashtags])

    # Tabulate the rows under their column headings
    header = ['Username', 'Datetime', 'Text', 'retweets', 'favorites', 'hashtags']
    frame = pd.DataFrame(rows, columns=header)

    # The "k" figure in the file name is count/1000 truncated to an integer
    out_name = '{}-{}k-tweets.csv'.format(username, int(count/1000))
    frame.to_csv(out_name, sep=',')

## Query by Text Search
The function below runs the query and then writes the results to a CSV file using pandas.

In [None]:
# Function that pulls tweets based on a general search query and turns to csv file

# Parameters: (text query you want to search), (max number of most recent tweets to pull from)
def text_query_to_csv(text_query, count):
    """Run a text search for tweets and write the results to a CSV file.

    Parameters:
        text_query: search text handed to ``setQuerySearch``; it also forms the
            prefix of the output file name.
        count: value handed to ``setMaxTweets``; ``int(count/1000)`` appears in
            the file name as the "k" figure.

    Writes ``<text_query>-<k>k-tweets.csv`` (a path relative to the current
    working directory) with the columns ``Datetime`` and ``Text``. Characters
    that are not valid in file names (path separators, angle brackets, colon,
    double quote, pipe, question mark, asterisk and control characters) are
    replaced by ``_`` in the file name only; the query itself is sent unchanged.
    Nothing is returned.
    """
    # Creation of query object
    tweetCriteria = got.manager.TweetCriteria().setQuerySearch(text_query)\
                                                .setMaxTweets(count)
    # Creation of list that contains all tweets
    tweets = got.manager.TweetManager.getTweets(tweetCriteria)

    # Creating list of chosen tweet data
    text_tweets = [[tweet.date, tweet.text] for tweet in tweets]

    # Creation of dataframe from tweets
    tweets_df = pd.DataFrame(text_tweets, columns = ['Datetime', 'Text'])

    # A search string can contain characters such as '/', ':' or '"' that would
    # make the file name invalid or point into another directory, so swap them
    # for '_' before building the name. Queries without such characters keep
    # exactly the file name they had before.
    forbidden = set('<>:"/\\|?*')
    safe_query = ''.join(
        '_' if (char in forbidden or ord(char) < 32) else char
        for char in str(text_query)
    )

    # Converting tweets dataframe to csv file
    tweets_df.to_csv('{}-{}k-tweets.csv'.format(safe_query, int(count/1000)), sep=',')

## Query Function Calls
Putting it all together: calling the functions defined above.

In [3]:
# Twitter usernames to scrape. The loop below passes each entry to
# username_tweets_to_csv, which also uses the username in the name of the CSV file it writes.
# `count` (set below) is the max number of most recent tweets requested per username.
usernames = ['Khamenei_fa','Khamenei_ir', 'Rouhani_ir', 'Eshaq_jahangiri' , 'Dr_Vaezi', 'MB_Nobakht' , 'azarijahromi' 
                 ,'sm_bathaei', 'shariatmadari_m', 'BijanZanganeh' , 'Alirabiei_ir', 'S_A_Salehi' , 'ebtekarm_ir' , 'rezarahmanii'
                 ,'jamshid_ansari' , 'msoltanifar_ir', 'mounesan_ir' , 'JZarif' , 'amirnazemy', 'fatahi_ir', 'araghchi'
                 , 'barari_ir', 'kabiri_mj', 'tondgouyan','sadjadb', 'shohre_naseri', 'HaniyehSamei', 'saeed272727'
                 ,'RVahidianS', 'shahla_osouli', 'torabianIR', 'alilarijani' , 'ir_aref' , 'alimotahari_ir','mah_sadeghi'
                 , 'behroznemati' , 'SoroushAbolfazl' , 'AliSari1397', 'mahkiaee', 'kolivand98' ,'jalali_kazem'
                 ,'mehdi_mahdavian', 'mirmeysamasad' , 'zahrasaei_ir' , 'Badamchi_Media', 'AK_Hosseinzadeh','mazaniahmad'
                 ,'S_FatemeHoseini','a_rahimi_mp','ParvanehMafi','elyashazrati','Nahid_Tajedin','oladghobad_f','shirankhorasani'
                 ,'P_Salahshouri','saeidiftm','mahdi_sheykh','Zahedi_ir','ahmadamirabadi1','Drjalilrahimi','Ghheidari'
                 ,'TayebehSiavash','Khojasteh_ir','Jafarzadeh_ir','AliNajafi_ir','k_karampour','A_Yarmohammadi','ParsaeiB'
                 ,'drmdehghan','ahadazadikhah','HassanNooroozi','ahmadhematii','homayonhashemi','yagob_shivyari'
                 ,'yousefnejad_ir','KarimiGhodousi','zonnour','HajiDeligani','j_mohebinia','sfaridmousavi','moh_rafsanjani'
                 ,'ArvinBahare','salari_mohamad','shrbamani','HoseinNaghashi','AfshinHabibzade','AMasjedjamei','hojjat_nazari'
                 ,'aminimedia_ir','AliEtaMedia','MjHaghshenas','zahra_nezhad','N_Khodakarami','ElhamFakhari','hasanrasouli'
                 ,'HKhalilAbadi','farahani52','MMirlohii','sadrazamnouri','milani_arash','Bagheri_org','ghasemsoleimane'
                 ,'Azizjafaari_ir','mesbahyazdi_ir','rafsanjani_fa','Khatamimedia','Kadkhodaee_ir','HaddadAdel_ir','ir_rezaee'
                 ,'mb_ghalibaf','mowlaverdi','DrSaeedJalili','DrAboutalebi','Ahmadinejad_fa','Ahmadinejad1956','Zarghami_ez'
                 ,'Smmirsalim','pirouzhanachi','raisi_org','ar_moezi','hesamodin1','baeidinejad','NiknamSepanta','ZahraAhmadipour'
                 ,'Nahavandian_ir','alamolhoda_','qasemian_ir','kabi_abbas','Panahian_IR','Hn_jalali','sadighi_ir','Qarati_ir'
                 ,'Ahmadkhatami_ir','HassanKhomeini','alia_peyvandi','Fassih_F','AshrafBrujerdi','alishakourirad','v_seif'
                 ,'Dastjerdi_ir','hojatmirzaei','hajmajid47','fazelmaybodi','HadiKhaniki','1alpr','alirezaghanadan','hoseinipouya'
                 ,'HasankarimiG','mjnanakar','Shahin_Arpanahi','seyedmajidsadr2','EsmaeiliParviz','mohsenmirdamadi','IRIMFA'
            ]
# Alternative single-account list, left commented out (presumably for a quick trial run):
# usernames = ['behdad_ka']
# Cap on how many of each account's most recent tweets are requested
count = 3500

# Scrape every listed account in turn; each call writes that account's own CSV file
for account in usernames:
    username_tweets_to_csv(username=account, count=count)

In [None]:
# Search text to scrape tweets for; it is also used in the name of the csv file
# count caps how many matching tweets are requested for this search
text_query = 'USA Election 2020'
count = 5000

# Run the search and write the matching tweets to a CSV file
text_query_to_csv(text_query=text_query, count=count)