In [1]:
import json
from datetime import date, timedelta

import pandas as pd
import matplotlib.pyplot as plt
from langdetect import detect
import glob
import os

from os import makedirs
from os.path import join, exists

In [2]:
from searchtweets import ResultStream, gen_rule_payload, load_credentials, collect_results

In [3]:
# Load the premium-search credentials from the local YAML file, reading the
# entry stored under the `search_tweets_premium` key. The result is passed to
# `collect_results` (as `result_stream_args`) inside `get_tweets`.
# NOTE(review): env_overwrite=False presumably keeps the file's values even if
# matching environment variables are set — confirm against the searchtweets docs.
premium_search_args = load_credentials("creds_twitter_keys_archive.yaml",
                                       yaml_key="search_tweets_premium",
                                       env_overwrite=False)

Grabbing bearer token from OAUTH


In [4]:
def get_tweets(search_words, start_date, end_date, num_twt):
    """Search for tweets in a date window and return selected fields as a DataFrame.

    Parameters
    ----------
    search_words : str
        Search rule text handed to ``gen_rule_payload``.
    start_date, end_date : datetime.date
        Bounds of the search window; each is sent formatted as ``YYYY-MM-DD``.
    num_twt : int or None
        Maximum number of tweets to collect (passed as ``max_results``).

    Returns
    -------
    pandas.DataFrame
        One row per tweet with the columns ``created_at``, ``text``, ``id``,
        ``user_id``, ``user_loc``, ``user_followers``, ``user_lang``,
        ``user_time_zone``, ``retweet_count`` and ``fav_count``. The three
        optional user-profile columns hold ``None`` when the payload omits
        the corresponding key.
    """
    # The premium search endpoint rejects requests asking for more than 500
    # results per call, so cap the page size; totals above the cap are still
    # bounded by ``max_results`` below.
    max_per_call = 500
    per_call = num_twt if num_twt is None else min(num_twt, max_per_call)

    rule = gen_rule_payload(search_words,
                            from_date=start_date.strftime("%Y-%m-%d"),
                            to_date=end_date.strftime("%Y-%m-%d"),
                            results_per_call=per_call)

    tweets = collect_results(rule, max_results=num_twt, result_stream_args=premium_search_args)

    rows = []
    for tweet in tweets:
        user = tweet['user']
        rows.append([
            tweet['created_at'],
            tweet['text'],
            tweet['id'],
            user['id'],
            # Profile fields below are optional/deprecated in Twitter payloads;
            # use .get so one missing key does not discard results that were
            # already fetched (and billed).
            user.get('location'),
            user['followers_count'],
            user.get('lang'),
            user.get('time_zone'),
            tweet['retweet_count'],
            tweet['favorite_count'],
        ])

    columns = ['created_at', 'text', 'id', 'user_id', 'user_loc', 'user_followers',
               'user_lang', 'user_time_zone', 'retweet_count', 'fav_count']

    return pd.DataFrame(data=rows, columns=columns)

In [5]:
def save_tweets(tweet_df, start_date, topic, year, cand):
    """Write ``tweet_df`` to ``data2/raw/<topic>/<year>/<cand>/<YYYY_MM_DD>.csv``.

    Parameters
    ----------
    tweet_df : pandas.DataFrame
        Frame to write; saved with ``DataFrame.to_csv`` defaults.
    start_date : datetime.date
        Day the tweets belong to; formatted as ``YYYY_MM_DD`` for the file name.
    topic : str
        Directory name for the topic.
    year : str or int
        Directory name for the year; converted with ``str`` so an integer
        year works as well as a string.
    cand : str
        Directory name for the candidate.

    The target directory is created if it does not exist; an existing file
    for the same day is overwritten.
    """
    out_dir = join('data2', 'raw', topic, str(year), cand)
    os.makedirs(out_dir, exist_ok=True)

    file_name = start_date.strftime("%Y_%m_%d") + '.csv'
    tweet_df.to_csv(join(out_dir, file_name))

In [6]:
def get_and_save_twts_by_day(start_date, topic, search_str, cand, num_days, year, num_twt=500):
    """Fetch and save tweets one day at a time for ``num_days`` consecutive days.

    Parameters
    ----------
    start_date : datetime.date
        First day to collect.
    topic : str
        Topic label, forwarded to ``save_tweets``.
    search_str : str
        Search rule text, forwarded to ``get_tweets``.
    cand : str
        Candidate label, forwarded to ``save_tweets``.
    num_days : int
        Number of consecutive one-day windows to collect.
    year : str
        Year label, forwarded to ``save_tweets``.
    num_twt : int, optional
        Maximum tweets requested per day (default 500, the value that was
        previously hard-coded).

    For each day the window ``[day, day + 1]`` is queried, the result is saved
    under that day's date, and the window bounds are printed as progress.
    """
    one_day = timedelta(days=1)

    for offset in range(num_days):
        day_start = start_date + offset * one_day
        day_end = day_start + one_day

        tweet_df = get_tweets(search_str, day_start, day_end, num_twt)
        save_tweets(tweet_df, day_start, topic, year, cand)

        print(day_start.strftime("%Y-%m-%d"), day_end.strftime("%Y-%m-%d"))
    

In [7]:
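# Example single-day run (arguments follow the current six-parameter signature):
#get_and_save_twts_by_day(date(2012, 10, 30), 'candidate', 'vote romney', 'romney', 1, '2012')
# NOTE(review): "hillary" — unexplained leftover note, presumably another candidate to collect; confirm or remove.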

In [29]:
def get_twt_data():
    """Collect and save tweets for every topic/candidate pair.

    Starting on 2012-10-18, two consecutive days are collected for each of
    the topics economy, environment, health and immigration, first for
    'obama' and then for 'romney'. The search text is
    ``'<candidate> <topic>'`` and results are filed under the year '2012'.
    """
    # A fixed historical start date is used rather than today's date.
    first_day = date(2012, 10, 18)
    day_count = 2
    election_year = '2012'

    # The general 'candidate' topic (search text 'vote <candidate>') is
    # currently disabled and therefore not listed here.
    subjects = ('economy', 'environment', 'health', 'immigration')
    candidates = ('obama', 'romney')

    for subject in subjects:
        for name in candidates:
            get_and_save_twts_by_day(first_day, subject, name + ' ' + subject, name,
                                     day_count, election_year)
    
    

In [30]:
# Run the collection: as configured in get_twt_data, this makes one
# get_tweets call and writes one CSV per topic/candidate pair per day
# (8 pairs x 2 days), printing each day's window as it completes.
get_twt_data()

2012-10-18 2012-10-19
2012-10-19 2012-10-20
2012-10-18 2012-10-19
2012-10-19 2012-10-20
2012-10-18 2012-10-19
2012-10-19 2012-10-20
2012-10-18 2012-10-19
2012-10-19 2012-10-20
2012-10-18 2012-10-19
2012-10-19 2012-10-20
2012-10-18 2012-10-19
2012-10-19 2012-10-20
2012-10-18 2012-10-19
2012-10-19 2012-10-20
2012-10-18 2012-10-19
2012-10-19 2012-10-20
