In [43]:
import sys
import time
import requests
from datetime import datetime, timedelta
import json
import fasttext
import re

MAX_RETRIEVED_ELEMENTS = 1000
PUSHSHIFT_ENDPOINT = 'https://api.pushshift.io/reddit/search/'
THRESHOLD = 0.6
PATTERN = re.compile("&gt; (.*)\n\n")
EARLIEST_DATE = datetime(2019, 12, 19)
DIRNAME = 'data_r3/'

In [44]:
PRETRAINED_MODEL_PATH = 'lid.176.bin'
model = fasttext.load_model(PRETRAINED_MODEL_PATH)




In [45]:
def none_or_empty(text):
    return text is None or len(text) == 0 or text == "[removed]" or text == '[deleted]'

In [46]:
def preprocess(text):
    # Need to get rid of &gt for quotes
    preproc = PATTERN.sub("", text)
    
    # Need to get rid of newlines for fasttext to work
    preproc = re.sub("\n", " ", preproc)
    return preproc

In [47]:
def valid_text(text):
    if none_or_empty(text):
        return False, ""
    preproc_text = preprocess(text)
    labels, prob = model.predict(preproc_text)
    if labels[0].endswith('en') and prob[0] >= THRESHOLD:
        return True, preproc_text
    return False, ""
    

In [7]:
def retrieve_subreddit_data(subreddit, retrieval_type, dirname, fields, keywords, dates):
#     today = datetime.datetime.utcnow()
#     today_timestamp = int((today - datetime.datetime(1970, 1, 1)).total_seconds())
#     before_date = today_timestamp
    
    fields = ",".join(fields)
    query_words = "|".join(keywords)
    text_tag = 'body' if retrieval_type == 'comment' else 'selftext'
   
    count = 0
    total_valid = 0
    diachronic_valid = []
    output_filename = 'r.' + subreddit + '.' + retrieval_type + '.json'
    with open(dirname + output_filename, 'w') as fout:
        for i in range(len(dates) - 1):
            after = dates[i] #date farther from present
            before = dates[i+1] #date closer to present
            epoch_valid = 0 # number of data points in the given time period
            done = False

            while not done:
                count += 1
                query = f"{PUSHSHIFT_ENDPOINT}{retrieval_type}/?subreddit={subreddit}&sort=asc&size={MAX_RETRIEVED_ELEMENTS}" +\
                f"&before={before}&after={after}&fields={fields}&q={query_words}"

#                 query = PUSHSHIFT_ENDPOINT + retrieval_type + '/?subreddit=' + subreddit + \
#                 '&sort=desc&size=' + str(MAX_RETRIEVED_ELEMENTS) + '&before=' + str(before) + '&fields=' + fields + \
#                 "&q=" + query_words + 

                print(query, 'request #', count)
                sys.stdout.flush()
                try:
                    r = requests.get(query)
                except:
                    print('exception thrown...')
                    time.sleep(5)
                    continue

                if r.status_code != 200:
                    print('bad response code:', r.status_code)
                    if r.status_code == 429:
                        time.sleep(5)
                        continue
                    after += 86400  # slight change to the query
                    continue  # retry

    # record a submission/comment only if it's not empty

                for i, element in enumerate(r.json()['data']):
                    latest = element['created_utc']
#                     if 'created_utc' in element.keys():
#                         before_date = element['created_utc']
#                         if before_date < EARLIEST_DATE:
#                             done = True
#                             break
                    try:
                        is_valid, formatted = valid_text(element[text_tag])
                    except KeyError:
                        continue
                    if is_valid:
                        element[text_tag] = formatted
                        json.dump(element, fout)
                        fout.write('\n')
                        total_valid += 1
                        epoch_valid += 1
            
                if len(r.json()['data']) < MAX_RETRIEVED_ELEMENTS:  # end of data
                    done = True
                else:
                    after = latest
            diachronic_valid.append(epoch_valid)
                
    print("SUBREDDIT: r/{}".format(subreddit))
    print(retrieval_type)
    print(text_tag)
    print(total_valid)
    return total_valid, diachronic_valid

In [48]:
def get_dates(latest_date):
    # Get all week increments from earliest date until present
    dates = []
    epochs = []
    curr = EARLIEST_DATE
    while curr < latest_date:
        dates.append(curr.strftime("%m/%d/%Y"))
        epochs.append(int(curr.timestamp()))
        curr += timedelta(weeks=1)
    dates.append(latest_date.strftime("%m/%d/%Y"))
    epochs.append(int(latest_date.timestamp()))
    return dates, epochs
        
    

# Section 1: Body of Submissions and Keyword Comments
In this section, we look at submissions that have a non-empty body, and comments across submissions that include the given keywords. We ignore posts that where only titles have the keywords. The csvs capture weekly and aggregate numbers for relevant posts across submissions.

In [9]:
retrieval_types = ['submission', 'comment']
country_subreddits = ['europe', 'spain', 'italy', 'unitedkingdom', 'germany', 'iran', 'japan', 'china', 'singapore',
              'india', 'korea', 'france', 'unitedstatesofamerica', 'canada', 'onguardforthee', 'coronavirusus', 
              'canadacoronavirus', 'turkey', 'russia', 'brasil', 'belgium', 'thenetherlands', 'portugal', 'switzerland', 
              'peru', 'coronavirusuk']
state_subreddits = ['coronavirusca', 'coronavirusNewYork', 'newyork', 'newjersey', 'massachusetts', 'California',  
                    'Pennsylvania', 'illinois', 'michigan', 'florida', 'louisiana', 'texas', 'coronavirusAZ']  
fields = {'submission': ['author', 'id', 'selftext', 'permalink', 'title', 'created_utc'], 
          'comment': ['author', 'body', 'created_utc', 'permalink']}
keywords = ['covid', 'covid-19', 'coronavirus', 'corona', 'covid19', "pandemic", '"the%20virus"']
#optionally: pandemic?
dates, epochs = get_dates(datetime.utcnow())

output_filename = 'overall_stats.csv'
diachronic_filename = 'diachronic_stats.csv'
with open(DIRNAME + output_filename, 'w') as fout:
    with open(DIRNAME + diachronic_filename, 'w') as dfout:
        fout.write('Subreddit,Content Type,Sample Size\n')
        dfout.write('Subreddit,Content Type, Week Start, Week End, Sample Size\n')
        for sub in country_subreddits + state_subreddits:
            for r_type in retrieval_types:
                total_valid, diachronic_valid = retrieve_subreddit_data(sub, r_type, DIRNAME, fields[r_type], keywords, epochs)
                fout.write(f"{sub},{r_type},{total_valid}\n")
                for date_idx in range(len(dates)-1):
                    dfout.write(f"{sub},{r_type},{dates[date_idx]},{dates[date_idx+1]},{diachronic_valid[date_idx]}\n")

https://api.pushshift.io/reddit/search/submission/?subreddit=europe&sort=asc&size=1000&before=1577336400&after=1576731600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 1
https://api.pushshift.io/reddit/search/submission/?subreddit=europe&sort=asc&size=1000&before=1577941200&after=1577336400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 2
https://api.pushshift.io/reddit/search/submission/?subreddit=europe&sort=asc&size=1000&before=1578546000&after=1577941200&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 3
https://api.pushshift.io/reddit/search/submission/?subreddit=europe&sort=asc&size=1000&before=1579150800&after=1578546000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%2

https://api.pushshift.io/reddit/search/comment/?subreddit=europe&sort=asc&size=1000&before=1584590400&after=1584351295&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 16
https://api.pushshift.io/reddit/search/comment/?subreddit=europe&sort=asc&size=1000&before=1584590400&after=1584474002&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 17
https://api.pushshift.io/reddit/search/comment/?subreddit=europe&sort=asc&size=1000&before=1585195200&after=1584590400&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 18
https://api.pushshift.io/reddit/search/comment/?subreddit=europe&sort=asc&size=1000&before=1585195200&after=1584724278&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 19
https://api.pushshift.io/reddit/search/c

https://api.pushshift.io/reddit/search/submission/?subreddit=spain&sort=asc&size=1000&before=1586404800&after=1585800000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 16
https://api.pushshift.io/reddit/search/submission/?subreddit=spain&sort=asc&size=1000&before=1587009600&after=1586404800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 17
https://api.pushshift.io/reddit/search/submission/?subreddit=spain&sort=asc&size=1000&before=1587583751&after=1587009600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 18
SUBREDDIT: r/spain
submission
selftext
6
https://api.pushshift.io/reddit/search/comment/?subreddit=spain&sort=asc&size=1000&before=1577336400&after=1576731600&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona

https://api.pushshift.io/reddit/search/submission/?subreddit=italy&sort=asc&size=1000&before=1583384400&after=1582779600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 12
https://api.pushshift.io/reddit/search/submission/?subreddit=italy&sort=asc&size=1000&before=1583985600&after=1583384400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 13
https://api.pushshift.io/reddit/search/submission/?subreddit=italy&sort=asc&size=1000&before=1584590400&after=1583985600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 14
https://api.pushshift.io/reddit/search/submission/?subreddit=italy&sort=asc&size=1000&before=1585195200&after=1584590400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20

https://api.pushshift.io/reddit/search/comment/?subreddit=italy&sort=asc&size=1000&before=1586404800&after=1586283456&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 27
https://api.pushshift.io/reddit/search/comment/?subreddit=italy&sort=asc&size=1000&before=1587009600&after=1586404800&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 28
https://api.pushshift.io/reddit/search/comment/?subreddit=italy&sort=asc&size=1000&before=1587009600&after=1586947096&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 29
https://api.pushshift.io/reddit/search/comment/?subreddit=italy&sort=asc&size=1000&before=1587583751&after=1587009600&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 30
https://api.pushshift.io/reddit/search/comme

https://api.pushshift.io/reddit/search/comment/?subreddit=unitedkingdom&sort=asc&size=1000&before=1582779600&after=1582174800&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 10
https://api.pushshift.io/reddit/search/comment/?subreddit=unitedkingdom&sort=asc&size=1000&before=1583384400&after=1582779600&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 11
https://api.pushshift.io/reddit/search/comment/?subreddit=unitedkingdom&sort=asc&size=1000&before=1583985600&after=1583384400&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 12
https://api.pushshift.io/reddit/search/comment/?subreddit=unitedkingdom&sort=asc&size=1000&before=1583985600&after=1583832468&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 13
https://api.

https://api.pushshift.io/reddit/search/submission/?subreddit=germany&sort=asc&size=1000&before=1581570000&after=1580965200&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 8
https://api.pushshift.io/reddit/search/submission/?subreddit=germany&sort=asc&size=1000&before=1582174800&after=1581570000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 9
https://api.pushshift.io/reddit/search/submission/?subreddit=germany&sort=asc&size=1000&before=1582779600&after=1582174800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 10
https://api.pushshift.io/reddit/search/submission/?subreddit=germany&sort=asc&size=1000&before=1583384400&after=1582779600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"

bad response code: 429
https://api.pushshift.io/reddit/search/submission/?subreddit=iran&sort=asc&size=1000&before=1579150800&after=1578546000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 5
https://api.pushshift.io/reddit/search/submission/?subreddit=iran&sort=asc&size=1000&before=1579755600&after=1579150800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 6
https://api.pushshift.io/reddit/search/submission/?subreddit=iran&sort=asc&size=1000&before=1580360400&after=1579755600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 7
https://api.pushshift.io/reddit/search/submission/?subreddit=iran&sort=asc&size=1000&before=1580965200&after=1580360400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|

SUBREDDIT: r/iran
comment
body
469
https://api.pushshift.io/reddit/search/submission/?subreddit=japan&sort=asc&size=1000&before=1577336400&after=1576731600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 1
https://api.pushshift.io/reddit/search/submission/?subreddit=japan&sort=asc&size=1000&before=1577941200&after=1577336400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 2
https://api.pushshift.io/reddit/search/submission/?subreddit=japan&sort=asc&size=1000&before=1578546000&after=1577941200&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 3
https://api.pushshift.io/reddit/search/submission/?subreddit=japan&sort=asc&size=1000&before=1579150800&after=1578546000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus

https://api.pushshift.io/reddit/search/comment/?subreddit=japan&sort=asc&size=1000&before=1586404800&after=1585800000&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 16
https://api.pushshift.io/reddit/search/comment/?subreddit=japan&sort=asc&size=1000&before=1587009600&after=1586404800&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 17
https://api.pushshift.io/reddit/search/comment/?subreddit=japan&sort=asc&size=1000&before=1587583751&after=1587009600&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 18
SUBREDDIT: r/japan
comment
body
3596
https://api.pushshift.io/reddit/search/submission/?subreddit=china&sort=asc&size=1000&before=1577336400&after=1576731600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" requ

https://api.pushshift.io/reddit/search/comment/?subreddit=china&sort=asc&size=1000&before=1584590400&after=1583985600&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 13
https://api.pushshift.io/reddit/search/comment/?subreddit=china&sort=asc&size=1000&before=1584590400&after=1584503916&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 14
https://api.pushshift.io/reddit/search/comment/?subreddit=china&sort=asc&size=1000&before=1585195200&after=1584590400&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 15
https://api.pushshift.io/reddit/search/comment/?subreddit=china&sort=asc&size=1000&before=1585800000&after=1585195200&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 16
https://api.pushshift.io/reddit/search/comme

https://api.pushshift.io/reddit/search/comment/?subreddit=singapore&sort=asc&size=1000&before=1581570000&after=1580965200&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 8
https://api.pushshift.io/reddit/search/comment/?subreddit=singapore&sort=asc&size=1000&before=1581570000&after=1581414221&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 9
https://api.pushshift.io/reddit/search/comment/?subreddit=singapore&sort=asc&size=1000&before=1582174800&after=1581570000&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 10
https://api.pushshift.io/reddit/search/comment/?subreddit=singapore&sort=asc&size=1000&before=1582779600&after=1582174800&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 11
https://api.pushshift.io/reddi

https://api.pushshift.io/reddit/search/submission/?subreddit=india&sort=asc&size=1000&before=1585800000&after=1585195200&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 16
https://api.pushshift.io/reddit/search/submission/?subreddit=india&sort=asc&size=1000&before=1585800000&after=1585626349&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 17
https://api.pushshift.io/reddit/search/submission/?subreddit=india&sort=asc&size=1000&before=1586404800&after=1585800000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 18
https://api.pushshift.io/reddit/search/submission/?subreddit=india&sort=asc&size=1000&before=1586404800&after=1586270979&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20

https://api.pushshift.io/reddit/search/comment/?subreddit=india&sort=asc&size=1000&before=1586404800&after=1586401791&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 29
https://api.pushshift.io/reddit/search/comment/?subreddit=india&sort=asc&size=1000&before=1587009600&after=1586404800&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 30
https://api.pushshift.io/reddit/search/comment/?subreddit=india&sort=asc&size=1000&before=1587009600&after=1586680285&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 31
https://api.pushshift.io/reddit/search/comment/?subreddit=india&sort=asc&size=1000&before=1587009600&after=1586963063&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 32
https://api.pushshift.io/reddit/search/comme

https://api.pushshift.io/reddit/search/comment/?subreddit=korea&sort=asc&size=1000&before=1582779600&after=1582174800&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 10
https://api.pushshift.io/reddit/search/comment/?subreddit=korea&sort=asc&size=1000&before=1583384400&after=1582779600&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 11
https://api.pushshift.io/reddit/search/comment/?subreddit=korea&sort=asc&size=1000&before=1583985600&after=1583384400&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 12
https://api.pushshift.io/reddit/search/comment/?subreddit=korea&sort=asc&size=1000&before=1584590400&after=1583985600&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 13
https://api.pushshift.io/reddit/search/comme

https://api.pushshift.io/reddit/search/comment/?subreddit=france&sort=asc&size=1000&before=1580965200&after=1580360400&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 7
https://api.pushshift.io/reddit/search/comment/?subreddit=france&sort=asc&size=1000&before=1581570000&after=1580965200&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 8
https://api.pushshift.io/reddit/search/comment/?subreddit=france&sort=asc&size=1000&before=1582174800&after=1581570000&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 9
https://api.pushshift.io/reddit/search/comment/?subreddit=france&sort=asc&size=1000&before=1582779600&after=1582174800&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 10
https://api.pushshift.io/reddit/search/comm

https://api.pushshift.io/reddit/search/submission/?subreddit=unitedstatesofamerica&sort=asc&size=1000&before=1584590400&after=1583985600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 14
https://api.pushshift.io/reddit/search/submission/?subreddit=unitedstatesofamerica&sort=asc&size=1000&before=1585195200&after=1584590400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 15
https://api.pushshift.io/reddit/search/submission/?subreddit=unitedstatesofamerica&sort=asc&size=1000&before=1585800000&after=1585195200&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 16
https://api.pushshift.io/reddit/search/submission/?subreddit=unitedstatesofamerica&sort=asc&size=1000&before=1586404800&after=1585800000&fields=author,id,selftext,permalink,title,created_

https://api.pushshift.io/reddit/search/submission/?subreddit=canada&sort=asc&size=1000&before=1580965200&after=1580360400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 7
https://api.pushshift.io/reddit/search/submission/?subreddit=canada&sort=asc&size=1000&before=1581570000&after=1580965200&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 8
https://api.pushshift.io/reddit/search/submission/?subreddit=canada&sort=asc&size=1000&before=1582174800&after=1581570000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 9
https://api.pushshift.io/reddit/search/submission/?subreddit=canada&sort=asc&size=1000&before=1582779600&after=1582174800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%2

https://api.pushshift.io/reddit/search/comment/?subreddit=canada&sort=asc&size=1000&before=1585195200&after=1584708783&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 22
https://api.pushshift.io/reddit/search/comment/?subreddit=canada&sort=asc&size=1000&before=1585195200&after=1584810692&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 23
https://api.pushshift.io/reddit/search/comment/?subreddit=canada&sort=asc&size=1000&before=1585195200&after=1584927270&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 24
https://api.pushshift.io/reddit/search/comment/?subreddit=canada&sort=asc&size=1000&before=1585195200&after=1585058121&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 25
https://api.pushshift.io/reddit/search/c

https://api.pushshift.io/reddit/search/submission/?subreddit=onguardforthee&sort=asc&size=1000&before=1581570000&after=1580965200&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 8
https://api.pushshift.io/reddit/search/submission/?subreddit=onguardforthee&sort=asc&size=1000&before=1582174800&after=1581570000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 9
https://api.pushshift.io/reddit/search/submission/?subreddit=onguardforthee&sort=asc&size=1000&before=1582779600&after=1582174800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 10
https://api.pushshift.io/reddit/search/submission/?subreddit=onguardforthee&sort=asc&size=1000&before=1583384400&after=1582779600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavir

https://api.pushshift.io/reddit/search/submission/?subreddit=coronavirusus&sort=asc&size=1000&before=1578546000&after=1577941200&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 3
https://api.pushshift.io/reddit/search/submission/?subreddit=coronavirusus&sort=asc&size=1000&before=1579150800&after=1578546000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 4
https://api.pushshift.io/reddit/search/submission/?subreddit=coronavirusus&sort=asc&size=1000&before=1579755600&after=1579150800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 5
https://api.pushshift.io/reddit/search/submission/?subreddit=coronavirusus&sort=asc&size=1000&before=1580360400&after=1579755600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|co

https://api.pushshift.io/reddit/search/comment/?subreddit=coronavirusus&sort=asc&size=1000&before=1582174800&after=1581570000&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 9
https://api.pushshift.io/reddit/search/comment/?subreddit=coronavirusus&sort=asc&size=1000&before=1582779600&after=1582174800&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 10
https://api.pushshift.io/reddit/search/comment/?subreddit=coronavirusus&sort=asc&size=1000&before=1583384400&after=1582779600&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 11
https://api.pushshift.io/reddit/search/comment/?subreddit=coronavirusus&sort=asc&size=1000&before=1583384400&after=1583275386&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 12
https://api.p

https://api.pushshift.io/reddit/search/submission/?subreddit=canadacoronavirus&sort=asc&size=1000&before=1579150800&after=1578546000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 4
https://api.pushshift.io/reddit/search/submission/?subreddit=canadacoronavirus&sort=asc&size=1000&before=1579755600&after=1579150800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 5
https://api.pushshift.io/reddit/search/submission/?subreddit=canadacoronavirus&sort=asc&size=1000&before=1580360400&after=1579755600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 6
https://api.pushshift.io/reddit/search/submission/?subreddit=canadacoronavirus&sort=asc&size=1000&before=1580965200&after=1580360400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-1

https://api.pushshift.io/reddit/search/comment/?subreddit=canadacoronavirus&sort=asc&size=1000&before=1585800000&after=1585678708&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 18
https://api.pushshift.io/reddit/search/comment/?subreddit=canadacoronavirus&sort=asc&size=1000&before=1586404800&after=1585800000&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 19
https://api.pushshift.io/reddit/search/comment/?subreddit=canadacoronavirus&sort=asc&size=1000&before=1586404800&after=1586294606&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 20
https://api.pushshift.io/reddit/search/comment/?subreddit=canadacoronavirus&sort=asc&size=1000&before=1587009600&after=1586404800&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request #

https://api.pushshift.io/reddit/search/comment/?subreddit=turkey&sort=asc&size=1000&before=1583384400&after=1582779600&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 11
https://api.pushshift.io/reddit/search/comment/?subreddit=turkey&sort=asc&size=1000&before=1583985600&after=1583384400&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 12
https://api.pushshift.io/reddit/search/comment/?subreddit=turkey&sort=asc&size=1000&before=1584590400&after=1583985600&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 13
https://api.pushshift.io/reddit/search/comment/?subreddit=turkey&sort=asc&size=1000&before=1585195200&after=1584590400&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 14
https://api.pushshift.io/reddit/search/c

https://api.pushshift.io/reddit/search/comment/?subreddit=russia&sort=asc&size=1000&before=1581570000&after=1580965200&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 8
https://api.pushshift.io/reddit/search/comment/?subreddit=russia&sort=asc&size=1000&before=1582174800&after=1581570000&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 9
https://api.pushshift.io/reddit/search/comment/?subreddit=russia&sort=asc&size=1000&before=1582779600&after=1582174800&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 10
https://api.pushshift.io/reddit/search/comment/?subreddit=russia&sort=asc&size=1000&before=1583384400&after=1582779600&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 11
https://api.pushshift.io/reddit/search/com

https://api.pushshift.io/reddit/search/comment/?subreddit=brasil&sort=asc&size=1000&before=1579755600&after=1579150800&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 5
https://api.pushshift.io/reddit/search/comment/?subreddit=brasil&sort=asc&size=1000&before=1580360400&after=1579755600&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 6
https://api.pushshift.io/reddit/search/comment/?subreddit=brasil&sort=asc&size=1000&before=1580965200&after=1580360400&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 7
https://api.pushshift.io/reddit/search/comment/?subreddit=brasil&sort=asc&size=1000&before=1581570000&after=1580965200&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 8
https://api.pushshift.io/reddit/search/comme

https://api.pushshift.io/reddit/search/submission/?subreddit=belgium&sort=asc&size=1000&before=1587009600&after=1586404800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 17
https://api.pushshift.io/reddit/search/submission/?subreddit=belgium&sort=asc&size=1000&before=1587583751&after=1587009600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 18
SUBREDDIT: r/belgium
submission
selftext
138
https://api.pushshift.io/reddit/search/comment/?subreddit=belgium&sort=asc&size=1000&before=1577336400&after=1576731600&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 1
https://api.pushshift.io/reddit/search/comment/?subreddit=belgium&sort=asc&size=1000&before=1577941200&after=1577336400&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covi

https://api.pushshift.io/reddit/search/submission/?subreddit=thenetherlands&sort=asc&size=1000&before=1583985600&after=1583384400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 12
https://api.pushshift.io/reddit/search/submission/?subreddit=thenetherlands&sort=asc&size=1000&before=1584590400&after=1583985600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 13
https://api.pushshift.io/reddit/search/submission/?subreddit=thenetherlands&sort=asc&size=1000&before=1585195200&after=1584590400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 14
https://api.pushshift.io/reddit/search/submission/?subreddit=thenetherlands&sort=asc&size=1000&before=1585800000&after=1585195200&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronav

bad response code: 429
https://api.pushshift.io/reddit/search/submission/?subreddit=portugal&sort=asc&size=1000&before=1580360400&after=1579755600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 7
https://api.pushshift.io/reddit/search/submission/?subreddit=portugal&sort=asc&size=1000&before=1580965200&after=1580360400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 8
https://api.pushshift.io/reddit/search/submission/?subreddit=portugal&sort=asc&size=1000&before=1581570000&after=1580965200&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 9
https://api.pushshift.io/reddit/search/submission/?subreddit=portugal&sort=asc&size=1000&before=1582174800&after=1581570000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus

https://api.pushshift.io/reddit/search/submission/?subreddit=switzerland&sort=asc&size=1000&before=1577941200&after=1577336400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 2
https://api.pushshift.io/reddit/search/submission/?subreddit=switzerland&sort=asc&size=1000&before=1578546000&after=1577941200&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 3
https://api.pushshift.io/reddit/search/submission/?subreddit=switzerland&sort=asc&size=1000&before=1579150800&after=1578546000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 4
https://api.pushshift.io/reddit/search/submission/?subreddit=switzerland&sort=asc&size=1000&before=1579755600&after=1579150800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|cov

https://api.pushshift.io/reddit/search/comment/?subreddit=switzerland&sort=asc&size=1000&before=1585195200&after=1584590400&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 15
https://api.pushshift.io/reddit/search/comment/?subreddit=switzerland&sort=asc&size=1000&before=1585800000&after=1585195200&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 16
https://api.pushshift.io/reddit/search/comment/?subreddit=switzerland&sort=asc&size=1000&before=1586404800&after=1585800000&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 17
https://api.pushshift.io/reddit/search/comment/?subreddit=switzerland&sort=asc&size=1000&before=1587009600&after=1586404800&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 18
https://api.pushshif

https://api.pushshift.io/reddit/search/comment/?subreddit=peru&sort=asc&size=1000&before=1582779600&after=1582174800&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 10
bad response code: 429
https://api.pushshift.io/reddit/search/comment/?subreddit=peru&sort=asc&size=1000&before=1582779600&after=1582174800&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 11
https://api.pushshift.io/reddit/search/comment/?subreddit=peru&sort=asc&size=1000&before=1583384400&after=1582779600&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 12
https://api.pushshift.io/reddit/search/comment/?subreddit=peru&sort=asc&size=1000&before=1583985600&after=1583384400&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 13
https://api.pushshift.io/

https://api.pushshift.io/reddit/search/comment/?subreddit=coronavirusca&sort=asc&size=1000&before=1579755600&after=1579150800&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 5
https://api.pushshift.io/reddit/search/comment/?subreddit=coronavirusca&sort=asc&size=1000&before=1580360400&after=1579755600&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 6
https://api.pushshift.io/reddit/search/comment/?subreddit=coronavirusca&sort=asc&size=1000&before=1580965200&after=1580360400&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 7
bad response code: 429
https://api.pushshift.io/reddit/search/comment/?subreddit=coronavirusca&sort=asc&size=1000&before=1580965200&after=1580360400&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" reque

https://api.pushshift.io/reddit/search/submission/?subreddit=coronavirusuk&sort=asc&size=1000&before=1587583751&after=1587009600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 18
SUBREDDIT: r/coronavirusuk
submission
selftext
2052
https://api.pushshift.io/reddit/search/comment/?subreddit=coronavirusuk&sort=asc&size=1000&before=1577336400&after=1576731600&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 1
https://api.pushshift.io/reddit/search/comment/?subreddit=coronavirusuk&sort=asc&size=1000&before=1577941200&after=1577336400&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 2
https://api.pushshift.io/reddit/search/comment/?subreddit=coronavirusuk&sort=asc&size=1000&before=1578546000&after=1577941200&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavir

https://api.pushshift.io/reddit/search/submission/?subreddit=coronavirusNewYork&sort=asc&size=1000&before=1578546000&after=1577941200&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 3
https://api.pushshift.io/reddit/search/submission/?subreddit=coronavirusNewYork&sort=asc&size=1000&before=1579150800&after=1578546000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 4
https://api.pushshift.io/reddit/search/submission/?subreddit=coronavirusNewYork&sort=asc&size=1000&before=1579755600&after=1579150800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 5
https://api.pushshift.io/reddit/search/submission/?subreddit=coronavirusNewYork&sort=asc&size=1000&before=1580360400&after=1579755600&fields=author,id,selftext,permalink,title,created_utc&q=covid|cov

https://api.pushshift.io/reddit/search/comment/?subreddit=coronavirusNewYork&sort=asc&size=1000&before=1586404800&after=1585800000&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 17
https://api.pushshift.io/reddit/search/comment/?subreddit=coronavirusNewYork&sort=asc&size=1000&before=1587009600&after=1586404800&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 18
https://api.pushshift.io/reddit/search/comment/?subreddit=coronavirusNewYork&sort=asc&size=1000&before=1587583751&after=1587009600&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 19
SUBREDDIT: r/coronavirusNewYork
comment
body
1605
https://api.pushshift.io/reddit/search/submission/?subreddit=newyork&sort=asc&size=1000&before=1577336400&after=1576731600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|

https://api.pushshift.io/reddit/search/comment/?subreddit=newyork&sort=asc&size=1000&before=1583985600&after=1583384400&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 12
bad response code: 429
https://api.pushshift.io/reddit/search/comment/?subreddit=newyork&sort=asc&size=1000&before=1583985600&after=1583384400&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 13
https://api.pushshift.io/reddit/search/comment/?subreddit=newyork&sort=asc&size=1000&before=1584590400&after=1583985600&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 14
https://api.pushshift.io/reddit/search/comment/?subreddit=newyork&sort=asc&size=1000&before=1585195200&after=1584590400&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 15
https://api.p

https://api.pushshift.io/reddit/search/comment/?subreddit=newjersey&sort=asc&size=1000&before=1580965200&after=1580360400&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 7
https://api.pushshift.io/reddit/search/comment/?subreddit=newjersey&sort=asc&size=1000&before=1581570000&after=1580965200&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 8
https://api.pushshift.io/reddit/search/comment/?subreddit=newjersey&sort=asc&size=1000&before=1582174800&after=1581570000&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 9
https://api.pushshift.io/reddit/search/comment/?subreddit=newjersey&sort=asc&size=1000&before=1582779600&after=1582174800&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 10
https://api.pushshift.io/reddit

https://api.pushshift.io/reddit/search/comment/?subreddit=massachusetts&sort=asc&size=1000&before=1577941200&after=1577336400&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 2
https://api.pushshift.io/reddit/search/comment/?subreddit=massachusetts&sort=asc&size=1000&before=1578546000&after=1577941200&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 3
https://api.pushshift.io/reddit/search/comment/?subreddit=massachusetts&sort=asc&size=1000&before=1579150800&after=1578546000&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 4
https://api.pushshift.io/reddit/search/comment/?subreddit=massachusetts&sort=asc&size=1000&before=1579755600&after=1579150800&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 5
https://api.push

https://api.pushshift.io/reddit/search/submission/?subreddit=California&sort=asc&size=1000&before=1585800000&after=1585195200&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 16
https://api.pushshift.io/reddit/search/submission/?subreddit=California&sort=asc&size=1000&before=1586404800&after=1585800000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 17
https://api.pushshift.io/reddit/search/submission/?subreddit=California&sort=asc&size=1000&before=1587009600&after=1586404800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 18
https://api.pushshift.io/reddit/search/submission/?subreddit=California&sort=asc&size=1000&before=1587583751&after=1587009600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covi

https://api.pushshift.io/reddit/search/submission/?subreddit=Pennsylvania&sort=asc&size=1000&before=1582779600&after=1582174800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 11
https://api.pushshift.io/reddit/search/submission/?subreddit=Pennsylvania&sort=asc&size=1000&before=1583384400&after=1582779600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 12
https://api.pushshift.io/reddit/search/submission/?subreddit=Pennsylvania&sort=asc&size=1000&before=1583985600&after=1583384400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 13
https://api.pushshift.io/reddit/search/submission/?subreddit=Pennsylvania&sort=asc&size=1000&before=1584590400&after=1583985600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|cor

https://api.pushshift.io/reddit/search/submission/?subreddit=illinois&sort=asc&size=1000&before=1580360400&after=1579755600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 6
https://api.pushshift.io/reddit/search/submission/?subreddit=illinois&sort=asc&size=1000&before=1580965200&after=1580360400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 7
https://api.pushshift.io/reddit/search/submission/?subreddit=illinois&sort=asc&size=1000&before=1581570000&after=1580965200&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 8
https://api.pushshift.io/reddit/search/submission/?subreddit=illinois&sort=asc&size=1000&before=1582174800&after=1581570000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemi

SUBREDDIT: r/illinois
comment
body
854
https://api.pushshift.io/reddit/search/submission/?subreddit=michigan&sort=asc&size=1000&before=1577336400&after=1576731600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 1
https://api.pushshift.io/reddit/search/submission/?subreddit=michigan&sort=asc&size=1000&before=1577941200&after=1577336400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 2
https://api.pushshift.io/reddit/search/submission/?subreddit=michigan&sort=asc&size=1000&before=1578546000&after=1577941200&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 3
https://api.pushshift.io/reddit/search/submission/?subreddit=michigan&sort=asc&size=1000&before=1579150800&after=1578546000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covi

https://api.pushshift.io/reddit/search/comment/?subreddit=michigan&sort=asc&size=1000&before=1585195200&after=1584590400&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 15
https://api.pushshift.io/reddit/search/comment/?subreddit=michigan&sort=asc&size=1000&before=1585195200&after=1585170063&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 16
https://api.pushshift.io/reddit/search/comment/?subreddit=michigan&sort=asc&size=1000&before=1585800000&after=1585195200&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 17
https://api.pushshift.io/reddit/search/comment/?subreddit=michigan&sort=asc&size=1000&before=1585800000&after=1585691597&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 18
https://api.pushshift.io/reddit/

https://api.pushshift.io/reddit/search/comment/?subreddit=florida&sort=asc&size=1000&before=1580965200&after=1580360400&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 7
https://api.pushshift.io/reddit/search/comment/?subreddit=florida&sort=asc&size=1000&before=1581570000&after=1580965200&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 8
bad response code: 429
https://api.pushshift.io/reddit/search/comment/?subreddit=florida&sort=asc&size=1000&before=1581570000&after=1580965200&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 9
https://api.pushshift.io/reddit/search/comment/?subreddit=florida&sort=asc&size=1000&before=1582174800&after=1581570000&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 10
https://api.push

https://api.pushshift.io/reddit/search/comment/?subreddit=louisiana&sort=asc&size=1000&before=1577941200&after=1577336400&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 2
https://api.pushshift.io/reddit/search/comment/?subreddit=louisiana&sort=asc&size=1000&before=1578546000&after=1577941200&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 3
bad response code: 429
https://api.pushshift.io/reddit/search/comment/?subreddit=louisiana&sort=asc&size=1000&before=1578546000&after=1577941200&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 4
https://api.pushshift.io/reddit/search/comment/?subreddit=louisiana&sort=asc&size=1000&before=1579150800&after=1578546000&fields=author,body,created_utc,permalink&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 5
https://a

https://api.pushshift.io/reddit/search/submission/?subreddit=texas&sort=asc&size=1000&before=1585800000&after=1585195200&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 15
https://api.pushshift.io/reddit/search/submission/?subreddit=texas&sort=asc&size=1000&before=1586404800&after=1585800000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 16
https://api.pushshift.io/reddit/search/submission/?subreddit=texas&sort=asc&size=1000&before=1587009600&after=1586404800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 17
https://api.pushshift.io/reddit/search/submission/?subreddit=texas&sort=asc&size=1000&before=1587583751&after=1587009600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20

https://api.pushshift.io/reddit/search/submission/?subreddit=coronavirusAZ&sort=asc&size=1000&before=1582779600&after=1582174800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 11
https://api.pushshift.io/reddit/search/submission/?subreddit=coronavirusAZ&sort=asc&size=1000&before=1583384400&after=1582779600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 12
https://api.pushshift.io/reddit/search/submission/?subreddit=coronavirusAZ&sort=asc&size=1000&before=1583985600&after=1583384400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 13
https://api.pushshift.io/reddit/search/submission/?subreddit=coronavirusAZ&sort=asc&size=1000&before=1584590400&after=1583985600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus

# Section 2: Submission analysis: title vs body of post
Here, we want to see how number of posts differ if we include those posts with titles that have keywords in them. Though the titles are not very long, it will be important to see if these extra posts will bring about substantially more comments.

In [10]:
def retrieve_subreddit_data_title(subreddit, retrieval_type, dirname, fields, keywords, dates):
#     today = datetime.datetime.utcnow()
#     today_timestamp = int((today - datetime.datetime(1970, 1, 1)).total_seconds())
#     before_date = today_timestamp
    
    fields = ",".join(fields)
    query_words = "|".join(keywords)
    text_tag = 'body' if retrieval_type == 'comment' else 'selftext'
   
    count = 0
    total_valid = 0
    diachronic_valid = []
    output_filename = 'r.' + subreddit + '.' + retrieval_type + 'include_title=True.json'
    with open(dirname + output_filename, 'w') as fout:
        for i in range(len(dates) - 1):
            after = dates[i] #date farther from present
            before = dates[i+1] #date closer to present
            epoch_valid = 0 # number of data points in the given time period
            done = False

            while not done:
                count += 1
                query = f"{PUSHSHIFT_ENDPOINT}{retrieval_type}/?subreddit={subreddit}&sort=asc&size={MAX_RETRIEVED_ELEMENTS}" +\
                f"&before={before}&after={after}&fields={fields}&q={query_words}"

#                 query = PUSHSHIFT_ENDPOINT + retrieval_type + '/?subreddit=' + subreddit + \
#                 '&sort=desc&size=' + str(MAX_RETRIEVED_ELEMENTS) + '&before=' + str(before) + '&fields=' + fields + \
#                 "&q=" + query_words + 

                print(query, 'request #', count)
                sys.stdout.flush()
                try:
                    r = requests.get(query)
                except:
                    print('exception thrown...')
                    time.sleep(5)
                    continue

                if r.status_code != 200:
                    print('bad response code:', r.status_code)
                    if r.status_code == 429:
                        time.sleep(5)
                        continue
                    after += 86400  # slight change to the query
                    continue  # retry

    # record a submission/comment only if it's not empty

                for i, element in enumerate(r.json()['data']):
                    latest = element['created_utc']
#                     if 'created_utc' in element.keys():
#                         before_date = element['created_utc']
#                         if before_date < EARLIEST_DATE:
#                             done = True
#                             break
                    try:
                        is_valid_body, formatted = valid_text(element[text_tag])
                        if retrieval_type == 'submission':
                            is_valid_title, formatted_title = valid_text(element['title'])
                        else:
                            is_valid_title, formatted_title = False, ''
                    except KeyError:
                        continue
                    if is_valid_body or is_valid_title:
                        element[text_tag] = formatted
                        json.dump(element, fout)
                        fout.write('\n')
                        total_valid += 1
                        epoch_valid += 1
            
                if len(r.json()['data']) < MAX_RETRIEVED_ELEMENTS:  # end of data
                    done = True
                else:
                    after = latest
            diachronic_valid.append(epoch_valid)
                
    print("SUBREDDIT: r/{}".format(subreddit))
    print(retrieval_type)
    print(text_tag)
    print(total_valid)
    return total_valid, diachronic_valid

In [11]:
retrieval_types = ['submission']
output_filename = 'overall_stats_titled.csv'
diachronic_filename = 'diachronic_stats_titled.csv'
with open(DIRNAME + output_filename, 'w') as fout:
    with open(DIRNAME + diachronic_filename, 'w') as dfout:
        fout.write('Subreddit,Content Type,Sample Size\n')
        dfout.write('Subreddit,Content Type, Week Start, Week End, Sample Size\n')
        for sub in subreddits:
            for r_type in retrieval_types:
                total_valid, diachronic_valid = retrieve_subreddit_data_title(sub, r_type, DIRNAME, fields[r_type], keywords, epochs)
                fout.write(f"{sub},{r_type},{total_valid}\n")
                for date_idx in range(len(dates)-1):
                    dfout.write(f"{sub},{r_type},{dates[date_idx]},{dates[date_idx+1]},{diachronic_valid[date_idx]}\n")

https://api.pushshift.io/reddit/search/submission/?subreddit=europe&sort=asc&size=1000&before=1577336400&after=1576731600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 1
https://api.pushshift.io/reddit/search/submission/?subreddit=europe&sort=asc&size=1000&before=1577941200&after=1577336400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 2
https://api.pushshift.io/reddit/search/submission/?subreddit=europe&sort=asc&size=1000&before=1578546000&after=1577941200&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 3
https://api.pushshift.io/reddit/search/submission/?subreddit=europe&sort=asc&size=1000&before=1579150800&after=1578546000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%2

https://api.pushshift.io/reddit/search/submission/?subreddit=spain&sort=asc&size=1000&before=1585800000&after=1585195200&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 15
https://api.pushshift.io/reddit/search/submission/?subreddit=spain&sort=asc&size=1000&before=1586404800&after=1585800000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 16
bad response code: 429
https://api.pushshift.io/reddit/search/submission/?subreddit=spain&sort=asc&size=1000&before=1586404800&after=1585800000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 17
https://api.pushshift.io/reddit/search/submission/?subreddit=spain&sort=asc&size=1000&before=1587009600&after=1586404800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|c

https://api.pushshift.io/reddit/search/submission/?subreddit=unitedkingdom&sort=asc&size=1000&before=1582174800&after=1581570000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 10
https://api.pushshift.io/reddit/search/submission/?subreddit=unitedkingdom&sort=asc&size=1000&before=1582779600&after=1582174800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 11
https://api.pushshift.io/reddit/search/submission/?subreddit=unitedkingdom&sort=asc&size=1000&before=1583384400&after=1582779600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 12
https://api.pushshift.io/reddit/search/submission/?subreddit=unitedkingdom&sort=asc&size=1000&before=1583985600&after=1583384400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus

https://api.pushshift.io/reddit/search/submission/?subreddit=iran&sort=asc&size=1000&before=1579150800&after=1578546000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 4
https://api.pushshift.io/reddit/search/submission/?subreddit=iran&sort=asc&size=1000&before=1579755600&after=1579150800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 5
https://api.pushshift.io/reddit/search/submission/?subreddit=iran&sort=asc&size=1000&before=1580360400&after=1579755600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 6
https://api.pushshift.io/reddit/search/submission/?subreddit=iran&sort=asc&size=1000&before=1580965200&after=1580360400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" 

https://api.pushshift.io/reddit/search/submission/?subreddit=japan&sort=asc&size=1000&before=1586404800&after=1585800000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 17
https://api.pushshift.io/reddit/search/submission/?subreddit=japan&sort=asc&size=1000&before=1587009600&after=1586404800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 18
https://api.pushshift.io/reddit/search/submission/?subreddit=japan&sort=asc&size=1000&before=1587583751&after=1587009600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 19
SUBREDDIT: r/japan
submission
selftext
760
https://api.pushshift.io/reddit/search/submission/?subreddit=china&sort=asc&size=1000&before=1577336400&after=1576731600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|

https://api.pushshift.io/reddit/search/submission/?subreddit=singapore&sort=asc&size=1000&before=1583384400&after=1582779600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 11
https://api.pushshift.io/reddit/search/submission/?subreddit=singapore&sort=asc&size=1000&before=1583985600&after=1583384400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 12
https://api.pushshift.io/reddit/search/submission/?subreddit=singapore&sort=asc&size=1000&before=1584590400&after=1583985600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 13
https://api.pushshift.io/reddit/search/submission/?subreddit=singapore&sort=asc&size=1000&before=1585195200&after=1584590400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|

https://api.pushshift.io/reddit/search/submission/?subreddit=korea&sort=asc&size=1000&before=1579150800&after=1578546000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 4
https://api.pushshift.io/reddit/search/submission/?subreddit=korea&sort=asc&size=1000&before=1579755600&after=1579150800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 5
https://api.pushshift.io/reddit/search/submission/?subreddit=korea&sort=asc&size=1000&before=1580360400&after=1579755600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 6
https://api.pushshift.io/reddit/search/submission/?subreddit=korea&sort=asc&size=1000&before=1580965200&after=1580360400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20vir

https://api.pushshift.io/reddit/search/submission/?subreddit=france&sort=asc&size=1000&before=1587009600&after=1586404800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 18
https://api.pushshift.io/reddit/search/submission/?subreddit=france&sort=asc&size=1000&before=1587583751&after=1587009600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 19
SUBREDDIT: r/france
submission
selftext
348
https://api.pushshift.io/reddit/search/submission/?subreddit=unitedstatesofamerica&sort=asc&size=1000&before=1577336400&after=1576731600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 1
https://api.pushshift.io/reddit/search/submission/?subreddit=unitedstatesofamerica&sort=asc&size=1000&before=1577941200&after=1577336400&fields=author,id,selftext,permalink,t

https://api.pushshift.io/reddit/search/submission/?subreddit=canada&sort=asc&size=1000&before=1583384400&after=1582779600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 11
https://api.pushshift.io/reddit/search/submission/?subreddit=canada&sort=asc&size=1000&before=1583985600&after=1583384400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 12
https://api.pushshift.io/reddit/search/submission/?subreddit=canada&sort=asc&size=1000&before=1584590400&after=1583985600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 13
https://api.pushshift.io/reddit/search/submission/?subreddit=canada&sort=asc&size=1000&before=1585195200&after=1584590400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"th

bad response code: 429
https://api.pushshift.io/reddit/search/submission/?subreddit=coronavirusus&sort=asc&size=1000&before=1579150800&after=1578546000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 5
https://api.pushshift.io/reddit/search/submission/?subreddit=coronavirusus&sort=asc&size=1000&before=1579755600&after=1579150800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 6
https://api.pushshift.io/reddit/search/submission/?subreddit=coronavirusus&sort=asc&size=1000&before=1580360400&after=1579755600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 7
https://api.pushshift.io/reddit/search/submission/?subreddit=coronavirusus&sort=asc&size=1000&before=1580965200&after=1580360400&fields=author,id,selftext,permalink,title,created_utc&q=covid|

https://api.pushshift.io/reddit/search/submission/?subreddit=canadacoronavirus&sort=asc&size=1000&before=1582174800&after=1581570000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 9
https://api.pushshift.io/reddit/search/submission/?subreddit=canadacoronavirus&sort=asc&size=1000&before=1582779600&after=1582174800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 10
https://api.pushshift.io/reddit/search/submission/?subreddit=canadacoronavirus&sort=asc&size=1000&before=1583384400&after=1582779600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 11
https://api.pushshift.io/reddit/search/submission/?subreddit=canadacoronavirus&sort=asc&size=1000&before=1583985600&after=1583384400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid

https://api.pushshift.io/reddit/search/submission/?subreddit=russia&sort=asc&size=1000&before=1579150800&after=1578546000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 4
https://api.pushshift.io/reddit/search/submission/?subreddit=russia&sort=asc&size=1000&before=1579755600&after=1579150800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 5
https://api.pushshift.io/reddit/search/submission/?subreddit=russia&sort=asc&size=1000&before=1580360400&after=1579755600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 6
https://api.pushshift.io/reddit/search/submission/?subreddit=russia&sort=asc&size=1000&before=1580965200&after=1580360400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%2

https://api.pushshift.io/reddit/search/submission/?subreddit=brasil&sort=asc&size=1000&before=1586404800&after=1585800000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 17
https://api.pushshift.io/reddit/search/submission/?subreddit=brasil&sort=asc&size=1000&before=1587009600&after=1586404800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 18
https://api.pushshift.io/reddit/search/submission/?subreddit=brasil&sort=asc&size=1000&before=1587583751&after=1587009600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 19
SUBREDDIT: r/brasil
submission
selftext
194
https://api.pushshift.io/reddit/search/submission/?subreddit=belgium&sort=asc&size=1000&before=1577336400&after=1576731600&fields=author,id,selftext,permalink,title,created_utc&q=covid|cov

https://api.pushshift.io/reddit/search/submission/?subreddit=thenetherlands&sort=asc&size=1000&before=1582779600&after=1582174800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 11
https://api.pushshift.io/reddit/search/submission/?subreddit=thenetherlands&sort=asc&size=1000&before=1583384400&after=1582779600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 12
https://api.pushshift.io/reddit/search/submission/?subreddit=thenetherlands&sort=asc&size=1000&before=1583985600&after=1583384400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 13
https://api.pushshift.io/reddit/search/submission/?subreddit=thenetherlands&sort=asc&size=1000&before=1584590400&after=1583985600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronav

https://api.pushshift.io/reddit/search/submission/?subreddit=switzerland&sort=asc&size=1000&before=1579755600&after=1579150800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 5
bad response code: 429
https://api.pushshift.io/reddit/search/submission/?subreddit=switzerland&sort=asc&size=1000&before=1579755600&after=1579150800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 6
https://api.pushshift.io/reddit/search/submission/?subreddit=switzerland&sort=asc&size=1000&before=1580360400&after=1579755600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 7
https://api.pushshift.io/reddit/search/submission/?subreddit=switzerland&sort=asc&size=1000&before=1580965200&after=1580360400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19

https://api.pushshift.io/reddit/search/submission/?subreddit=peru&sort=asc&size=1000&before=1586404800&after=1585800000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 17
https://api.pushshift.io/reddit/search/submission/?subreddit=peru&sort=asc&size=1000&before=1587009600&after=1586404800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 18
https://api.pushshift.io/reddit/search/submission/?subreddit=peru&sort=asc&size=1000&before=1587583751&after=1587009600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 19
SUBREDDIT: r/peru
submission
selftext
30
https://api.pushshift.io/reddit/search/submission/?subreddit=coronavirusca&sort=asc&size=1000&before=1577336400&after=1576731600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-

https://api.pushshift.io/reddit/search/submission/?subreddit=coronavirusuk&sort=asc&size=1000&before=1582174800&after=1581570000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 10
https://api.pushshift.io/reddit/search/submission/?subreddit=coronavirusuk&sort=asc&size=1000&before=1582779600&after=1582174800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 11
https://api.pushshift.io/reddit/search/submission/?subreddit=coronavirusuk&sort=asc&size=1000&before=1583384400&after=1582779600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 12
https://api.pushshift.io/reddit/search/submission/?subreddit=coronavirusuk&sort=asc&size=1000&before=1583985600&after=1583384400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus

https://api.pushshift.io/reddit/search/submission/?subreddit=newyork&sort=asc&size=1000&before=1578546000&after=1577941200&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 3
https://api.pushshift.io/reddit/search/submission/?subreddit=newyork&sort=asc&size=1000&before=1579150800&after=1578546000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 4
https://api.pushshift.io/reddit/search/submission/?subreddit=newyork&sort=asc&size=1000&before=1579755600&after=1579150800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 5
https://api.pushshift.io/reddit/search/submission/?subreddit=newyork&sort=asc&size=1000&before=1580360400&after=1579755600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"t

https://api.pushshift.io/reddit/search/submission/?subreddit=newjersey&sort=asc&size=1000&before=1585800000&after=1585195200&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 16
https://api.pushshift.io/reddit/search/submission/?subreddit=newjersey&sort=asc&size=1000&before=1586404800&after=1585800000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 17
https://api.pushshift.io/reddit/search/submission/?subreddit=newjersey&sort=asc&size=1000&before=1587009600&after=1586404800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 18
https://api.pushshift.io/reddit/search/submission/?subreddit=newjersey&sort=asc&size=1000&before=1587583751&after=1587009600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|

https://api.pushshift.io/reddit/search/submission/?subreddit=California&sort=asc&size=1000&before=1582174800&after=1581570000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 9
bad response code: 429
https://api.pushshift.io/reddit/search/submission/?subreddit=California&sort=asc&size=1000&before=1582174800&after=1581570000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 10
https://api.pushshift.io/reddit/search/submission/?subreddit=California&sort=asc&size=1000&before=1582779600&after=1582174800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 11
https://api.pushshift.io/reddit/search/submission/?subreddit=California&sort=asc&size=1000&before=1583384400&after=1582779600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|c

bad response code: 429
https://api.pushshift.io/reddit/search/submission/?subreddit=illinois&sort=asc&size=1000&before=1577336400&after=1576731600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 2
https://api.pushshift.io/reddit/search/submission/?subreddit=illinois&sort=asc&size=1000&before=1577941200&after=1577336400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 3
https://api.pushshift.io/reddit/search/submission/?subreddit=illinois&sort=asc&size=1000&before=1578546000&after=1577941200&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 4
https://api.pushshift.io/reddit/search/submission/?subreddit=illinois&sort=asc&size=1000&before=1579150800&after=1578546000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus

https://api.pushshift.io/reddit/search/submission/?subreddit=michigan&sort=asc&size=1000&before=1584590400&after=1583985600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 14
https://api.pushshift.io/reddit/search/submission/?subreddit=michigan&sort=asc&size=1000&before=1585195200&after=1584590400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 15
https://api.pushshift.io/reddit/search/submission/?subreddit=michigan&sort=asc&size=1000&before=1585800000&after=1585195200&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 16
https://api.pushshift.io/reddit/search/submission/?subreddit=michigan&sort=asc&size=1000&before=1586404800&after=1585800000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pand

https://api.pushshift.io/reddit/search/submission/?subreddit=louisiana&sort=asc&size=1000&before=1581570000&after=1580965200&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 8
https://api.pushshift.io/reddit/search/submission/?subreddit=louisiana&sort=asc&size=1000&before=1582174800&after=1581570000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 9
https://api.pushshift.io/reddit/search/submission/?subreddit=louisiana&sort=asc&size=1000&before=1582779600&after=1582174800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 10
https://api.pushshift.io/reddit/search/submission/?subreddit=louisiana&sort=asc&size=1000&before=1583384400&after=1582779600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pa

https://api.pushshift.io/reddit/search/submission/?subreddit=coronavirusAZ&sort=asc&size=1000&before=1577941200&after=1577336400&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 2
https://api.pushshift.io/reddit/search/submission/?subreddit=coronavirusAZ&sort=asc&size=1000&before=1578546000&after=1577941200&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 3
https://api.pushshift.io/reddit/search/submission/?subreddit=coronavirusAZ&sort=asc&size=1000&before=1579150800&after=1578546000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 4
https://api.pushshift.io/reddit/search/submission/?subreddit=coronavirusAZ&sort=asc&size=1000&before=1579755600&after=1579150800&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|co

# Section 3: Extracting all comments for a submission
Instead of just broadly searching for all comments relating to coronavirus, we may also want to harvest data from submissions that talk about coronavirus, and use all comments as data. One potential problem is that people go off-topic a lot, especially at deeper levels of a discussion thread. To mitigate this, we can see the difference in the number of comments available at the first level of the comment tree vs all comments associated with a given post. 

In [80]:
def run_query(query, count, after=None):
    done = False
    while not done:
        print(query, 'request #', count)
        sys.stdout.flush()
        try:
            r = requests.get(query)
        except:
            print('exception thrown...')
            time.sleep(5)
            continue

        if r.status_code != 200:
            print('bad response code:', r.status_code)
            if r.status_code == 429:
                time.sleep(5)
            continue  # retry
        done = True
    return r
# =============================
    

In [83]:
def get_data(retrieval_type, subreddit, before, after, fields, query_words):
    done = False
    text_tag = 'body' if retrieval_type == 'comment' else 'selftext'
    ids = []
    count = 0 
    while not done:
        count += 1
        query = f"{PUSHSHIFT_ENDPOINT}{retrieval_type}/?subreddit={subreddit}&sort=asc&size={MAX_RETRIEVED_ELEMENTS}" +\
        f"&before={before}&after={after}&fields={fields}&q={query_words}"
        r = run_query(query, count)

        for i, element in enumerate(r.json()['data']):
            latest = element['created_utc']
            try:
                is_valid_body, formatted = valid_text(element[text_tag])
                if retrieval_type == 'submission':
                    is_valid_title, formatted_title = valid_text(element['title'])
                else:
                    is_valid_title, formatted_title = False, ''
            except KeyError:
                continue
            if is_valid_body or is_valid_title:
                ids.append(element['id'])
        if len(r.json()['data']) < MAX_RETRIEVED_ELEMENTS:  # end of data
            done = True
        else:
            after = latest
    return ids

In [84]:
def get_batches(comment_ids):
    num_batches = len(comment_ids) % MAX_RETRIEVED_ELEMENTS
    batches = []
    for i in range(num_batches):
        batches.append(comment_ids[i*MAX_RETRIEVED_ELEMENTS: (i+1)*MAX_RETRIEVED_ELEMENTS])
    return batches

In [76]:
def get_comments_for_post(post_id):
    done = False
    top_level = []
    all_comments = []
    count = 0
    while not done:
        count += 1
        query = f"https://api.pushshift.io/reddit/submission/comment_ids/{post_id}"  
        r = run_query(query, count)
        done = True
        query_batches = get_batches(r.json()['data'])
        for batch in query_batches:
            ids = 
            query = f"{PUSHSHIFT_ENDPOINT}comment/?sort=desc&ids={ids}"
            for com in r.json()['data']:
                is_valid_body, formatted = valid_text(com['body'])
                if is_valid_body:
                    if com['parent_id'].startswith('t3'):
                        top_level.append(com['id'])
                    all_comments.append(com['id'])
    return top_level, all_comments
    

In [77]:
def retrieve_subreddit_data_all_posts(subreddit, dirname, fields, keywords, dates):

    post_fields = ",".join(fields['submission'])
    comment_fields = ",".join(fields['comment'])

    query_words = "|".join(keywords)
#     text_tag = 'body' if retrieval_type == 'comment' else 'selftext'
   
    count = 0
    total_valid = 0
    diachronic_valid = [] # each element is a tuple of (posts, comments with keywords, comment w/ keywords, top level with, top level without, and total comments)
    output_filename = 'r.' + subreddit + '.' + '.all_data.json'
    
    with open(dirname + output_filename, 'w') as fout:
        for i in range(len(dates) - 1): # For each date
            after = dates[i] #date farther from present
            before = dates[i+1] #date closer to present
            epoch_valid = 0 # number of data points in the given time period
            
            # Get submission data
            submission_ids = get_data('submission', subreddit, before, after, post_fields, query_words)
            total_top = []
            total_comments = []
            for sub_id in submission_ids:
                top, all_comments = get_comments_for_post(sub_id)
                total_top.extend(top)
                total_comments.extend(all_comments)
            comment_ids = get_data('comment', subreddit, before, after, comment_fields, query_words)
            
            num_posts = len(submission_ids)
            comments_w_keywords = len(set(total_comments).intersection(set(comment_ids)))
            comments_wo_keywords = len(total_comments) - comments_w_keywords
            top_w_keywords = len(set(total_top).intersection(set(comment_ids)))
            top_wo_keywords = len(total_top) - top_w_keywords
            num_comments = len(comment_ids) + comments_wo_keywords
            
            total_data = num_posts + num_comments
            
            all_vals = [num_posts, comments_w_keywords, comments_wo_keywords, top_w_keywords, 
                        top_wo_keywords, num_comments, total_data]
            print(all_vals)
            print(type(all_vals))
            diachronic_valid.append(",".join(map(str, all_vals)))
            total_valid += total_data
                
#     print("SUBREDDIT: r/{}".format(subreddit))
#     print(retrieval_type)
#     print(text_tag)
#     print(total_valid)

    return total_valid, diachronic_valid

In [78]:
# retrieval_types = ['submission', 'comment']
country_subreddits = ['europe', 'spain', 'italy']#, 'unitedkingdom', 'germany', 'iran', 'japan', 'china', 'singapore',
#               'india', 'korea', 'france', 'unitedstatesofamerica', 'canada', 'onguardforthee', 'coronavirusus', 
#               'canadacoronavirus', 'turkey', 'russia', 'brasil', 'belgium', 'thenetherlands', 'portugal', 'switzerland', 
#               'peru', 'coronavirusuk']
# state_subreddits = ['coronavirusca', 'coronavirusNewYork', 'newyork', 'newjersey', 'massachusetts', 'California',  
#                     'Pennsylvania', 'illinois', 'michigan', 'florida', 'louisiana', 'texas', 'coronavirusAZ']  
fields = {'submission': ['author', 'id', 'selftext', 'permalink', 'title', 'created_utc'], 
          'comment': ['author', 'body', 'created_utc', 'permalink', 'parent_id', 'id']}
keywords = ['covid', 'covid-19', 'coronavirus', 'corona', 'covid19', "pandemic", '"the%20virus"']
#optionally: pandemic?
dates, epochs = get_dates(datetime.utcnow())
output_filename = 'overall_stats.csv'
diachronic_filename = 'diachronic_stats.csv'
with open(DIRNAME + output_filename, 'w') as fout:
    with open(DIRNAME + diachronic_filename, 'w') as dfout:
        fout.write('Subreddit,Sample Size\n')
        dfout.write('Subreddit,Week Start, Week End, Comments With Keywords, Comments without Keywords, ' +\
        'Top Level Comments With Keywords,Top Level Comments Without Keywords, Total Comments, Total Data\n')
        for sub in country_subreddits:
            total_valid, diachronic_valid = retrieve_subreddit_data_all_posts(sub, DIRNAME, fields, keywords, epochs)
            fout.write(f"{sub},{total_valid}\n")
            for date_idx in range(len(dates)-1):
                dfout.write(f"{sub},{dates[date_idx]},{dates[date_idx+1]},{diachronic_valid[date_idx]}\n")

https://api.pushshift.io/reddit/search/submission/?subreddit=europe&sort=asc&size=1000&before=1577336400&after=1576731600&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 1
bad response code: 502
https://api.pushshift.io/reddit/search/submission/?subreddit=europe&sort=asc&size=1000&before=1577336400&after=1576818000&fields=author,id,selftext,permalink,title,created_utc&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 2
https://api.pushshift.io/reddit/search/comment/?subreddit=europe&sort=asc&size=1000&before=1577336400&after=1576731600&fields=author,body,created_utc,permalink,parent_id,id&q=covid|covid-19|coronavirus|corona|covid19|pandemic|"the%20virus" request # 1
[0, 0, 0, 0, 0, 1, 1]
<class 'list'>
https://api.pushshift.io/reddit/search/submission/?subreddit=europe&sort=asc&size=1000&before=1577941200&after=1577336400&fields=author,id,selftext,permalink,title,created_utc&q=

TypeError: string indices must be integers

# EXTRA SPACE

In [38]:
keywords = ['covid', 'covid-19', 'coronavirus', 'corona', 'covid19', "pandemic", '"the%20virus"']
query = PUSHSHIFT_ENDPOINT + 'comment' + "/?q=" + "|".join(keywords)
r = requests.get(query)
print(r.status_code)
print(r.text)
r.json()['data']

200
{
    "data": [
        {
            "all_awardings": [],
            "associated_award": null,
            "author": "monkey_sage",
            "author_flair_background_color": "",
            "author_flair_css_class": null,
            "author_flair_richtext": [],
            "author_flair_template_id": null,
            "author_flair_text": "Wanting to Emigrate",
            "author_flair_text_color": "dark",
            "author_flair_type": "text",
            "author_fullname": "t2_g2zd9",
            "author_patreon_flair": false,
            "author_premium": false,
            "awarders": [],
            "body": "&gt; More recently, the PRC has emerged as a competitor to the global North and challenges the undisputed hegemony over the planet that europe and its settler-colonies have had over the word for at least two centuries. This creates a situation in which the old narrative of the \"yellow peril\" is easily revived. \n\nYou honestly believe the racist people in this c

[{'all_awardings': [],
  'associated_award': None,
  'author': 'monkey_sage',
  'author_flair_background_color': '',
  'author_flair_css_class': None,
  'author_flair_richtext': [],
  'author_flair_template_id': None,
  'author_flair_text': 'Wanting to Emigrate',
  'author_flair_text_color': 'dark',
  'author_flair_type': 'text',
  'author_fullname': 't2_g2zd9',
  'author_patreon_flair': False,
  'author_premium': False,
  'awarders': [],
  'body': '&gt; More recently, the PRC has emerged as a competitor to the global North and challenges the undisputed hegemony over the planet that europe and its settler-colonies have had over the word for at least two centuries. This creates a situation in which the old narrative of the "yellow peril" is easily revived. \n\nYou honestly believe the racist people in this country have a keen understanding of international history and global economics and having reviewed all of that, have decided to become racist? \n\nHave you actually met one of these 

In [85]:
# query = f"{PUSHSHIFT_ENDPOINT}submission/?sort=desc&size=3&ids=g6paa7"
query = "https://api.pushshift.io/reddit/submission/comment_ids/g6paa7"
print(query)
resp = requests.get(query)
if resp.status_code == 200:
    print(resp.json())
    data = resp.json()['data']
    ids = ",".join(data)
    query = f"{PUSHSHIFT_ENDPOINT}comment/?sort=desc&ids={ids}"
    time.sleep(5)
    resp = requests.get(query)
#     print(len(resp.json()['data']))
#     query = f"https://api.pushshift.io/reddit/comment/?sort=desc&ids={ids}"
#     time.sleep(5)
#     resp = requests.get(query)
    if resp.status_code != 200:
        print(resp.status_code)
    count = 0
    for com in resp.json()['data']:
        if com['parent_id'].startswith('t3'):
            count+=1
    print(count)
    
    
    
else:
    print(resp.status_code, "TRY AGAIN")
#                 query = PUSHSHIFT_ENDPOINT + retrieval_type + '/?subreddit=' + subreddit + \
#                 '&sort=desc&size=' + str(MAX_RETRIEVED_ELEMENTS) + '&before=' + str(before) + '&fields=' + fields + \
#                 "&q=" + query_words + 

https://api.pushshift.io/reddit/submission/comment_ids/g6paa7
{'data': ['fobd9gy', 'fobh398', 'fobh8me', 'fobhlhv', 'fobhmbt', 'fobjs7i', 'fobl1gv', 'fobl2et', 'fobl82o', 'foblct2', 'foblsg7', 'fobluf8', 'fobmd8p', 'fobmnvo', 'fobmp17', 'fobmyjg', 'fobn40a', 'fobnuyl', 'fobo0ks', 'foboal4', 'fobotpx', 'fobp1i8', 'fobpfnm', 'fobpkfd', 'fobpqc2', 'fobq3by', 'fobqow6', 'fobqqry', 'fobr1rv', 'fobrr4r', 'fobs7c2', 'fobs8bb', 'fobsbnp', 'fobsbq8', 'fobsct6', 'fobsppg', 'fobsx8y', 'fobsydp', 'fobsz8m', 'fobtg7i', 'fobtjum', 'fobtmju', 'fobts1x', 'fobu6ip', 'fobuluu', 'fobuod1', 'fobuwcd', 'fobuz1q', 'fobuz7a', 'fobv45r', 'fobvr0o', 'fobwbd0', 'fobwkzg', 'fobwlsj', 'fobwnvu', 'fobwsah', 'fobwt5z', 'fobx3og', 'fobx6i4', 'fobx8ar', 'fobxj2e', 'fobxjqz', 'fobxphf', 'fobxtm5', 'fobxu10', 'fobxx4c', 'fobxz22', 'fobybel', 'fobycc2', 'fobynza', 'fobyq0r', 'fobyqw7', 'fobzdsz', 'fobzkck', 'fobznu0', 'fobzphz', 'fobzr87', 'fobzsaf', 'foc00v2', 'foc09kf', 'foc0g50', 'foc0wa5', 'foc0x4g', 'foc11wg', 'foc

In [35]:
x = set([9])


In [71]:
",".join(map(str, [5, 6]))

'5,6'