# Twitter webscraping -- Rene's workplace

In [1]:
import tweepy
from tweepy import Stream
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
import csv
import sys
import time
import argparse
import string
import json
import geopandas as gpd
import geocoder
import numpy as np

Below, the codes are referenced from an outside file containing the authentication codes, as to avoid a potential leak.  

In [2]:
d = json.load(open("codes.txt"))
consumer_key = d['1'] 
consumer_secret = d['2'] 
access_token = d['3'] 
access_secret = d['4'] 

This below contains the Twitter hashtags considered important, with the English and Spanish hashtags intended to query for Spanish-speaking accounts referencing these tags. 

In [3]:
hshtgs = ["Evictions", 
            "Homelessness", 
            "Homeless", 
            "Tenant", 
            "TenantPower",
            "AffordableHousing", 
            "CancelRent", 
            "RentStrike", 
            "TaxTheRich", 
            "EvictionMoratorium", 
            "RentMoratorium", 
            "EvictionFree", 
            "NoEvictions", 
            "Housing assistance", 
            "Housingrelief", 
            "COVID19", 
            "Rentstrike", 
            "Housingcrisis", 
            "Rentburden", 
            "HousingForAll", 
            "Evictionfreeze", 
            "HousingAffordability", 
            "EquitableDevelopment", 
            "Unemployment", 
            "Foreclosure", 
            "SuspendRent", 
            "RentFreeze", 
            "Displacement", 
            "RentalAssistance", 
            "ReclaimOurHomes",
            "StopEvictions", 
            "CantPayWontPay"]
spanish_hshtgs = ["CancelLaRenta",
                "SinDesalojos",
                "ComidaSiRentaNo",
                "NoCobramosNoPagamos",
                "RentaEstabilizada",
                "HuelgadeAlquileres",
                "CancelenDeudas",
                "LaRentaNoSeCome",
                "CansadosDeLimosnas",
                "QueremosJusticia",
                "ImpactoLatino"]

In [None]:
#Spanish tweets
import pandas as pd

for i in range(len(spanish_hshtgs)):
    hshtg = spanish_hshtgs[i]
    cmd = "#{}".format(hshtg)
    print(cmd)
    !snscrape --since "2019-12-31" twitter-search $cmd >> sp_links.txt
    
for i in range(len(hshtgs)):
    hshtg = hshtgs[i]
    cmd = "#{}".format(hshtg)
    print(cmd)
    !snscrape --since "2019-12-31" twitter-search $cmd >> en_links.txt

#CancelLaRenta
#SinDesalojos
#ComidaSiRentaNo
#NoCobramosNoPagamos
#RentaEstabilizada
#HuelgadeAlquileres
#CancelenDeudas
#LaRentaNoSeCome
#CansadosDeLimosnas
#QueremosJusticia
#ImpactoLatino
#Evictions
#Homelessness


In [None]:
with open('sp_links.txt', 'r') as f:
    tweets_sp = [line.strip() for line in f]
with open('en_links.txt', 'r') as f:
    tweets_en = [line.strip() for line in f]
    
print("Text file containing tweets with Spanish hashtags: " + str(len(tweets_sp)))
print("Text file containing tweets with English hashtags: " + str(len(tweets_en)))

In [None]:
import os 

df = pd.DataFrame()
def get_tweets(tweet_ids):
    #     global api
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_secret)
    api = tweepy.API(auth, wait_on_rate_limit = True, wait_on_rate_limit_notify = True)
    statuses = api.statuses_lookup(tweet_ids, tweet_mode="extended")
    data = df # define your own dataframe
    # printing the statuses
    for status in statuses:
        # print(status.lang)        if status.lang == "en":
        if status.place == "Paterson, NJ":
            mined = {
                "tweet_id": status.id,
                "name": status.user.name,
                "screen_name": status.user.screen_name,
                "retweet_count": status.retweet_count,
                "text": status.full_text,
                "mined_at": datetime.datetime.now(),
                "created_at": status.created_at,
                "favourite_count": status.favorite_count,
                "hashtags": status.entities["hashtags"],
                "status_count": status.user.statuses_count,
                "followers_count": status.user.followers_count,
                "location": status.place,
                "source_device": status.source,
            }            
            last_tweet_id = status.id
            data = data.append(mined, ignore_index=True)    
    data.to_csv(
        f"Extracted_TWEETS.csv", mode="a", header=not os.path.exists("Extracted_TWEETS.csv"), index=False
    )
    print("..... going to sleep 20s")
    time.sleep(20)
    
for i in tweet_ids :
    get_tweets(i) 

## Nothing important below. 

In [29]:
# config
location_ = "40.914745,-74.162827,10mi" #"40.914745,-74.162827,10mi"
date_ = "2020-01-01" #"2018-11-16"

def search_tweets(title, query, date_since = "01-01-2020"):
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_secret)
    api = tweepy.API(auth, wait_on_rate_limit = True, wait_on_rate_limit_notify = True)
    
    search_term = query
    with open(title + ".csv", 'a', encoding = "utf-8",  newline='') as csvfile:
        tweetData = csv.writer(csvfile)
        for tweet in tweepy.Cursor(api.search, q = query and ("place:%s" % "Paterson, NJ"), tweet_mode="extended",  since=date_since).items():
            if tweet.coordinates is not None:
                tweetData.writerow([search_term, tweet.created_at, tweet.full_text, tweet.coordinates["coordinates"], tweet.user.screen_name])
            elif tweet.place is not None:
                tweetData.writerow([search_term, tweet.created_at, tweet.full_text, tweet.place.full_name, tweet.user.screen_name])
            else:
                tweetData.writerow([search_term, tweet.created_at, tweet.full_text, tweet.user.location, tweet.user.screen_name])
    csvfile.close()    

In [7]:
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
 
api = tweepy.API(auth, wait_on_rate_limit =True)

@classmethod
def parse(cls, api, raw):
    status = cls.first_parse(api, raw)
    setattr(status, 'json', json.dumps(raw))
    return status
 

class Listener(StreamListener):
    def on_data(self, data):
        try:
            # Parsing the json recieved from the twitter stream
            jsonData = json.loads(data)
            print(jsonData)
            createdAt = jsonData['created_at']
            text = jsonData['text']
            print("Created at : " , createdAt , " text : " , text)
            saveThis = createdAt + " ---> " + text
            
            saveFile = open("results.csv","a")
            saveFile.write(saveThis.encode('utf-16'))
            saveFile.write("\n")
            saveFile.close()
            title_ = "tweets_.csv"
            with open(title_, 'a', encoding = "utf-8",  newline='') as csvfile:
                w = csv.DictWriter(csvfile, fieldnames = sorted(D.keys()))
                w.writeheader()
                w.writerow(D)
            csvfile.close()
            return True
        except BaseException as e:
            print('Failed on data : ', str(e))
            time.sleep(5)

    def on_error(self, status):
        print("Error : ", status)

# Listener for twitter streaming             
language_ = ["en"]
tracks_ = ["CancelRent", "RentStrike"]


g_ = g.json.get('raw').get('boundingbox')
order = [0, 2, 1, 3]
locations_ = [float(g_[_]) for _ in order]
print(locations_)

if __name__ == '__main__':
    twitter_stream = Stream(auth, Listener())
    twitter_stream.filter(track = tracks_, locations = locations_)

[40.888682, -74.206197, 40.94159, -74.129233]
{'created_at': 'Mon Sep 28 23:28:56 +0000 2020', 'id': 1310723194593193984, 'id_str': '1310723194593193984', 'text': 'RT @ny_indivisible: BREAKING: @NYGovCuomo Now extends Residential Eviction Moratorium for 2020. \n\nNow he MUST #CancelRent \n\nSupport NYS Bil‚Ä¶', 'source': '<a href="http://tapbots.com/tweetbot" rel="nofollow">Tweetbot for iŒüS</a>', 'truncated': False, 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 37020219, 'id_str': '37020219', 'name': 'xECKx', 'screen_name': 'xECK29x', 'location': 'Long Island, NY', 'url': None, 'description': 'He/Him, #Isles, infosec DFIR, straight-edge, EHM DB guy. Mostly RTs.', 'translator_type': 'none', 'protected': False, 'verified': False, 'followers_count': 1050, 'friends_count': 5001, 'listed_count': 126, 'favourites_count': 41369, 'statuses_count': 50832, 'created_a

{'created_at': 'Mon Sep 28 23:29:01 +0000 2020', 'id': 1310723217011564544, 'id_str': '1310723217011564544', 'text': '#HousingCrisis #CancelRent', 'source': '<a href="https://mobile.twitter.com" rel="nofollow">Twitter Web App</a>', 'truncated': False, 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 2530758806, 'id_str': '2530758806', 'name': 'FM', 'screen_name': 'lily_oh_lily_', 'location': 'Pacific Northwest', 'url': None, 'description': 'It is the mark of an educated mind to be able to entertain a thought without accepting it. - Aristotle', 'translator_type': 'none', 'protected': False, 'verified': False, 'followers_count': 445, 'friends_count': 167, 'listed_count': 73, 'favourites_count': 58492, 'statuses_count': 84287, 'created_at': 'Wed May 28 20:07:06 +0000 2014', 'utc_offset': None, 'time_zone': None, 'geo_enabled': False, 'lang': None, 'contributors_en

{'created_at': 'Mon Sep 28 23:32:43 +0000 2020', 'id': 1310724145668210689, 'id_str': '1310724145668210689', 'text': 'RT @jeremoss: Folks from @abolitionpark are having a housewarming -- in front of Andrew Cuomo‚Äôs office at 633 3rd Ave in Manhattan. They ha‚Ä¶', 'source': '<a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>', 'truncated': False, 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 1196932621625769984, 'id_str': '1196932621625769984', 'name': 'Love is love // BLM // Pro-Fem', 'screen_name': 'lvislvblmprofem', 'location': None, 'url': None, 'description': None, 'translator_type': 'none', 'protected': False, 'verified': False, 'followers_count': 5, 'friends_count': 118, 'listed_count': 0, 'favourites_count': 4434, 'statuses_count': 1446, 'created_at': 'Tue Nov 19 23:26:17 +0000 2019', 'utc_offset': None, 'time_zone':

{'created_at': 'Mon Sep 28 23:33:17 +0000 2020', 'id': 1310724290602569729, 'id_str': '1310724290602569729', 'text': 'RT @nycDSA: You know what would provide real, fundamental stability for the lives of working-class New Yorkers instead of putting them at r‚Ä¶', 'source': '<a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>', 'truncated': False, 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 498456198, 'id_str': '498456198', 'name': 'Rocco Marinara', 'screen_name': 'tokingalex', 'location': None, 'url': None, 'description': '26 queer bruh (he/they) | professionally mediocre musician | unemployed healthcare worker | music therapist who needs therapy | BLM | TransNBlivesmatter | M4A', 'translator_type': 'none', 'protected': False, 'verified': False, 'followers_count': 1042, 'friends_count': 1250, 'listed_count': 7, 'favourites_c

{'created_at': 'Mon Sep 28 23:35:49 +0000 2020', 'id': 1310724926077390849, 'id_str': '1310724926077390849', 'text': 'RT @ny_indivisible: BREAKING: @NYGovCuomo Now extends Residential Eviction Moratorium for 2020. \n\nNow he MUST #CancelRent \n\nSupport NYS Bil‚Ä¶', 'source': '<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>', 'truncated': False, 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 339340704, 'id_str': '339340704', 'name': 'Cheryl Smith', 'screen_name': 'Cheryl_Smith1', 'location': 'New York', 'url': None, 'description': 'BAT. 30+ year science teacher, FUSE Negotiation Chairperson,  NYSUT union member, 18 year school board retired trustee. Fighting for our kids & public education', 'translator_type': 'none', 'protected': False, 'verified': False, 'followers_count': 2446, 'friends_count': 2915, 'listed_count': 140, 

{'created_at': 'Mon Sep 28 23:37:33 +0000 2020', 'id': 1310725362666688512, 'id_str': '1310725362666688512', 'text': 'RT @Marni4Council: So terribly sad to see the closing of #Chelsea iconic diner: The Rail Line  Diner. They could have survived w/#CancelRen‚Ä¶', 'source': '<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>', 'truncated': False, 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 709828169, 'id_str': '709828169', 'name': 'Purplegarter', 'screen_name': 'Purplegarter1', 'location': None, 'url': None, 'description': None, 'translator_type': 'none', 'protected': False, 'verified': False, 'followers_count': 3753, 'friends_count': 3524, 'listed_count': 949, 'favourites_count': 328736, 'statuses_count': 595113, 'created_at': 'Fri Oct 11 20:35:04 +0000 2013', 'utc_offset': None, 'time_zone': None, 'geo_enabled': True, 'lang'

{'created_at': 'Mon Sep 28 23:51:55 +0000 2020', 'id': 1310728980790685696, 'id_str': '1310728980790685696', 'text': 'RT @PilsenAlliance: üö® PILSEN RALLY THURSDAY OCT. 1 AGAINST GENTRIFICATION &amp; DISPLACEMENT üö®\xa0\n\nRally 4pm at 18th &amp; Loomis (Plaza Tenochtitl√°n‚Ä¶', 'source': '<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>', 'truncated': False, 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 828665423839244288, 'id_str': '828665423839244288', 'name': 'les-beaner', 'screen_name': '1_800_aries', 'location': 'ur heart', 'url': None, 'description': 'they/them femme', 'translator_type': 'none', 'protected': False, 'verified': False, 'followers_count': 109, 'friends_count': 305, 'listed_count': 1, 'favourites_count': 47316, 'statuses_count': 11840, 'created_at': 'Mon Feb 06 18:03:27 +0000 2017', 'utc_offset': None,

KeyboardInterrupt: 