In [13]:
# Import packages to use
import twint
import numpy as np
import pickle
import pandas as pd
import time
from collections import Counter
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
%matplotlib inline

# Patches asyncio to allow the running of multiple event loops in Jupyter Notebooks.
# Fixes: "RuntimeError: This event loop is already running"
import nest_asyncio

nest_asyncio.apply()

In [2]:
# Display the installed pandas version so the run environment is recorded with the notebook.
pd.__version__

'1.1.3'

In [3]:
# Finds the distance between two lat/long coordinates
def haversine_distance(lat1, lon1, lat2, lon2):
    """Return the great-circle (haversine) distance between two coordinates.

    Parameters
    ----------
    lat1, lon1 : float or numpy array
        Latitude and longitude of the first point, in decimal degrees.
    lat2, lon2 : float or numpy array
        Latitude and longitude of the second point, in decimal degrees.

    Returns
    -------
    numpy float (or array)
        Distance in kilometres, rounded to two decimal places.
    """
    r = 6371  # Earth radius in km; fixes the unit of the result
    phi1 = np.radians(lat1)
    phi2 = np.radians(lat2)
    delta_phi = np.radians(lat2 - lat1)
    delta_lambda = np.radians(lon2 - lon1)
    a = np.sin(delta_phi / 2) ** 2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lambda / 2) ** 2
    # Rounding error can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make sqrt(1 - a) NaN; clamp before taking square roots.
    a = np.clip(a, 0.0, 1.0)
    res = r * (2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a)))
    return np.round(res, 2)

## Santa Coloma

In [4]:
def _run_twint_search(start_date, end_date, **options):
    """Run a single twint search and return the DataFrame twint stored for it.

    `options` are set verbatim as attributes on the `twint.Config` object
    (e.g. ``Search='...'`` or ``Geo='lat, lon, radius'``). The result is read
    from the module attribute `twint.storage.panda.Tweets_df` after the run.
    """
    c = twint.Config()
    for option, value in options.items():
        setattr(c, option, value)
    c.Since = start_date
    c.Until = end_date
    c.Pandas = True
    c.Hide_output = True
    twint.run.Search(c)
    return twint.storage.panda.Tweets_df


def getSantaColomaTweets(start_date, end_date):
    """Collect tweets related to Santa Coloma de Gramenet between two dates.

    Five searches are run and merged:
      1. the full municipal name,
      2. the short municipal name,
      3. names of buildings / areas in or near the town,
      4. geo-tagged tweets around a northern centre point (1 km),
      5. geo-tagged tweets around a southern centre point (0.75 km).

    Parameters
    ----------
    start_date, end_date : str
        Passed to twint as `Since` and `Until` (the caller uses 'YYYY-MM-DD').

    Returns
    -------
    pandas.DataFrame
        All collected tweets with duplicate tweet ids removed (first occurrence
        kept) and a fresh 0..n-1 index. Empty if no search produced a frame.
    """
    searches = [
        # Search Tweets with full municipal name 'Santa Coloma de Gramenet'
        {'Search': 'Santa Coloma de Gramenet'},
        # Search Tweets with short municipal name 'Santa Coloma'
        {'Search': 'Santa Coloma'},
        # Search Tweets with the various buildings and regions near Santa Coloma
        {'Search': '\"Rambla San Sebastian\" OR \"Fluvial del Besos\" OR \"Molinet\" OR \"Plaza del Rellotge\" OR \"Can Zam\" OR \"Can Peixauet\" OR \"Gran Sol\" OR \"Escuela Tanit\" OR \"Terra Roja\" OR \"Instituto Gassol\" OR \"CAP Santa Rosa\" OR \"Cinto Verdaguer\" OR \"del Fondo\"'},
        # Search Tweets with location near 'Santa Coloma de Gramenet': North Side
        {'Geo': "41.46287400801948, 2.2028934732857177, 1km"},
        # Search Tweets with location near 'Santa Coloma de Gramenet': South Side
        {'Geo': "41.45039468429977, 2.212764002746006, 0.75km"},
    ]

    frames = []
    for options in searches:
        result = _run_twint_search(start_date, end_date, **options)
        if result is not None:
            # The result lives in a twint module-level attribute; copy it so a
            # later search cannot affect what has already been collected.
            frames.append(result.copy())

    if not frames:
        return pd.DataFrame()

    # One concat instead of repeated DataFrame.append (deprecated in pandas).
    df = pd.concat(frames)

    # If every search came back empty there may be no 'id' column to dedupe on.
    if 'id' in df.columns:
        df = df.drop_duplicates(subset=['id'], ignore_index=True)
    else:
        df = df.reset_index(drop=True)

    return df

In [5]:
# Scrape the Santa Coloma tweets for the study window; later cells read `data`.
# NOTE(review): the end date passed is 2019-08-02 while the trailing comment says the
# effective range ends 07/31 — confirm how twint interprets `Until`.
data = getSantaColomaTweets("2019-07-01", "2019-08-02") # effectively searches for dates 07/01 - 07/31

[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.


In [6]:
# Keep every author who has at least one tweet whose detected language is NOT
# one of the local ones: Spanish ('es'), Catalan ('ca') or undetermined ('und').
# Authors who only ever tweet in those languages are dropped.
local_languages = ['es', 'ca', 'und']
is_other_language = ~data['language'].isin(local_languages)
users = set(data.loc[is_other_language, 'username'])
print(len(users))

910


In [8]:
# Lower-cased place names whose presence in a tweet links the author to Santa Coloma.
placelist = [x.lower() for x in ["coloma", "Rambla San Sebastian", "Fluvial del Besos", "Molinet", "Plaza del Rellotge", "Can Zam", "Can Peixauet", "Gran Sol", "Escuela Tanit", "Terra Roja", "Instituto Gassol", "CAP Santa Rosa", "Cinto Verdaguer", "del Fondo"]]
# Regex alternation for Series.str.contains; none of the names contain regex metacharacters.
pattern = '|'.join(placelist)

# Centre points (lat, lon) and radii in km of the two areas used for the place-based check.
SOUTH_CENTRE = (41.45039468429977, 2.212764002746006)
NORTH_CENTRE = (41.46287400801948, 2.2028934732857177)
SOUTH_RADIUS_KM = 0.75
NORTH_RADIUS_KM = 1.0

DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
MAX_SPAN = timedelta(days=14)


def _spans_less_than(dates, limit):
    """Return True if the earliest and latest timestamp strings in `dates` are less than `limit` apart."""
    latest = datetime.strptime(max(dates), DATE_FORMAT)
    earliest = datetime.strptime(min(dates), DATE_FORMAT)
    return latest - earliest < limit


less_than_two_weeks = []  # users whose keyword-matching tweets all fall within two weeks
places = []               # users whose geo-tagged tweets near the town all fall within two weeks

start = time.time()

# Iterates through the 2019 tweets of each selected user and keeps users who have been
# associated with Santa Coloma (by keyword or by tweet location) within a 2-week span.
for i, user in enumerate(users, start=1):
    print(i)  # progress counter
    c = twint.Config()
    c.Username = user
    c.Since = "2019-01-01"
    c.Until = "2019-12-31"
    c.Pandas = True
    c.Hide_output = True
    twint.run.Search(c)

    user_tweets = twint.storage.panda.Tweets_df
    if user_tweets is None or len(user_tweets) == 0:
        continue

    # Keyword-based check. Lower-case into a temporary Series rather than
    # overwriting the column of the frame held by twint.
    mentions_place = user_tweets['tweet'].str.lower().str.contains(pattern)
    keyword_tweets = user_tweets[mentions_place]
    if len(keyword_tweets) > 0 and _spans_less_than(keyword_tweets['date'], MAX_SPAN):
        less_than_two_weeks.append(user)

    # Place-based check. `.copy()` so the columns added below go onto an
    # independent frame instead of a slice (avoids SettingWithCopyWarning).
    geo_tweets = user_tweets.loc[user_tweets['place'] != '', ['username', 'place', 'date']].copy()
    if len(geo_tweets) > 0:
        geo_tweets['coordinates'] = [p['coordinates'] for p in geo_tweets['place']]
        # get distances to the two central points of Santa Coloma
        geo_tweets['dist1'] = [haversine_distance(*xy, *SOUTH_CENTRE) for xy in geo_tweets['coordinates']]
        geo_tweets['dist2'] = [haversine_distance(*xy, *NORTH_CENTRE) for xy in geo_tweets['coordinates']]
        nearby_tweets = geo_tweets[(geo_tweets['dist1'] < SOUTH_RADIUS_KM) | (geo_tweets['dist2'] < NORTH_RADIUS_KM)]

        if len(nearby_tweets) > 0 and _spans_less_than(nearby_tweets['date'], MAX_SPAN):
            places.append(user)

print(f"Processed {len(users)} users in {time.time() - start:.0f} s")


1
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
2
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
3
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
4
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
5
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
6
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
7
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
8
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
9
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
10
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
11
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
12
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
1

[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
100
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
101
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
102
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
103
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
104
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
105
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
106
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
107
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
108
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
109
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
110
[!] No more data! Scraping will stop now.
found 0 deleted tweets 

[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
197
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
198
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
199
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
200
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
201
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
202
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
203
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
204
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
205
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
206
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
207
[!] No more data! Scraping will stop now.
found 0 deleted tweets 

[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
294
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
295
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
296
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
297
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
298
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
299
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
300
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
301
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
302
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
303
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
304
[!] No more data! Scraping will stop now.
found 0 deleted tweets 

[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
391
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
392
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
393
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
394
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
395
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
396
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
397
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
398
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
399
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
400
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
401
[!] No more data! Scraping will stop now.
found 0 deleted tweets 

[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
488
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
489
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
490
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
491
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
492
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
493
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
494
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
495
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
496
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
497
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
498
[!] No more data! Scraping will stop now.
found 0 deleted tweets 

[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
585
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
586
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
587
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
588
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
589
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
590
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
591
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
592
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
593
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
594
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
595
[!] No more data! Scraping will stop now.
found 0 deleted tweets 

[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
682
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
683
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
684
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
685
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
686
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
687
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
688
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
689
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
690
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
691
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
692
[!] No more data! Scraping will stop now.
found 0 deleted tweets 

778
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
779
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
780
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
781
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
782
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
783
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
784
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
785
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
786
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
787
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
788
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
789
[!] No more data! Scraping will stop now.
found 0 deleted twe

[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
876
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
877
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
878
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
879
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
880
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
881
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
882
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
883
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
884
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
885
[!] No more data! Scraping will stop now.
found 0 deleted tweets in this search.
886
[!] No more data! Scraping will stop now.
found 0 deleted tweets 

In [9]:
# Persist both user sets so the scrape above does not have to be repeated.
filtered_users = {"keyword_based" : set(less_than_two_weeks), "place_based" : set(places)}

# Context manager guarantees the file is flushed and closed once the dump finishes.
with open("select_users.p", "wb") as out_file:
    pickle.dump(filtered_users, out_file)

In [10]:
# A user qualifies if either heuristic (keyword-based or place-based) flagged them.
likely_immigrant_users = set(less_than_two_weeks) | set(places)
len(likely_immigrant_users)

483

In [12]:
# Number of distinct users flagged by the keyword-based check.
len(set(less_than_two_weeks))

464

In [11]:
# Number of distinct users flagged by the place-based check.
len(set(places))

23