In [29]:
import os

import tweepy

#https://medium.freecodecamp.org/how-to-build-a-twitter-sentiments-analyzer-in-python-using-textblob-948e1e8aae14
from textblob import TextBlob

from IPython.core.display import clear_output
from time import time

import random
from shapely.affinity import affine_transform
from shapely.geometry import shape, mapping, Point
import json

In [30]:
TWITTER_KEY = os.getenv('TWITTER_KEY')
TWITTER_SECRET = os.getenv('TWITTER_SECRET')
TWITTER_TOKEN = os.getenv('TWITTER_TOKEN')
TWITTER_TOKEN_SECRET = os.getenv('TWITTER_TOKEN_SECRET')

In [31]:
auth = tweepy.OAuthHandler(TWITTER_KEY, TWITTER_SECRET)
auth.set_access_token(TWITTER_TOKEN, TWITTER_TOKEN_SECRET)

api = tweepy.API(auth)

In [56]:
#Pre-calculated triangulation to speed-up process:
with open('nyc-borough-processed.geojson') as json_file:  
    nyc_boroughs = json.load(json_file)
    
for boro in nyc_boroughs['features']:
    boro['geometry'] = shape(boro['geometry'])

#Based on https://codereview.stackexchange.com/questions/69833/generate-sample-coordinates-inside-a-polygon
def random_point_in_polygon(transforms, areas):
    transform = random.choices(transforms, weights=areas)
    x, y = [random.random() for _ in range(2)]
    if x + y > 1:
        p = Point(1 - x, 1 - y)
    else:
        p = Point(x, y)
    return affine_transform(p, transform[0])
    
def random_point_in_box(box):
    minx, miny, maxx, maxy = box.bounds
    center = box.centroid
    return Point(random.triangular(minx, maxx, center.x), random.triangular(miny, maxy, center.y))

def which_borough(point):
    for boro in nyc_boroughs['features']:
        if boro['geometry'].intersects(point):
            return boro['properties']['boro_name']
    return False
    
def process_coordinates(tweet):
    
    boro_list = ['Manhattan', 'Brooklyn', 'Bronxs', 'Queens', 'Staten Island']
    nyc_ids = ['27485069891a7938','94965b2c45386f87']
    tweet['coords_source'] = 'Randomized'
        
    if tweet['coordinates']:
        #Check if inside NYC
        coords = shape(tweet['coordinates'])
        boro_name = which_borough(coords)
        if boro_name:
            tweet['coords_source'] = 'Origin'
            tweet['coordinates'] = mapping(coords)
            tweet['borough'] = boro_name
        else: 
            tweet['coords_source'] = False    
            
    elif tweet['place']['place_type'] == 'poi':
        #Point of interest.
        coords = random_point_in_box(shape(tweet['place']['bounding_box']))
        boro_name = which_borough(coords)
        if boro_name:
            tweet['coordinates'] = mapping(coords)
            tweet['borough'] = boro_name
        else: 
            tweet['coords_source'] = False
        
    elif (tweet['place']['place_type'] == 'neighborhood') & any(tweet['place']['full_name'].endswith(boro) for boro in boro_list):
        #Place is a neigborhood in a NYC borough. Full name is [Neigborhood], [City (Borough)]
        boro_name = tweet['place']['full_name'].split(', ')[-1]
        
        if boro_name not in boro_list:
            raise ValueError('{} from {} not a boro'.format(boro_name, tweet['place']['name']))
        
        boro = list(filter(lambda x: x['properties']['boro_name'] == boro_name, nyc_boroughs['features']))[0]        
        tweet['coordinates'] = mapping(random_point_in_polygon(boro['transforms'], boro['areas']))
        tweet['borough'] = boro_name
        
    elif (tweet['place']['place_type'] == 'city') & (tweet['place']['name'] in boro_list):
        #Place is a NYC borough
        tweet['coordinates'] = mapping(random_point_in_box(shape(tweet['place']['bounding_box'])))
        tweet['borough'] = tweet['place']['name']
        
    elif (tweet['place']['place_type'] == 'admin') & (tweet['place']['id'] in nyc_ids):
        #Place is NYC
        b = random.randint(0,4)
        boro = nyc_boroughs['features'][b]
        tweet['coordinates'] = mapping(random_point_in_polygon(boro['transforms'], boro['areas']))
        tweet['borough'] = boro['properties']['boro_name']
        
    else:
        tweet['coords_source'] = False
        
    return tweet

In [57]:
allTweets = []

class MyStreamListener(tweepy.StreamListener):

    latestTweets = []
    totalTweets = 0
    nExactLocation = 0
    startTime = None
    
    def on_data(self, data):
        
        status = json.loads(data)
        
        if (status['coordinates']):
            self.nExactLocation += 1
        
        status = process_coordinates(status)
        
        analysis = TextBlob(status['text'])
        
        allTweets.insert(0,status)
        self.latestTweets.insert(0,(status['text'], analysis.sentiment[0], status['coords_source'], status['coordinates']))
        self.totalTweets += 1
        
        if (self.startTime is None):
            self.startTime = time()
        
        if len(self.latestTweets) > 10:
            self.latestTweets.pop()
        
        for tweet in self.latestTweets:
            print(tweet)
            
        print()
        elapsedTime = time() - self.startTime
        print('Tweets with lat/long: {} of {} ({:.2f}%); Frequency: {:.2f} tweets/s'.format(self.nExactLocation,
                                                                                              self.totalTweets,
                                                                                              self.nExactLocation/self.totalTweets*100,
                                                                                              self.totalTweets/elapsedTime))
        
        clear_output(wait=True)
        
    def on_error(self, status_code):
        if status_code == 420:
            #returning False in on_error disconnects the stream
            return False

In [58]:
myStreamListener = MyStreamListener()
myStream = tweepy.Stream(auth = api.auth, listener=myStreamListener)

In [59]:
#New York City bounding box according to Twitter Docs: -74,40,-73,41
#https://developer.twitter.com/en/docs/tweets/filter-realtime/guides/basic-stream-parameters
myStream.filter(locations=[-74.3,40.48,-73.7,40.93])

KeyboardInterrupt: 

In [61]:
allTweets[-1]

{'created_at': 'Mon Mar 04 22:57:20 +0000 2019',
 'id': 1102704606142971904,
 'id_str': '1102704606142971904',
 'text': '@nypost As if anyone would waste our cheese 🤷\u200d♀️',
 'display_text_range': [8, 48],
 'source': '<a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>',
 'truncated': False,
 'in_reply_to_status_id': 1101569457137377286,
 'in_reply_to_status_id_str': '1101569457137377286',
 'in_reply_to_user_id': 17469289,
 'in_reply_to_user_id_str': '17469289',
 'in_reply_to_screen_name': 'nypost',
 'user': {'id': 323919952,
  'id_str': '323919952',
  'name': 'Cibao Meat Products',
  'screen_name': 'CibaoMeatProd',
  'location': 'Bronx, NY',
  'url': 'http://cibaomeat.com',
  'description': 'The tradition of quality, the taste of excellence. #CibaoCravings',
  'translator_type': 'none',
  'protected': False,
  'verified': False,
  'followers_count': 210,
  'friends_count': 145,
  'listed_count': 6,
  'favourites_count': 132,
  'statuses_count': 1249,
  'created_at': 

In [62]:
len(list((filter(lambda x: x is not None, map(lambda x: x['in_reply_to_status_id'], allTweets)))))

1783

In [63]:
len(list((filter(lambda x: x, map(lambda x: x['is_quote_status'], allTweets)))))

769

In [64]:
len(list((filter(lambda x: x, map(lambda x: x.get('retweeted_status',False), allTweets)))))

0

In [70]:
set((filter(lambda x: x, map(lambda x: (x['place']['place_type'] if x['place'] else 'none',
                                        x['place']['full_name'] if x['place'] else 'none',
                                        x['place']['id'] if x['place'] else 'none'), allTweets))))

{('admin', 'NEW YORK', 'b6c2e04f1673337f'),
 ('admin', 'New Jersey, USA', '65b4760a2b411e11'),
 ('admin', 'New York, NY', '27485069891a7938'),
 ('admin', 'New York, USA', '94965b2c45386f87'),
 ('admin', 'Nova Iorque, USA', '94965b2c45386f87'),
 ('admin', 'Nueva York, USA', '94965b2c45386f87'),
 ('city', 'Bayonne, NJ', '3d3c56338b6a3b4a'),
 ('city', 'Belleville, NJ', 'a227c629b631eea2'),
 ('city', 'Bergenfield, NJ', '980e30571d9d5f5d'),
 ('city', 'Bloomfield, NJ', 'dc4ad595a56393ac'),
 ('city', 'Bronx, NY', '002e24c6736f069d'),
 ('city', 'Brooklyn, NY', '011add077f4d2da3'),
 ('city', 'Caldwell, NJ', '8739d0bab792509d'),
 ('city', 'Carlstadt, NJ', 'fb3c12bc62543193'),
 ('city', 'Carteret, NJ', '71177291f1601de3'),
 ('city', 'Cedar Grove, NJ', 'f1a993cc8833f408'),
 ('city', 'Cedarhurst, NY', '15bb646d59ef405f'),
 ('city', 'Clark, NJ', '88c45dc6118d88a2'),
 ('city', 'Cliffside Park, NJ', 'ebbd168611073cb8'),
 ('city', 'Clifton, NJ', '7356b662670b2c31'),
 ('city', 'Colonia, NJ', 'dfa88de1ec

In [65]:
from functools import reduce

def reducer(x,y):
    x[y] = x.get(y, 0) + 1
    return x

In [71]:
places = reduce(reducer,map(lambda x: x['geo_source'], allTweets),{})
places

{'Randomized': 2832, 'Exact': 326, False: 1255}

## Get Delaunays triangulation of polygons, and keep only those inside polygon.

In [None]:
from shapely.geometry import shape, Point, Polygon, mapping
import json

In [None]:
with open('nyc-borough-boundaries.geojson') as json_file:  
    data = json.load(json_file)

In [119]:
for d in data['features']:
    d['geometry'] = shape(d['geometry'])
    areas = []
    transforms = []
    for t in triangulate(d['geometry']):
        if t.within(d['geometry']):
            areas.append(t.area)
            (x0, y0), (x1, y1), (x2, y2), _ = t.exterior.coords
            transforms.append([x1 - x0, x2 - x0, y2 - y0, y1 - y0, x0, y0])
    d['areas'] = areas
    d['transforms'] = transforms
    print(d['properties']['boro_name'])

Manhattan
Bronx
Staten Island
Brooklyn
Queens


In [125]:
for d in data['features']:
    d['geometry'] = mapping(d['geometry'])

In [126]:
with open('nyc-borough-processed.geojson', 'w') as outfile:  
    json.dump(data, outfile)

In [128]:
for d in data2['features']:
    d['geometry'] = shape(d['geometry'])

In [36]:
boro_names = [d['properties']['boro_name'] for d in data['features']]

In [129]:
from numpy.random import uniform

yes = 0
no = 0

for t in allTweets:
    if (t['coordinates'] is None) and (t['place']['place_type'] == 'city') and (t['place']['name'] in boro_names):
        coords = t['place']['bounding_box']['coordinates'][0]
        #minx = min(coords, key=lambda x: x[0])[0]
        #maxx = max(coords, key=lambda x: x[0])[0]
        #miny = min(coords, key=lambda x: x[1])[1]
        #maxy = max(coords, key=lambda x: x[1])[1]
        boro = list(filter(lambda x: x['properties']['boro_name'] == t['place']['name'], data2['features']))[0]
        #minx, miny, maxx, maxy = boro.bounds
        #x = uniform(minx, maxx)
        #y = uniform(miny, maxy)
        #while (~boro.intersects(Point([x, y]))):
        #    x = uniform(minx, maxx)
        #    y = uniform(miny, maxy)
        t['coords'] = random_point_in_polygon(boro['transforms'], boro['areas'])
        if t['coords'].within(boro['geometry']):
            yes += 1
        else:
            no += 1

In [132]:
yes

1192