# Python 3 Implemenation of Data Collection Using TamTam API

In [134]:
import csv
from gmplot import gmplot
import json
from pprint import pprint
import urllib.request  as urllib2 
import os
import time
import math
import random

key = "4BGs4umzWE9Bg7fluGQR7gToO6Qlageu"
EARTH_POLAR_RADIUS_METER = 6356800
EARTH_EQUATORIAL_RADIUS_METER = 6378100


In [142]:
def queryEventfulWithId(id):
	url = "http://api.eventful.com/json/events/get?app_key=srG2DMrq4VpRxGvw&id="+id
	response = urllib2.urlopen(url).read()
	rspJSON = json.loads(response)
	return rspJSON
    
# Gets the metric length corresponding to the latitude, in degrees.
def getLatitudeWidth(meterWidth):
    return meterWidth/(2*math.pi*EARTH_POLAR_RADIUS_METER ) *360

# Convert the metric width to the corresponding longitude width at latitude
def getLongitudeWidthAtLatitue(latitude, meterWidth):
    circleMeterLengthAtLatitude = 2 * math.pi * EARTH_EQUATORIAL_RADIUS_METER * math.cos(latitude / 180 * math.pi)
    return meterWidth/circleMeterLengthAtLatitude * 360

def degreesToRadians(degrees):
	return degrees * math.pi / 180

def distanceInMetersBetweenEarthCoordinates(lat1, lon1, lat2, lon2):
	earthRadiusKm = 6371
	dLat = degreesToRadians(lat2-lat1)
	dLon = degreesToRadians(lon2-lon1)
	lat1 = degreesToRadians(lat1)
	lat2 = degreesToRadians(lat2)
	a = math.sin(dLat/2) * math.sin(dLat/2) + math.sin(dLon/2) * math.sin(dLon/2) * math.cos(lat1) * math.cos(lat2) 
	c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a)) 
	return earthRadiusKm * c * 1000


# Generates a uniform distribution of count coordinates
# Boundary defined by lat,lon of center corner and width in meters
def generateCoordsWithinSquare(centerLat,centerLon,meterWidth,count,gmap,map_file):

	# Generate rectangle width in latitude and longitude coordinates
	dLat = getLatitudeWidth(meterWidth)
	dLon = getLongitudeWidthAtLatitue(centerLat,meterWidth)

	# Generate and show rectangle boundaries
	upperLeftLat = centerLat + (dLat / 2.0)
	upperLeftLon = centerLon - (dLon / 2.0)

	lowerRightLat = upperLeftLat - dLat
	lowerRightLon = upperLeftLon + dLon

	upperRightLat = upperLeftLat
	upperRightLon = lowerRightLon

	lowerLeftLat = lowerRightLat
	lowerLeftLon = upperLeftLon

	latitudes = []
	longitudes = []
	distances_to_center = []

	# Generate count randomly distributed coordinates
	for i in range(count):
		lat = random.uniform(upperLeftLat, lowerLeftLat)
		lon = random.uniform(upperLeftLon, upperRightLon)
		dist = distanceInMetersBetweenEarthCoordinates(centerLat,centerLon,lat,lon)
		latitudes.append(lat)
		longitudes.append(lon)
		distances_to_center.append(dist)
        
	boundaries = [[upperLeftLat, upperRightLat, lowerRightLat, lowerLeftLat, upperLeftLat], [upperLeftLon, upperRightLon, lowerRightLon, lowerLeftLon, upperLeftLon]]

	return latitudes,longitudes,distances_to_center

def getSpeedAtLoc(lat,lon):
    try:
        url = "https://api.tomtom.com/traffic/services/4/flowSegmentData/absolute/10/json?key="+key+"&point="+lat+","+lon+"&unit=MPH"
        response = urllib2.urlopen(url).read()
        rspJSON = json.loads(response)
        data = rspJSON['flowSegmentData']
        freeFlowSpeed = data["freeFlowSpeed"]
        currentSpeed = data["currentSpeed"]
        confidence = data["confidence"]
        ratio = round(currentSpeed/float(freeFlowSpeed),2)

        #TODO: put average of returned segments lat and lon here

        return [str(lat),str(lon),str(freeFlowSpeed),str(currentSpeed),str(ratio),str(confidence),str(time.time())]
    except urllib2.HTTPError as err:
        print ("not found")

In [136]:
#Event IDs for Dates
March22 = ['E0-001-112125565-5','E0-001-110726616-9','E0-001-112445339-1','E0-001-105077404-7','E0-001-111610388-0','E0-001-113022738-0','E0-001-113021529-7']
March23 = ['E0-001-112893006-5','E0-001-112323987-3','E0-001-110961124-8']
March24 = ['E0-001-105545550-9','E0-001-103317913-9','E0-001-111103613-0','E0-001-104932362-5']
March25 = ['E0-001-112609175-1','E0-001-111148684-9']
March26 = ['E0-001-112887201-7']
March27 = ['E0-001-095606083-4','E0-001-112228617-5']
March31 = ['E0-001-110902509-8','E0-001-109288741-1','E0-001-103166627-9']
April1 = ['E0-001-111784050-8']

In [159]:
import pandas as pd
columns = ['event','title','venue','venue_lat','venue_lon','sample_lats','sample_lons','sample_dist']
df = pd.DataFrame(columns=columns)

# Change Date Below
Change the name of list from March22/March23/March24 etc, based on the date today. Do NOT re-run this block more than once a day!

In [160]:
for i in March22:
    event = queryEventfulWithId(i)
    venue_name = event["venue_name"]
    directory = "Venues/"+venue_name
    latitude = float(event["latitude"])
    longitude = float(event["longitude"])
    latitudes50,longitudes50,distances_to_center = generateCoordsWithinSquare(latitude,longitude,meterWidth,count,gmap,map_file)
    df.loc[i] = [event,event['title'],venue_name,latitude,longitude,latitudes50,longitudes50,distances_to_center]

    

# Save the File
Rename the file below based on the date and run the block below. Do NOT re-run this block more than once a day!

In [163]:
df.to_csv('March22.csv')

In [195]:
#Sanity Check 
#Presents the events and their venues

df.head(7)

Unnamed: 0,event,title,venue,venue_lat,venue_lon,sample_lats,sample_lons,sample_dist
E0-001-112125565-5,"{'withdrawn': '0', 'olson_path': 'America/Los_...",Web Development vs Data Science,Santa Monica Pier,34.010088,-118.496137,"[34.01310035309067, 34.01085478127754, 34.0099...","[-118.49220995651541, -118.4970066164585, -118...","[493.19715726230464, 117.0272812342315, 328.45..."
E0-001-110726616-9,"{'withdrawn': '0', 'olson_path': 'America/Los_...",The Decemberists with Eleanor Friedberger,The Fox Theater Pomona,34.05663,-117.75044,"[34.05491799365976, 34.05794498238665, 34.0596...","[-117.75417687079147, -117.75363663081231, -11...","[393.39034274254016, 328.7801434717994, 345.47..."
E0-001-112445339-1,"{'withdrawn': '0', 'olson_path': 'America/Los_...",Late Night Tattoo on 22,The Natural History Museum of Los Angeles County,34.016957,-118.28877,"[34.01695286276588, 34.01896453656166, 34.0142...","[-118.28692934648775, -118.28376431551332, -11...","[169.67453554950472, 512.5574955438562, 361.71..."
E0-001-105077404-7,"{'withdrawn': '0', 'olson_path': 'America/Los_...",Cesar Millan,Cerritos Center for the Performing Arts,33.867617,-118.061655,"[33.86487226933252, 33.8679752619943, 33.87026...","[-118.05726939711586, -118.05811101802325, -11...","[507.05651027988483, 329.5933440970051, 509.08..."
E0-001-111610388-0,"{'withdrawn': '0', 'olson_path': 'America/Los_...",Bedroom in Los Angeles @The Smell,The Smell,34.050335,-118.245668,"[34.04798676714066, 34.046002645601256, 34.048...","[-118.24414158026715, -118.24401059017346, -11...","[296.5994151097507, 505.38738546025553, 346.17..."
E0-001-113022738-0,"{'withdrawn': '0', 'olson_path': 'America/Los_...",Lil Xan,The Glass House,34.057536,-117.751455,"[34.06200662911498, 34.06012890502192, 34.0593...","[-117.7486707725468, -117.74706516755421, -117...","[559.3448631646529, 496.6160637441241, 293.531..."
E0-001-113021529-7,"{'withdrawn': '0', 'olson_path': 'America/Los_...",Boyband Comedy,Improv,34.083426,-118.367282,"[34.087287881661645, 34.08121377902339, 34.083...","[-118.36907839360381, -118.36613743084399, -11...","[460.2337432514905, 267.57245097904934, 34.163..."


# Run Block Below every 30 mins
Run it every 30 mins. Do NOT forget to rename the list in the first for loop of the block below based on today's date. 

In [263]:
columns1 = ['eventname','loc_type','lat','lon','freeflowspeed','currentspeed','ratio','confidence','CST Time','dist_from_center']
i = 0
dfadd = pd.DataFrame([['ignore this row',loc_type,lat,lon,freeFlowSpeed,currentSpeed,ratio,confidence,timeticks,0]],columns=columns1)

#CHANGE THE DATE BELOW TO March23/March24 etc based on today's date

for i in range(0,len(March22)):
    loc_type = "C"
    lat = df['venue_lat'][i]
    lon = df['venue_lon'][i]
    lat,lon,freeFlowSpeed,currentSpeed,ratio,confidence,timeticks = getSpeedAtLoc(str(lat),str(lon))
    dfadd.loc[len(dfadd)]= [str(df['title'][i]),loc_type,lat,lon,freeFlowSpeed,currentSpeed,ratio,confidence,time.ctime(int(float((timeticks)))),0]

    for j in range(50):
        loc_type = "S"
        lat,lon,freeFlowSpeed,currentSpeed,ratio,confidence,timeticks = getSpeedAtLoc(str(df['sample_lats'][i][j]),str(df['sample_lons'][i][j]))
        dfadd.loc[len(dfadd)] = [str(df['title'][i]),loc_type,lat,lon,freeFlowSpeed,currentSpeed,ratio,confidence,time.ctime(int(float((timeticks)))),df['sample_dist'][i][j]]
        

In [268]:
dfadd.head(20)

Unnamed: 0,eventname,loc_type,lat,lon,freeflowspeed,currentspeed,ratio,confidence,CST Time,dist_from_center
0,ignore this row,C,34.0100878,-118.4961372,42,41,0.98,0.97,1521770950.4721851,0.0
1,Web Development vs Data Science,C,34.0100878,-118.4961372,42,41,0.98,0.98,Thu Mar 22 21:10:19 2018,0.0
2,Web Development vs Data Science,S,34.01310035309067,-118.4922099565154,51,46,0.9,0.97,Thu Mar 22 21:10:19 2018,493.197157
3,Web Development vs Data Science,S,34.01085478127754,-118.4970066164585,42,41,0.98,0.98,Thu Mar 22 21:10:19 2018,117.027281
4,Web Development vs Data Science,S,34.00991137520571,-118.4996942593333,43,43,1.0,0.96,Thu Mar 22 21:10:19 2018,328.454468
5,Web Development vs Data Science,S,34.010068919783066,-118.4948827662621,42,41,0.98,0.98,Thu Mar 22 21:10:20 2018,115.645024
6,Web Development vs Data Science,S,34.0138515922804,-118.4928601131718,51,46,0.9,0.97,Thu Mar 22 21:10:20 2018,516.131431
7,Web Development vs Data Science,S,34.01391348210964,-118.49465692166642,51,46,0.9,0.97,Thu Mar 22 21:10:20 2018,446.741494
8,Web Development vs Data Science,S,34.01372865173877,-118.4920577620948,51,47,0.92,0.97,Thu Mar 22 21:10:20 2018,552.523188
9,Web Development vs Data Science,S,34.00829533640034,-118.49075810275242,46,45,0.98,0.97,Thu Mar 22 21:10:20 2018,534.378426


# Run Below Block to Save File. Edit the Time in the File Name

KEEP CST TIME (Illinois Time) FOR NOW, WE CAN ALTER IT LATER. 

In [267]:
dfadd.to_csv('March22_2110pm.csv')