In [1]:
from fastavro import parse_schema
from faker import Faker
import random
import string
from datetime import datetime
from geopy.geocoders import Nominatim
import geopy
from geopy.extra.rate_limiter import RateLimiter
geopy.geocoders.options.default_user_agent = "BDBA"
from tqdm import tqdm
import numpy as np

# 1. Define the data feeds

__Real-time vehicle information__

In [2]:
vehic_schema = {
    'doc': 'Traffic detection',
    'name': 'real-time_vehic_info',
    'namespace': 'traffic_detection.vehicle',
    'type': 'record',
    'fields': [
        {'name': 'vehicle_id', 'type': 'string'},
        {'name': 'datetime_utc', 'type': 'float'},
        {'name': 'latitude', 'type': 'float'},
        {'name': 'longitude', 'type': 'float'},
        {'name': 'accelerometer', 'type': 'int'},
        {'name': 'vehicle_type', 'type': ["null", {"type": "enum", "name":"vehicle_type",
                                                   "symbols": ["truck","taxi","bus","private_vehicle"]}]}#,
        #{'name': 'engine_state', 'type': ["null", {"type": "enum", "symbols": ["ON", "OFF"]}]}
    ],
}

parsed_vehic_schema = parse_schema(vehic_schema)
parsed_vehic_schema

{'type': 'record',
 'doc': 'Traffic detection',
 'name': 'traffic_detection.vehicle.real-time_vehic_info',
 'fields': [{'name': 'vehicle_id', 'type': 'string'},
  {'name': 'datetime_utc', 'type': 'float'},
  {'name': 'latitude', 'type': 'float'},
  {'name': 'longitude', 'type': 'float'},
  {'name': 'accelerometer', 'type': 'int'},
  {'name': 'vehicle_type',
   'type': ['null',
    {'type': 'enum',
     'name': 'traffic_detection.vehicle.vehicle_type',
     'symbols': ['truck', 'taxi', 'bus', 'private_vehicle']}]}],
 '__fastavro_parsed': True,
 '__named_schemas': {'traffic_detection.vehicle.real-time_vehic_info': {'type': 'record',
   'doc': 'Traffic detection',
   'name': 'traffic_detection.vehicle.real-time_vehic_info',
   'fields': [{'name': 'vehicle_id', 'type': 'string'},
    {'name': 'datetime_utc', 'type': 'float'},
    {'name': 'latitude', 'type': 'float'},
    {'name': 'longitude', 'type': 'float'},
    {'name': 'accelerometer', 'type': 'int'},
    {'name': 'vehicle_type',
    

__Road Information__

In [3]:
road_schema = {
    'doc': 'Traffic detection',
    'name': 'road_info',
    'namespace': 'traffic_detection.roads',
    'type': 'record',
    'fields': [
        {'name': 'road_name', 'type': 'string'},
        {'name': 'number_lanes', 'type': 'int'},
        {'name': 'min_speed', 'type': 'int'},
        {'name': 'has_bus_lane', 'type': ["null", {"type": "enum", "name":"has_bus_lane", "symbols": [0,1]}]}
    ],
}

parsed_road_schema = parse_schema(road_schema)
parsed_road_schema

{'type': 'record',
 'doc': 'Traffic detection',
 'name': 'traffic_detection.roads.road_info',
 'fields': [{'name': 'road_name', 'type': 'string'},
  {'name': 'number_lanes', 'type': 'int'},
  {'name': 'min_speed', 'type': 'int'},
  {'name': 'has_bus_lane',
   'type': ['null',
    {'type': 'enum',
     'name': 'traffic_detection.roads.has_bus_lane',
     'symbols': [0, 1]}]}],
 '__fastavro_parsed': True,
 '__named_schemas': {'traffic_detection.roads.road_info': {'type': 'record',
   'doc': 'Traffic detection',
   'name': 'traffic_detection.roads.road_info',
   'fields': [{'name': 'road_name', 'type': 'string'},
    {'name': 'number_lanes', 'type': 'int'},
    {'name': 'min_speed', 'type': 'int'},
    {'name': 'has_bus_lane',
     'type': ['null',
      {'type': 'enum',
       'name': 'traffic_detection.roads.has_bus_lane',
       'symbols': [0, 1]}]}]},
  'traffic_detection.roads.has_bus_lane': {'type': 'enum',
   'name': 'traffic_detection.roads.has_bus_lane',
   'symbols': [0, 1]}}}

# 2. Generate Records

In [4]:
fake = Faker()
Faker.seed(2000)
random.seed(2000)

## For vehicles

In [5]:
def generate_vehic_id():
    
    VOWELS = list("AEIOU")
    CONSONANTS = list(set(string.ascii_uppercase) - set(VOWELS))
    
    first_digit = str(fake.pyint(min_value=0, max_value=9, step=1))
    second_digit = str(fake.pyint(min_value=0, max_value=9, step=1))
    third_digit = str(fake.pyint(min_value=0, max_value=9, step=1))
    fourth_digit = str(fake.pyint(min_value=0, max_value=9, step=1))
    
    first_letter = random.choice(CONSONANTS)
    second_letter = random.choice(CONSONANTS)
    third_letter = random.choice(CONSONANTS)
    
    plate = first_digit+second_digit+third_digit+fourth_digit+' '+first_letter+second_letter+third_letter
    vehic_type = random.choice(["truck","taxi","bus","private_vehicle"])
    return(plate, vehic_type)

In [6]:
def vehicle_record(start_date, end_date):
    ids = generate_vehic_id()
    record = {
        u'vehicle_id': ids[0],
        u'datetime_utc': fake.date_time_between_dates(datetime_start=start_date, datetime_end=end_date).timestamp(),
        u'latitude': random.uniform(40.227240, 40.644740),
        u'longitude': random.uniform(-3.944317, -3.426800),
        u'accelerometer': fake.pyint(min_value=0, max_value=180, step=1),
        u'vehicle_type': ids[1],
    }
    return record

In [7]:
vehic_record_count = 100 # number of vehicle records

In [8]:
start_date = datetime(2020, 5, 3, 9, 1, 1) # year, month, day, hour, minute, second
end_date = datetime(2020, 5, 3, 9, 30, 1) # year, month, day, hour, minute, second
vehic_records = [vehicle_record(start_date, end_date) for _ in range(0, vehic_record_count)]

In [9]:
vehic_records

[{'vehicle_id': '7074 JYN',
  'datetime_utc': 1588497903.0,
  'latitude': 40.52135629950477,
  'longitude': -3.8507919185827992,
  'accelerometer': 46,
  'vehicle_type': 'bus'},
 {'vehicle_id': '9263 KDM',
  'datetime_utc': 1588498142.0,
  'latitude': 40.57007465863976,
  'longitude': -3.6071938480844157,
  'accelerometer': 166,
  'vehicle_type': 'taxi'},
 {'vehicle_id': '2895 ZTV',
  'datetime_utc': 1588497165.0,
  'latitude': 40.44546287335486,
  'longitude': -3.5125486436404882,
  'accelerometer': 133,
  'vehicle_type': 'bus'},
 {'vehicle_id': '6030 HXH',
  'datetime_utc': 1588497698.0,
  'latitude': 40.30815660225567,
  'longitude': -3.909501918887368,
  'accelerometer': 162,
  'vehicle_type': 'private_vehicle'},
 {'vehicle_id': '9731 YWN',
  'datetime_utc': 1588497903.0,
  'latitude': 40.47042101938655,
  'longitude': -3.4346389045623913,
  'accelerometer': 8,
  'vehicle_type': 'truck'},
 {'vehicle_id': '3719 WQR',
  'datetime_utc': 1588497968.0,
  'latitude': 40.41651432933575,
 

__Road names from vehicles coordinates__

In [10]:
def road_from_coord(lat, lon):
    coordinates= str(lat)+ ','+str(lon)
    #10 mins maximum timeout to prevent being blocked
    locator = Nominatim(timeout=10)
    rgeocode = RateLimiter(locator.reverse, min_delay_seconds=0.001)
    location = rgeocode(coordinates)
    return(location)

In [11]:
road_names = []
final_vehics = []
for i in tqdm(vehic_records):
    location = road_from_coord(i['latitude'],i['longitude'])
    try:
        road_names.append(location.raw['address']['road'])
        final_vehics.append(i)
    except KeyError:
        continue

100%|██████████| 100/100 [00:49<00:00,  2.02it/s]


## For roads

In [12]:
road_names= np.unique(road_names)

In [13]:
def road_record(road_name):
    record = {
        u'road_name': road_name,
        u'number_lanes': fake.pyint(min_value=1, max_value=4, step=1),
        u'min_speed': random.choice([25,50,60]),
        u'has_bus_lane': random.choice([0,1]),
    }
    return record

In [14]:
road_records = [road_record(name) for name in road_names]

In [15]:
road_records

[{'road_name': 'AP-41', 'number_lanes': 4, 'min_speed': 60, 'has_bus_lane': 0},
 {'road_name': 'Autopista radial RM-1',
  'number_lanes': 2,
  'min_speed': 50,
  'has_bus_lane': 0},
 {'road_name': 'Autovía del Este',
  'number_lanes': 4,
  'min_speed': 25,
  'has_bus_lane': 1},
 {'road_name': 'Autovía del Suroeste',
  'number_lanes': 3,
  'min_speed': 50,
  'has_bus_lane': 1},
 {'road_name': 'Avenida Antonio Machado',
  'number_lanes': 1,
  'min_speed': 50,
  'has_bus_lane': 0},
 {'road_name': 'Avenida Nuestra Señora del Retamar',
  'number_lanes': 4,
  'min_speed': 60,
  'has_bus_lane': 0},
 {'road_name': 'Avenida Puente Cultural',
  'number_lanes': 1,
  'min_speed': 25,
  'has_bus_lane': 0},
 {'road_name': 'Avenida de Madrid',
  'number_lanes': 2,
  'min_speed': 50,
  'has_bus_lane': 0},
 {'road_name': 'Avenida de la Hispanidad',
  'number_lanes': 4,
  'min_speed': 50,
  'has_bus_lane': 0},
 {'road_name': 'Avenida de la Tierra',
  'number_lanes': 4,
  'min_speed': 25,
  'has_bus_lane

## Write out records

In [16]:
import json

with open('roads.json', 'w') as json_file:
    json.dump(road_records, json_file)
with open('vehicles.json', 'w') as json_file:
    json.dump(final_vehics, json_file)