Create fake data using pydantic and faker

In [58]:
from __future__ import annotations
from typing import Any, List
from pydantic import BaseModel


In [57]:
from pydantic import BaseModel
from jsf import JSF


class Person(BaseModel):
    name: str
    id: int


faker = JSF(Person.schema())
print(faker.generate())


{'name': 'reiciendis veniam odit officiis possimus nobis', 'id': 2801}


In [10]:
class Sensor(BaseModel):
    __root__: Any


print(Sensor.schema_json(indent=2))
# faker = JSF(Sensor.schema())
# print(faker.generate())


{
  "title": "Sensor"
}


Creating the JSON data by hand

In [18]:
import datetime
from datetime import date, timedelta
import random
import json
from faker import Faker

In [44]:

# config and constraints
Faker.seed(0)
fake = Faker()

start_date = date(2022, 1, 1)
num_days = 25
dates = [start_date + timedelta(n) for n in range(num_days)]
end_date = date(2022, 1, num_days)
num_customers = 20

import pytz
tz_list = pytz.all_timezones
can_tz = list(filter(lambda k: 'Canada' in k, tz_list))

In [50]:
# define json object
jsonObject={
  "GPSData": [
    {
      "SnappedLongitude": fake.pyfloat(min_value=-180, max_value=180),
      "GPSLongitude": fake.pyfloat(min_value=-180, max_value=180),
      "TimeStamp": fake.iso8601(),
      "GPSHeading": fake.random_int(0, 360),
      "GPSAltitude": fake.random_int(0, 500),
      "GPSSpeed": fake.pyfloat(min_value=0, max_value=250),
      "Deviation": fake.pyfloat(),
      "FromNodeID": fake.pyfloat(),
      "WayID": fake.pyfloat(),
      "SnappedLatitude": fake.pyfloat(min_value=-90, max_value=90),
      "ToNodeID": fake.pyfloat(),
      "HorizontalAccuracy": fake.pyint(),
      "GPSLatitude": fake.pyfloat(min_value=-90, max_value=90),
      "VerticalAccuracy": fake.pyint(min_value=0, max_value=100),
    }
  ],
  "DataAlgorithmVersion": fake.pystr(),
  "TripID": fake.unique.random_int(),
  "StartTime": fake.iso8601(),
  "CompanyID": ''.join(random.choice('abcdefghijklmnopqrtzusv') for i in range(5)).upper(),
  "StartTimeZone": random.choice(can_tz),
  "DriverID": fake.pystr(),
  "EndTime": fake.iso8601(),
  "EndTimeZone": random.choice(can_tz),
}
json.dumps(jsonObject)

'{"GPSData": [{"SnappedLongitude": -13.41883748, "GPSLongitude": -154.557188, "TimeStamp": "1982-10-17T19:08:30", "GPSHeading": 280, "GPSAltitude": 493, "GPSSpeed": 21.51602869361, "Deviation": -5180.25784375797, "FromNodeID": 74357376203595.9, "WayID": 3494596985436.38, "SnappedLatitude": 87.23884, "ToNodeID": -382601465276.35, "HorizontalAccuracy": 7421, "GPSLatitude": -60.4513415173415, "VerticalAccuracy": 17}], "DataAlgorithmVersion": "HQHfiScknqtDSSXFzHxw", "TripID": 8813, "StartTime": "1982-02-18T09:07:28", "CompanyID": "DMGMN", "StartTimeZone": "Canada/Pacific", "DriverID": "kEahrPLrGciZpuCpymcj", "EndTime": "2005-08-05T11:36:52", "EndTimeZone": "Canada/Pacific"}'

In [59]:
# datamodel-codegen --input sensor_dummy_data.json --input-file-type json --output model_fromJSON.py

class GPSDatum(BaseModel):
    SnappedLongitude: float
    GPSLongitude: float
    TimeStamp: str
    GPSHeading: int
    GPSAltitude: int
    GPSSpeed: float
    Deviation: float
    FromNodeID: float
    WayID: float
    SnappedLatitude: float
    ToNodeID: float
    HorizontalAccuracy: int
    GPSLatitude: float
    VerticalAccuracy: int


class Model(BaseModel):
    GPSData: List[GPSDatum]
    DataAlgorithmVersion: str
    TripID: int
    StartTime: str
    CompanyID: str
    StartTimeZone: str
    DriverID: str
    EndTime: str
    EndTimeZone: str

In [60]:
print(Model.schema_json())

{"title": "Model", "type": "object", "properties": {"GPSData": {"title": "Gpsdata", "type": "array", "items": {"$ref": "#/definitions/GPSDatum"}}, "DataAlgorithmVersion": {"title": "Dataalgorithmversion", "type": "string"}, "TripID": {"title": "Tripid", "type": "integer"}, "StartTime": {"title": "Starttime", "type": "string"}, "CompanyID": {"title": "Companyid", "type": "string"}, "StartTimeZone": {"title": "Starttimezone", "type": "string"}, "DriverID": {"title": "Driverid", "type": "string"}, "EndTime": {"title": "Endtime", "type": "string"}, "EndTimeZone": {"title": "Endtimezone", "type": "string"}}, "required": ["GPSData", "DataAlgorithmVersion", "TripID", "StartTime", "CompanyID", "StartTimeZone", "DriverID", "EndTime", "EndTimeZone"], "definitions": {"GPSDatum": {"title": "GPSDatum", "type": "object", "properties": {"SnappedLongitude": {"title": "Snappedlongitude", "type": "number"}, "GPSLongitude": {"title": "Gpslongitude", "type": "number"}, "TimeStamp": {"title": "Timestamp", 

In [61]:
faker = JSF(Model.schema())
print(faker.generate())

{'GPSData': [{'SnappedLongitude': 9676.0, 'GPSLongitude': 7710.0, 'TimeStamp': 'modi elit. officiis illum architecto quas veniam', 'GPSHeading': 8198, 'GPSAltitude': 289, 'GPSSpeed': 233.0, 'Deviation': 8122.0, 'FromNodeID': 89.0, 'WayID': 2503.0, 'SnappedLatitude': 4955.0, 'ToNodeID': 2659.0, 'HorizontalAccuracy': 9496, 'GPSLatitude': 6643.0, 'VerticalAccuracy': 4627}, {'SnappedLongitude': 2500.0, 'GPSLongitude': 7917.0, 'TimeStamp': 'molestias, adipisicing ipsum, magnam,', 'GPSHeading': 8970, 'GPSAltitude': 342, 'GPSSpeed': 3521.0, 'Deviation': 5470.0, 'FromNodeID': 7699.0, 'WayID': 5093.0, 'SnappedLatitude': 8735.0, 'ToNodeID': 5500.0, 'HorizontalAccuracy': 3614, 'GPSLatitude': 9252.0, 'VerticalAccuracy': 2870}, {'SnappedLongitude': 7474.0, 'GPSLongitude': 2095.0, 'TimeStamp': 'placeat', 'GPSHeading': 5748, 'GPSAltitude': 5893, 'GPSSpeed': 7982.0, 'Deviation': 6010.0, 'FromNodeID': 7325.0, 'WayID': 9590.0, 'SnappedLatitude': 8159.0, 'ToNodeID': 6254.0, 'HorizontalAccuracy': 7483, 'G