In [1]:
from datetime import date, timedelta
import os
import random
import string
import sys
# Put the parent of the current working directory on the import path (once),
# so modules located there can be imported by the cells below.
module_path = os.path.abspath('..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [2]:
import pandas as pd
import report_utils
from testdata.polish_names.generator import generate_people
from testdata.report_data import samples

In [3]:
report_utils.report_fields

{'title': 'title',
 'desc': 'description',
 'fromCoords': 'fromCoords',
 'toCoords': 'toCoords',
 'creationDate': 'creationDate',
 'authFirstName': 'authorFirstName',
 'authLastName': 'authorLastName',
 'id': 'reportId',
 'lastUpdateDate': 'lastUpdateDate',
 'linkedMetadata': 'linkedMetadata',
 'tags': 'tags'}

In [4]:
def generateNames(n):
    """Generate ``n`` shuffled person names split into first and last names.

    ``n // 2`` people are requested from ``generate_people`` with "M" and the
    remaining ``n - n // 2`` with "F". The two lists are concatenated and
    shuffled in place with ``random.shuffle``.

    Assumes each generated entry is a whitespace-separated string with at
    least two tokens ("First Last") -- TODO confirm against generate_people.

    Returns:
        tuple: ``(first_names, last_names)``; entries at the same index
        come from the same generated person.
    """
    male_count = n // 2
    people = generate_people(male_count, "M") + generate_people(n - male_count, "F")
    random.shuffle(people)

    # Split every full name once; token 0 is the first name, token 1 the last.
    tokenized = [full_name.split() for full_name in people]
    first_names = [tokens[0] for tokens in tokenized]
    last_names = [tokens[1] for tokens in tokenized]

    return first_names, last_names

generateNames(10)

(['Jadwiga',
  'Sebastian',
  'Ewa',
  'Artur',
  'Janina',
  'Daniel',
  'Zofia',
  'Daniel',
  'Bożena',
  'Jarosław'],
 ['Kaczmarek',
  'Urbański',
  'Sawicka',
  'Sobczak',
  'Kamińska',
  'Wieczorek',
  'Wasilewska',
  'Nowak',
  'Rutkowska',
  'Gajewski'])

In [5]:
def generateTitles(n):
    """Return ``n`` report titles sampled without replacement from ``samples.titles``.

    ``random.sample`` raises ``ValueError`` when ``n`` is larger than the
    number of available titles.
    """
    title_pool = samples.titles
    return random.sample(title_pool, k=n)

generateTitles(10)

['Infantry Battalion Deployment Tracking',
 'Enemy Convoy Movement Tracking',
 'Hostile Communications Interception Analysis and Operations',
 'Ballistic Missile Defense Intelligence',
 'Enemy Ground Assault Route Monitoring',
 'Enemy Coastline Surveillance',
 'Satellite Surveillance of Eastern Border',
 'Ballistic Missile Defense Surveillance',
 'Enemy UAV Threat Analysis and Monitoring',
 'Missile Launch Site Detection']

In [6]:
def generateDescriptions(n):
    """Return ``n`` report descriptions sampled without replacement from ``samples.descriptions``.

    ``random.sample`` raises ``ValueError`` when ``n`` is larger than the
    number of available descriptions.
    """
    description_pool = samples.descriptions
    return random.sample(description_pool, k=n)

generateDescriptions(10)

["Hostile forces spotted digging in and fortifying positions in no man's land. Aerial reconnaissance recommended to gather intelligence.",
 'Hostage situation reported at embassy in enemy territory. Special forces team deployed to rescue personnel and secure the building.',
 'A large enemy aircraft has been detected flying near our air space. Our air defense systems have been activated and fighter jets have been scrambled to intercept.',
 'Satellite imagery shows a new missile launch site under construction in enemy territory. The site is believed to be capable of launching long-range missiles.',
 'Enemy air activity has been detected near the border. Air defense systems are on standby and fighter jets are ready to scramble in case of an attack.',
 'Intel suggests enemy plans to launch a large-scale attack on our border. Reinforcements and increased readiness recommended.',
 'Satellite imagery reveals increased military activity near the coastal region. Our naval units and coastal defe

In [7]:
def generateTags(n, max_tags):
    """Generate ``n`` tag lists, each holding between 1 and ``max_tags`` tags.

    Args:
        n: Number of tag lists to produce.
        max_tags: Largest number of tags a single list may contain
            (inclusive). Must be at least 1 and no larger than the number
            of entries in ``samples.tags``.

    Returns:
        list[list]: ``n`` lists of tags sampled without replacement from
        ``samples.tags``.

    Raises:
        ValueError: If ``max_tags`` is below 1 or a drawn size exceeds the
            number of available tags.
    """
    # randint is inclusive on both ends; the previous randrange(1, max_tags)
    # silently excluded max_tags itself and failed outright for max_tags == 1.
    return [
        random.sample(samples.tags, random.randint(1, max_tags))
        for _ in range(n)
    ]

generateTags(10, 3)

[['Cargo Terminal'],
 ['Maritime Patrol Aircraft', 'Target Acquisition Battery'],
 ['Aircraft Hangar'],
 ['Reservoir', 'Vehicle Convoy'],
 ['Ground-to-Air Missile System'],
 ['Anti-Tank Guided Missile System', 'Special Operations Forces'],
 ['Minefield'],
 ['Forward Air Control Post', 'Transport Helicopter'],
 ['Helicopter Landing Zone'],
 ['Ground-to-Air Missile System', 'Battle Tank']]

In [8]:
def generateDates(n, start=date(2017, 6, 3), end=date(2020, 1, 15)):
    """Generate ``n`` pairs of (creation date, last-update date).

    For every pair a random day offset in ``[0, (end - start).days)`` is
    drawn. The creation date is ``start + offset`` and the update date is
    ``end + offset``.

    NOTE(review): because the same offset is applied to both dates, every
    update date is exactly ``end - start`` after its creation date. Confirm
    this fixed gap is intended rather than an independently drawn update date.

    The global ``random`` state is used as-is and is no longer re-seeded
    inside the loop, so a caller that sets ``random.seed(...)`` beforehand
    gets reproducible output.

    Args:
        n: Number of date pairs to generate.
        start: Earliest possible creation date (inclusive).
        end: Exclusive upper bound for creation dates; also the base of the
            update dates.

    Returns:
        tuple: ``(creationDates, updateDates)``, two lists of
        ``datetime.date`` of length ``n``.

    Raises:
        ValueError: If ``end`` is not after ``start`` and ``n`` > 0
            (``random.randrange`` is given an empty range).
    """
    # Number of whole days available for the random offset.
    total_days = (end - start).days

    creationDates = []
    updateDates = []

    for _ in range(n):
        offset = timedelta(days=random.randrange(total_days))
        creationDates.append(start + offset)
        updateDates.append(end + offset)

    return creationDates, updateDates

generateDates(10)

([datetime.date(2017, 7, 9),
  datetime.date(2018, 12, 3),
  datetime.date(2019, 1, 16),
  datetime.date(2019, 12, 17),
  datetime.date(2018, 11, 13),
  datetime.date(2019, 4, 3),
  datetime.date(2018, 5, 6),
  datetime.date(2017, 7, 12),
  datetime.date(2019, 12, 30),
  datetime.date(2018, 1, 28)],
 [datetime.date(2020, 2, 20),
  datetime.date(2021, 7, 16),
  datetime.date(2021, 8, 29),
  datetime.date(2022, 7, 30),
  datetime.date(2021, 6, 26),
  datetime.date(2021, 11, 14),
  datetime.date(2020, 12, 17),
  datetime.date(2020, 2, 23),
  datetime.date(2022, 8, 12),
  datetime.date(2020, 9, 10)])

In [9]:
def generateBool(n): # Metadata available
    """Return ``n`` random booleans, each True with probability 1/2.

    One integer in ``[1, 100]`` is drawn per flag: 1..50 maps to False and
    51..100 to True. The previous ``< 50`` comparison sent only 1..49 to
    False, giving a 49/51 split.

    Args:
        n: Number of flags to generate.

    Returns:
        list[bool]: ``n`` flags.
    """
    return [random.randint(1, 100) > 50 for _ in range(n)]

generateBool(10)

[False, True, False, True, False, False, False, True, False, True]

In [10]:
def generateIDs(n):
    """Generate ``n`` unique report IDs of the form two uppercase letters + seven digits.

    Candidates are drawn until ``n`` distinct values have been collected;
    duplicates are discarded and redrawn.

    Args:
        n: Number of IDs to generate. Values <= 0 yield an empty list.

    Returns:
        list[str]: ``n`` distinct IDs in generation order.
    """
    ids = []
    seen = set()  # constant-time duplicate check; ``ids`` keeps the order
    while len(ids) < n:
        letters = ''.join(random.choices(string.ascii_uppercase, k=2))
        digits = ''.join(random.choices(string.digits, k=7))
        candidate = letters + digits
        if candidate not in seen:
            seen.add(candidate)
            ids.append(candidate)
    return ids

generateIDs(10)

['DS5887610',
 'OQ9129315',
 'RQ2813996',
 'OU4162323',
 'TO5521144',
 'ER9515621',
 'DY8941753',
 'FZ6921280',
 'HB6001981',
 'EB1461256']

In [22]:
def generateCoordinates(n, x_range=(49.00, 55.00), y_range=(14.00, 32.00), side_range=(0.01, 0.01)):
    """Generate ``n`` random square areas described by two opposite corners.

    For each area a base point ``(x, y)`` and a side length are drawn
    uniformly from the given ranges. The "from" corner gets the smaller x
    and the larger y; the "to" corner gets the larger x and the smaller y.

    Args:
        n: Number of areas to generate.
        x_range: ``(low, high)`` bounds for the base x coordinate.
        y_range: ``(low, high)`` bounds for the base y coordinate.
        side_range: ``(low, high)`` bounds for the side length. The default
            is a degenerate range, so every side is exactly 0.01.

    Returns:
        tuple: ``(fromCoordx, fromCoordy, toCoordx, toCoordy)``, four
        lists of floats of length ``n``.
    """
    fromCoordx, fromCoordy, toCoordx, toCoordy = [], [], [], []

    for _ in range(n):
        # Draw order (x, y, side) is kept so seeded runs stay unchanged.
        x1 = random.uniform(*x_range)
        y1 = random.uniform(*y_range)
        side_length = random.uniform(*side_range)

        x2 = x1 + side_length
        y2 = y1 + side_length

        # min/max orders the corners regardless of the sign of side_length.
        fromCoordx.append(min(x1, x2))
        fromCoordy.append(max(y1, y2))
        toCoordx.append(max(x1, x2))
        toCoordy.append(min(y1, y2))

    return fromCoordx, fromCoordy, toCoordx, toCoordy

generateCoordinates(10)

([52.87952626603499,
  53.22796036895823,
  53.54409875048582,
  50.84449697854502,
  52.90486039541383,
  51.07306014364216,
  50.908620839212716,
  51.19536133190682,
  51.024577399131374,
  49.82697517409197],
 [22.379884011028185,
  25.219389247384072,
  23.12788462982216,
  18.855702479309098,
  25.144759753589813,
  22.809824804429805,
  16.54933844932276,
  30.20210754257404,
  16.680284660018845,
  29.193900470044493],
 [52.88952626603499,
  53.23796036895823,
  53.55409875048582,
  50.854496978545015,
  52.914860395413825,
  51.08306014364216,
  50.918620839212714,
  51.205361331906815,
  51.03457739913137,
  49.83697517409197],
 [22.369884011028184,
  25.20938924738407,
  23.117884629822157,
  18.845702479309097,
  25.13475975358981,
  22.799824804429804,
  16.53933844932276,
  30.19210754257404,
  16.670284660018844,
  29.18390047004449])

In [12]:
def generateDummyReports(n):
    """Build a DataFrame of ``n`` randomly generated dummy reports.

    Each column is produced by one of the ``generate*`` helpers, and the
    column names are taken from ``report_utils.report_fields``. The two
    corner columns (``fromCoords`` / ``toCoords``) hold ``(x, y)`` tuples
    built from the four coordinate lists.

    Args:
        n: Number of reports (rows) to generate.

    Returns:
        pandas.DataFrame: One row per report.
    """
    first_names, last_names = generateNames(n)
    titles = generateTitles(n)
    descriptions = generateDescriptions(n)
    tags = generateTags(n, 3)  # max_tags=3
    creationDates, updateDates = generateDates(n)
    metadata = generateBool(n)
    ids = generateIDs(n)
    fromCoordx, fromCoordy, toCoordx, toCoordy = generateCoordinates(n)

    fields = report_utils.report_fields

    # report_fields exposes a single key per corner ('fromCoords' and
    # 'toCoords'), so each corner's x/y lists are zipped into one column.
    reports = pd.DataFrame(
    {fields['title']: titles,
     fields['desc']: descriptions,
     fields['fromCoords']: list(zip(fromCoordx, fromCoordy)),
     fields['toCoords']: list(zip(toCoordx, toCoordy)),
     fields['creationDate']: creationDates,
     fields['authFirstName']: first_names,
     fields['authLastName']: last_names,
     fields['id']: ids,
     fields['lastUpdateDate']: updateDates,
     fields['linkedMetadata']: metadata,
     fields['tags']: tags
    })

    return reports

In [13]:
reports = generateDummyReports(100)

In [17]:
reports.to_csv("dummy_reports.csv")

In [19]:
reports = pd.read_csv("dummy_reports.csv", index_col=0)

In [20]:
reports

Unnamed: 0,title,description,fromCoords,toCoords,creationDate,authorFirstName,authorLastName,reportId,lastUpdateDate,linkedMetadata,tags
0,Ground Communication Tracking,Multiple enemy aircraft have been spotted near...,"(50.49691354339553, 23.983716797959882)","(50.50691354339553, 23.97371679795988)",2018-11-16,Magdalena,Kaźmierczak,HG3355187,2021-06-29,True,"['River Crossing Operations', 'Communication T..."
1,Ballistic Missile Defense and Early Warning Sy...,Enemy ground troops have been spotted near the...,"(51.06800940724182, 29.0742489595214)","(51.078009407241815, 29.0642489595214)",2019-12-06,Katarzyna,Szczepańska,QA6480408,2022-07-19,True,['Rapid Reaction Force']
2,Enemy Coastline Surveillance Analysis and Moni...,Satellite imagery shows an increase in enemy t...,"(49.82712489668053, 22.996467648046345)","(49.83712489668053, 22.986467648046343)",2017-09-12,Marian,Duda,RI9516795,2020-04-25,False,"['Railway', 'Forward Air Control Post']"
3,Ground Electronic Warfare Operations and Analysis,Enemy artillery units have been detected near ...,"(50.5066137336635, 21.01218535309926)","(50.5166137336635, 21.002185353099257)",2018-06-21,Patrycja,Lewandowska,MP9545937,2021-02-01,True,"['Munitions Factory', 'Minefield']"
4,Air Defense System Analysis,Multiple enemy ships have been spotted in the ...,"(49.71330906225228, 23.41502337043732)","(49.72330906225228, 23.40502337043732)",2018-03-26,Roman,Wojciechowski,ZV8492009,2020-11-06,True,['Ship']
...,...,...,...,...,...,...,...,...,...,...,...
95,Enemy Amphibious Assault Threat Assessment,Hostile aircraft spotted in restricted airspac...,"(50.52749190125472, 19.00007731221442)","(50.53749190125472, 18.990077312214417)",2020-01-04,Katarzyna,Michalska,OP7317689,2022-08-17,False,['Maritime Patrol Aircraft']
96,Suspicious Military Activity Report,A large enemy fleet has been detected approach...,"(50.05961622532844, 21.17399766594679)","(50.06961622532844, 21.163997665946788)",2019-07-31,Halina,Król,QW5693951,2022-03-13,True,['Mobile Maintenance Unit']
97,Ground Electronic Warfare Assessment,Satellite imagery reveals increased military a...,"(53.217102947052865, 31.032791888696565)","(53.22710294705286, 31.022791888696563)",2019-09-26,Janina,Wójcik,KT1748355,2022-05-09,True,['Launch Pad']
98,Ground-Based Electronic Warfare Detection,A large enemy aircraft has been detected flyin...,"(49.5246997405049, 18.822652352059578)","(49.5346997405049, 18.812652352059576)",2018-02-07,Waldemar,Jankowski,TX0768437,2020-09-20,True,['Heavy Equipment Storage']
