In [1]:
import ast
from datetime import date, datetime, timedelta
import os
import random
import string
import sys
# Resolve the parent of the current working directory to an absolute path and
# add it to the import search path (only if it is not already there), so that
# modules located in that directory can be imported by later cells.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
import pandas as pd
import report_utils
from testdata.polish_names.generator import generate_people
from testdata.report_data import samples

In [None]:
# Display the report_fields mapping exposed by report_utils; it is indexed with
# keys such as 'title' and 'tags' when the reports DataFrame is assembled.
report_utils.report_fields

In [None]:
def generateNames(n):
    """Return two parallel lists: first names and last names of n people.

    ``n // 2`` people are requested from ``generate_people`` with "M" and the
    remaining ``n - n // 2`` with "F". The combined list is shuffled in place,
    then every entry is split on whitespace: the first token is taken as the
    first name and the second token as the last name.

    NOTE(review): this presumes ``generate_people`` returns a list of strings
    with at least two whitespace-separated tokens each - confirm.
    """
    male_count = n // 2
    people = generate_people(male_count, "M") + generate_people(n - male_count, "F")
    random.shuffle(people)

    first_names = []
    last_names = []
    for person in people:
        tokens = person.split()
        first_names.append(tokens[0])
        last_names.append(tokens[1])

    return first_names, last_names

generateNames(10)

In [None]:
def generateTitles(n):
    """Return n entries sampled without replacement from ``samples.titles``.

    ``random.sample`` raises ``ValueError`` when n is negative or larger than
    the number of available titles.
    """
    title_pool = samples.titles
    return random.sample(title_pool, n)

generateTitles(10)

In [None]:
def generateDescriptions(n):
    """Return n entries sampled without replacement from ``samples.descriptions``.

    ``random.sample`` raises ``ValueError`` when n is negative or larger than
    the number of available descriptions.
    """
    description_pool = samples.descriptions
    return random.sample(description_pool, n)

generateDescriptions(10)

In [None]:
def generateTags(n, max_tags):
    """Return n tag lists, each holding between 1 and max_tags tags.

    For every list a size is drawn uniformly from 1..max_tags (both ends
    included) and that many entries are sampled without replacement from
    ``samples.tags``.

    The previous implementation used ``random.randrange(1, max_tags)``, whose
    upper bound is exclusive: lists never reached ``max_tags`` entries and
    ``max_tags == 1`` raised ``ValueError``.

    Raises:
        ValueError: if ``max_tags`` is smaller than 1, or if a drawn size
            exceeds the number of entries in ``samples.tags``.
    """
    return [
        # randint includes max_tags itself, unlike randrange.
        random.sample(samples.tags, random.randint(1, max_tags))
        for _ in range(n)
    ]

generateTags(10, 3)

In [None]:
def generateDates(n, start=date(2017, 6, 3), end=date(2020, 1, 15)):
    """Return two parallel lists of n creation dates and n update dates.

    For each entry a day offset is drawn uniformly from
    ``0 .. (end - start).days - 1``. The creation date is ``start + offset``
    and the update date is ``end + offset``.

    The global random generator is no longer reseeded inside the loop: the
    former ``random.seed(a=None)`` call discarded any seed set by the caller
    on every iteration, which made the output impossible to reproduce.

    Args:
        n: number of date pairs to generate.
        start: first possible creation date (defaults to 2017-06-03).
        end: exclusive upper bound for creation dates and base of the update
            dates (defaults to 2020-01-15).

    Raises:
        ValueError: if n > 0 and ``end`` is not after ``start``.
    """
    total_days = (end - start).days

    creationDates = []
    updateDates = []
    for _ in range(n):
        offset = random.randrange(total_days)
        creationDates.append(start + timedelta(days=offset))
        # NOTE(review): the same offset is applied to `end`, so every update
        # date lies exactly (end - start) days after its creation date.
        # Confirm this fixed gap is intended rather than an independent draw.
        updateDates.append(end + timedelta(days=offset))

    return creationDates, updateDates

generateDates(10)

In [None]:
def generateBool(n):
    """Return a list of n random booleans (used for the linked-metadata flag).

    Each value comes from one ``random.randint(1, 100)`` draw: results 1..49
    map to False and 50..100 map to True, i.e. True is slightly more likely
    (51 outcomes out of 100).
    """
    return [random.randint(1, 100) >= 50 for _ in range(n)]

generateBool(10)

In [None]:
def generateIDs(n):
    """Return a list of n unique identifiers in generation order.

    Every identifier is two random uppercase ASCII letters followed by seven
    random decimal digits (for example ``SO3390244``). Candidates that were
    already produced are discarded and redrawn.

    A companion set is used for the duplicate check so that membership tests
    stay constant-time instead of scanning the result list, and the local
    variable no longer shadows the builtin ``id``.
    """
    ids = []
    seen = set()
    while len(ids) < n:
        letters = ''.join(random.choices(string.ascii_uppercase, k=2))
        digits = ''.join(random.choices(string.digits, k=7))
        candidate = letters + digits
        if candidate not in seen:
            seen.add(candidate)
            ids.append(candidate)
    return ids

generateIDs(10)

In [None]:
def generateCoordinates(n, side_length=0.01):
    """Return the corner coordinates of n random squares as four parallel lists.

    For each square an anchor point is drawn with x uniform in [49.00, 55.00]
    and y uniform in [14.00, 32.00]; the opposite corner is the anchor shifted
    by ``side_length`` on both axes. The corners are then ordered so that the
    "from" point has the smaller x and the larger y, and the "to" point has the
    larger x and the smaller y (the lower-right corner).

    The side length used to come from ``random.uniform(0.01, 0.01)``, which
    always evaluates to 0.01; it is now an explicit parameter with that value
    as default. Dropping that draw means one fewer random number is consumed
    per square.

    Args:
        n: number of squares to generate.
        side_length: edge length added to the anchor on both axes.

    Returns:
        Tuple ``(fromCoordx, fromCoordy, toCoordx, toCoordy)`` of lists with
        n floats each.
    """
    fromCoordx, fromCoordy, toCoordx, toCoordy = [], [], [], []
    for _ in range(n):
        x_anchor = random.uniform(49.00, 55.00)
        y_anchor = random.uniform(14.00, 32.00)
        x_shifted = x_anchor + side_length
        y_shifted = y_anchor + side_length

        # Order the corners regardless of the sign of side_length.
        fromCoordx.append(min(x_anchor, x_shifted))
        fromCoordy.append(max(y_anchor, y_shifted))
        toCoordx.append(max(x_anchor, x_shifted))
        toCoordy.append(min(y_anchor, y_shifted))

    return fromCoordx, fromCoordy, toCoordx, toCoordy

generateCoordinates(10)

In [None]:
def generateDummyReports(n):
    """Build a DataFrame with n randomly generated dummy reports.

    Each column is produced by one of the generator helpers defined in this
    notebook and is named through the ``report_utils.report_fields`` mapping.
    The helpers are invoked in a fixed order (names, titles, descriptions,
    tags, dates, metadata flags, ids, coordinates).
    """
    fields = report_utils.report_fields

    first_names, last_names = generateNames(n)
    titles = generateTitles(n)
    descriptions = generateDescriptions(n)
    tags = generateTags(n, 3)  # second argument is the max_tags bound
    creationDates, updateDates = generateDates(n)
    metadata = generateBool(n)
    ids = generateIDs(n)
    fromCoordx, fromCoordy, toCoordx, toCoordy = generateCoordinates(n)

    # Insertion order of this dict determines the column order of the frame.
    columns = {
        fields['title']: titles,
        fields['desc']: descriptions,
        fields['fromCoordx']: fromCoordx,
        fields['fromCoordy']: fromCoordy,
        fields['toCoordx']: toCoordx,
        fields['toCoordy']: toCoordy,
        fields['creationDate']: creationDates,
        fields['authFirstName']: first_names,
        fields['authLastName']: last_names,
        fields['id']: ids,
        fields['lastUpdateDate']: updateDates,
        fields['linkedMetadata']: metadata,
        fields['tags']: tags,
    }
    return pd.DataFrame(columns)

In [None]:
# Generate a table of 100 dummy reports.
reports = generateDummyReports(100)

In [None]:
# Display the freshly generated reports DataFrame.
reports

In [37]:
# Persist the reports to a CSV file in the current working directory; with the
# default settings the DataFrame index is written as the first column.
reports.to_csv("dummy_reports.csv")

In [50]:
# Reload the reports from disk, using the first CSV column as the index.
# NOTE(review): this rebinds `reports`, replacing the in-memory frame built
# earlier. Values read from CSV such as the tag lists and the dates come back
# as plain strings, which is why convertReports parses them afterwards.
reports = pd.read_csv("dummy_reports.csv", index_col=0)

In [51]:
# Display the DataFrame as reloaded from dummy_reports.csv.
reports

Unnamed: 0,title,description,fromCoordsx,fromCoordsy,toCoordx,toCoordy,creationDate,authorFirstName,authorLastName,reportId,lastUpdateDate,linkedMetadata,tags
0,Ground Troop Concentration Analysis,Enemy artillery units have been detected near ...,49.999825,18.693754,50.009825,18.683754,2019-12-10,Marek,Wasilewski,SO3390244,2022-07-23,True,"['Observation Post', 'Maritime Patrol Aircraft']"
1,Hostile Communications Interception Analysis a...,Enemy air activity has been detected near the ...,50.161974,30.971135,50.171974,30.961135,2019-03-07,Robert,Zieliński,IG6089584,2021-10-18,True,"['Infrared Emitter', 'Transport Helicopter']"
2,Ballistic Missile Launch Early Warning,A large enemy fleet has been detected approach...,52.473758,31.546478,52.483758,31.536478,2019-09-03,Teresa,Kowalska,ML4213237,2022-04-16,True,"['Missile Defense System', 'Ammunition Depot']"
3,Ballistic Missile Defense Surveillance,Satellite imagery shows an increase in enemy t...,54.855681,23.970608,54.865681,23.960608,2017-06-13,Anna,Dąbrowska,ZL1567473,2020-01-25,False,"['Naval Gunfire Support', 'Airborne Early Warn..."
4,Hostile UAV Threat Assessment,Satellite imagery shows enemy naval buildup ne...,49.618902,27.976024,49.628902,27.966024,2018-03-25,Natalia,Sadowska,MS8908998,2020-11-05,False,"['Marine Amphibious Unit', 'Engineering Works']"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Enemy Communications Interception,Hostile forces spotted digging in and fortifyi...,50.815993,24.045529,50.825993,24.035529,2017-07-02,Janina,Sikora,FX4678709,2020-02-13,False,"['Mine Countermeasures Vessel', 'Ground Attack..."
96,Maritime Domain Control Operations,Intelligence suggests enemy plans to launch cy...,51.619496,26.839765,51.629496,26.829765,2017-07-04,Edward,Nowak,CC7907280,2020-02-15,True,"['Anti-Tank Guided Missile System', 'Forward A..."
97,Satellite Communication Interception,Enemy ground forces have been spotted moving t...,51.391970,28.537668,51.401970,28.527668,2019-05-25,Mieczysław,Głowacki,CZ9392491,2022-01-05,False,['Ship']
98,Hostile Ballistic Missile Launch Site Analysis...,Enemy forces have occupied a key town and set ...,53.829649,23.369496,53.839649,23.359496,2018-02-07,Irena,Maciejewska,RX1992921,2020-09-20,False,"['Forward Operating Base', 'Aircraft Hangar']"


In [41]:
# Mapping from short logical field keys to the column names used in the
# reports DataFrame / CSV file. convertReports looks columns up through it.
# NOTE(review): the values match the CSV header produced via
# report_utils.report_fields, so this looks like a local copy of that mapping -
# consider reusing the shared one to avoid drift.
# NOTE(review): "fromCoordsx"/"fromCoordsy" carry an "s" that "toCoordx"/
# "toCoordy" do not; the names are kept as-is because they are the column
# names present in the saved CSV.
report_fields = {
    "title": "title",
    "desc": "description",
    "fromCoordx": "fromCoordsx",
    "fromCoordy": "fromCoordsy",
    "toCoordx": "toCoordx",
    "toCoordy": "toCoordy",
    "creationDate": "creationDate",
    "authFirstName": "authorFirstName",
    "authLastName": "authorLastName",
    "id": "reportId",
    "lastUpdateDate": "lastUpdateDate",
    "linkedMetadata": "linkedMetadata",
    "tags": "tags"
}

In [42]:
def convertReports(reports_df):
    """Convert a reports DataFrame into a list of per-report dictionaries.

    The frame is turned into one dict per row (``orient="records"``), then the
    columns that a CSV round trip leaves as text are restored to Python
    objects:

    * the tags column is parsed with ``ast.literal_eval`` (e.g. the text
      ``"['Ship']"`` becomes the list ``['Ship']``);
    * the creation and last-update columns are parsed from ``YYYY-MM-DD``
      text into ``datetime.date`` objects.

    Values that already have the target type (a list of tags, a ``date`` or
    ``datetime``) are accepted as well, so a frame that never went through CSV
    can be converted too. Any other value is parsed as before and raises if it
    is malformed.

    Requires ``ast`` and ``datetime.datetime`` to be imported at the top of
    the notebook; a fresh kernel previously failed here with ``NameError``.

    Args:
        reports_df: DataFrame whose columns are named per ``report_fields``.

    Returns:
        List of dicts, one per row, keyed by column name.
    """
    tags_key = report_fields['tags']
    date_keys = (report_fields['creationDate'], report_fields['lastUpdateDate'])

    reports_entries = reports_df.to_dict(orient="records")
    for record in reports_entries:
        tags = record[tags_key]
        if not isinstance(tags, list):
            record[tags_key] = ast.literal_eval(tags)

        for key in date_keys:
            value = record[key]
            if isinstance(value, datetime):
                # datetime is a subclass of date; reduce it to a plain date.
                record[key] = value.date()
            elif not isinstance(value, date):
                record[key] = datetime.strptime(value, '%Y-%m-%d').date()

    return reports_entries

In [53]:
# Turn the reloaded DataFrame into a list of per-report dicts with parsed
# tags and dates.
reports_entries = convertReports(reports)

In [58]:
# Spot-check the first five converted records: print each position followed by
# the Python type of the record, one value per line.
for i, report in enumerate(reports_entries[:5]):
    print(i, type(report), sep="\n")

0
<class 'dict'>
1
<class 'dict'>
2
<class 'dict'>
3
<class 'dict'>
4
<class 'dict'>


In [57]:
# NOTE(review): leftover scratch expression. Indexing a str with a str always
# raises "TypeError: string indices must be integers" (see the recorded
# traceback), so this cell fails on every run - remove it or replace it with
# the lookup that was actually intended.
"tag"['auth']

  "tag"['auth']
  "tag"['auth']
  "tag"['auth']


TypeError: string indices must be integers

In [None]:
# FIXME: the original content of this cell, `cond[==5]`, is not valid Python
# (it raises SyntaxError) and `cond` is not defined in any visible cell. It is
# kept below as a comment until the intended expression is known.
# cond[==5]