In [1]:
import random
import numpy as np
import pandas as pd

In [2]:
def convert_loc(row):  #convert longlat to meter/km
    splitted = row['loc'].split(',')
    row['lon'] = splitted[0]
    row['lat'] = splitted[1]
    return row
def convert_speed(row):  #convert longlat to meter/km
    splitted = row['v'].split(',')
    row['vx'] = splitted[0]
    row['vy'] = splitted[1]
    return row

In [3]:
infile = "events_Approach - Bypass250.txt"
df = pd.read_csv(infile, sep='|', names=['t', 'id', 'loc', 'v'])
df = df.apply(lambda row: convert_loc(row), axis=1)
df = df.apply(lambda row: convert_speed(row), axis=1)
df['vx'] = df['vx'].astype(float)
df['vy'] = df['vy'].astype(float)
df['lon'] = df['lon'].astype(float)
df['lat'] = df['lat'].astype(float)

In [4]:
df.head()

Unnamed: 0,t,id,loc,v,lon,lat,vx,vy
0,0,0,444659485893,"-199.664,-4.03242",444659.0,485893.0,-199.664,-4.03242
1,0,1,"-74090.9,213393","218.884,208.741",-74090.9,213393.0,218.884,208.741
2,0,2,-101591987143,"236.781,-2.63515",-101591.0,987143.0,236.781,-2.63515
3,0,3,195909633393,"4.99736,-122.464",195909.0,633393.0,4.99736,-122.464
4,0,4,-109091525893,"243.18,-33.8731",-109091.0,525893.0,243.18,-33.8731


In [5]:
MIN_vx = df.vx.min()
MAX_vx = df.vx.max()
MIN_vy = df.vy.min()
MAX_vy = df.vy.max()

MIN_lon = df.lon.min()
MAX_lon = df.lon.max()
MIN_lat = df.lat.min()
MAX_lat = df.lat.max()
print("vx range is "+str(MIN_vx)+" to " +str(MAX_vx))
print("vy range is "+str(MIN_vy)+" to " +str(MAX_vy))
print("lon range is "+str(MIN_lon)+" to " +str(MAX_lon))
print("latrange is "+str(MIN_lat)+" to " +str(MAX_lat))

vx range is -282.472 to 480.206
vy range is -198.659 to 545.744
lon range is -155341.0 to 752877.0
latrange is 212143.0 to 1078390.0


In [6]:
df.lon.head()

0    444659.0
1    -74090.9
2   -101591.0
3    195909.0
4   -109091.0
Name: lon, dtype: float64

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12500 entries, 0 to 12499
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   t       12500 non-null  int64  
 1   id      12500 non-null  int64  
 2   loc     12500 non-null  object 
 3   v       12500 non-null  object 
 4   lon     12500 non-null  float64
 5   lat     12500 non-null  float64
 6   vx      12500 non-null  float64
 7   vy      12500 non-null  float64
dtypes: float64(4), int64(2), object(2)
memory usage: 781.4+ KB


In [12]:
def trunc(values, decs=0):
    return np.trunc(values*10**decs)/(10**decs)

In [25]:
vesselNum = int(df.id.max()/2)
rMin = 100
rMax = 400
data = []
i = 0
while i < vesselNum:
    vesID = i
    vx = random.uniform(MIN_vx, MAX_vx)
    vy = random.uniform(MIN_vy, MAX_vy)
    r = random.randrange(rMin, rMax)
    data.append([vesID, vx, vy, r])
    i += 1
    
result = pd.DataFrame(data, columns = ['id','vx', 'vy', 'r'])
result['lon'] = trunc(np.random.normal(df.lon.mean(), df.lon.std(), vesselNum), 4)
result['lat'] = trunc(np.random.normal(df.lat.mean(), df.lat.std(), vesselNum), 4)
result['vx'] = trunc(result['vx'], 4)
result['vy'] = trunc(result['vy'], 4)
result.head()

Unnamed: 0,id,vx,vy,r,lon,lat
0,0,-31.6186,145.1567,199,263959.2191,388218.5706
1,1,477.6276,270.6136,236,271133.9694,654328.662
2,2,123.4858,94.5313,258,330206.7864,296552.4301
3,3,-47.2327,303.82,293,592547.5057,688452.9985
4,4,302.9791,377.9543,330,-157184.1985,930529.063


In [17]:
def join_cols(row):
    row['loc'] = str(row['lon'])+','+str(row['lat'])
    row['v'] = str(row['vx'])+','+str(row['vy'])
    return row

In [26]:
result = result.apply(lambda row: join_cols(row), axis=1)
del result['vx']
del result['vy']
del result['lon']
del result['lat']
result['id'] = result['id'].astype(int)
neworder = ['id','loc','v', 'r']
result=result.reindex(columns=neworder)
result.head()

Unnamed: 0,id,loc,v,r
0,0,"263959.2191,388218.5706","-31.6186,145.1567",199.0
1,1,"271133.9694,654328.662","477.6276,270.6136",236.0
2,2,"330206.7864,296552.4301","123.4858,94.5313",258.0
3,3,"592547.5057,688452.9985","-47.2327,303.82",293.0
4,4,"-157184.1985,930529.063","302.9791,377.9543",330.0


In [27]:
filename = 'vessel_'+str(len(result))+'.csv'
result.to_csv(filename, sep='|', header=False, index=False)