In [1]:
import pandas as pd
import folium as f
import os
import matplotlib.pyplot as plt
from glob import glob
import seaborn as sns
import datetime
import random
import numpy as np
from haversine import haversine
from sklearn.cluster import DBSCAN
import itertools
from tqdm import tqdm

In [2]:
from GridIndexer import GridIndexer
from SV30Preprocess import SV30Preprocess

# Bounding box (in degrees) and cell size handed to GridIndexer below.
min_lat = 33.120581  # Minimum latitude value
max_lat = 38.726809  # Maximum latitude value
min_lon = 124.896901  # Minimum longitude value
max_lon = 132.058734  # Maximum longitude value
cell_size = 0.001  # Cell size in degrees


# Grid indexer built over the bounding box; later cells call its
# get_index(lat, lon) and get_M_coord(grid_index) methods.
indexer = GridIndexer(min_lat, max_lat, min_lon, max_lon, cell_size)
# Preprocessing helper; its methods are chained in the preprocessing cell.
preprocess = SV30Preprocess()

In [3]:
'''
LTEM Data Load (One Day)
'''

# Every CSV under the LTEM position directory, in lexicographic order.
all_csv = sorted(glob('/STORAGE/DATA/01_KRISO_G_MAPPED_DATA/Position_LTEM/*.csv'))
# Only the first 25 files are loaded.
# NOTE(review): presumably one file per hour, so 25 files span "one day" - confirm.
selected_csv = all_csv[0:25]

# Read each file once and concatenate a single time. Calling pd.concat inside
# the loop re-copies the whole accumulated frame on every iteration (quadratic)
# and concatenates against an empty seed DataFrame.
frames = []
for sel in tqdm(selected_csv):
    frames.append(pd.read_csv(sel, sep = ',', encoding='utf-8'))

# pd.concat raises on an empty list, so keep the original "empty frame" result
# when no file matched the glob pattern.
total = pd.concat(frames) if frames else pd.DataFrame()
print('One day LTEM Data Load ')

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:05<00:00,  4.18it/s]

One day LTEM Data Load 





In [4]:
# Show the concatenated raw LTEM frame (rendered as a truncated head/tail view).
total

Unnamed: 0,szMsgSendDT,SHIP_CODE,dSOG,dCOG,dLat,dLon
0,20220628110654000,BE010da2,6.331898,149.0,36.965206,126.828316
1,20220628110654000,AB110b5d,10.100000,272.0,37.874233,129.008636
2,20220628110654000,AB110b8e,0.000000,227.0,38.499546,128.425110
3,20220628110654000,AB0908a0,0.000000,0.0,34.556713,127.675613
4,20220628110654000,BD010d54,0.000000,275.0,35.978321,126.622986
...,...,...,...,...,...,...
738051,20220629115959000,AB02044d,9.479563,291.0,36.348801,129.429718
738052,20220629115959000,AB08064b,0.000000,333.0,36.249516,126.536850
738053,20220629115959000,BE020e3f,7.910589,173.0,35.364128,125.724831
738054,20220629115959000,AB010107,0.000000,256.0,36.675259,126.128418


In [5]:
def Aship_to_dict(df):
    """Split the rows of ships whose SHIP_CODE starts with 'A' into per-ship frames.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string ``SHIP_CODE`` column.

    Returns
    -------
    dict
        Maps each matching SHIP_CODE to the sub-DataFrame holding that ship's
        rows. Keys are in order of first appearance in ``df``; every value keeps
        the original row order and index labels.
    """
    # na=False treats a missing SHIP_CODE as "does not start with 'A'"; without
    # it the mask contains NaN and boolean indexing raises.
    is_a_ship = df['SHIP_CODE'].str.startswith('A', na=False)

    # Key: ship code, value: that ship's DataFrame.
    # A single groupby pass replaces one full-frame boolean scan per ship.
    # sort=False keeps first-appearance order; observed=True avoids empty groups
    # should SHIP_CODE ever be categorical.
    dict_ = {
        ship_code: ship_rows
        for ship_code, ship_rows in df[is_a_ship].groupby(
            'SHIP_CODE', sort=False, observed=True
        )
    }

    print('Key : SHIP_CODE startswith A / Value : A Dataframe END.')

    return dict_

In [7]:
'''
Preprocessing
'''
# Each stage consumes the previous stage's output. The stage notes below are
# inferred from the progress messages recorded in this cell's output and should
# be confirmed against SV30Preprocess.
# Per-ship frames for ship codes starting with 'A'.
dict_ = Aship_to_dict(total)
# Presumably the stage that logs "SOG Filtering END."
upper_2 = preprocess.upper_2_dataframe(dict_)
# Presumably adds the derived "dist" column.
add_dist = preprocess.add_distance_df(upper_2)
# Presumably keeps rows with dist > 10.
dist_upper_10 = preprocess.dist_upper_10_df(add_dist)
# Presumably reformats the timestamp column into a readable form.
reset_time = preprocess.reset_index_time_series(dist_upper_10)
# Presumably adds the course-change column (dCOG_diff in the final frame).
add_cog = preprocess.add_cog_interval(reset_time)
# Presumably the stage that logs "3 straight Line END."
straight_line = preprocess.similar_straight_cog(add_cog)

# Collapse the per-ship result back into a single DataFrame.
final = preprocess.dataframe_preprocessed(straight_line)

Key : SHIP_CODE startswith A / Value : A Dataframe END.
SOG Filtering END.


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Derived variable dist END. 
Dist > 10 Filtering END.
Time_Series readability END.
Derived variable dCOG END.
3 straight Line END.
Preprocessed Dict -> DataFrame END.


In [8]:
# Renumber the rows 0..n-1, discarding the old index labels.
# Rebinding the name instead of using inplace=True leaves any other reference
# to the pipeline's output untouched and follows the usual pandas idiom.
final = final.reset_index(drop = True)
final

Unnamed: 0,szMsgSendDT,SHIP_CODE,dSOG,dCOG,dLat,dLon,dist,dCOG_diff
0,2022-06-29-04-27-27,AB110b5d,9.900000,41.0,37.918968,128.953674,55.238613,24.0
1,2022-06-29-04-27-30,AB110b5d,10.600000,34.0,37.919079,128.953796,16.308807,7.0
2,2022-06-29-04-27-42,AB110b5d,10.900000,40.0,37.919556,128.954269,67.327630,6.0
3,2022-06-29-04-28-09,AB110b5d,10.500000,52.0,37.920368,128.955643,150.580022,12.0
4,2022-06-29-04-28-12,AB110b5d,10.700000,55.0,37.920456,128.955795,16.562920,3.0
...,...,...,...,...,...,...,...,...
365585,2022-06-29-11-52-37,AB090842,10.400000,70.0,35.977085,126.527885,15.938311,5.0
365586,2022-06-29-11-52-40,AB090842,10.100000,71.0,35.977135,126.528053,16.078794,1.0
365587,2022-06-29-11-52-49,AB090842,10.200000,93.0,35.977180,126.528580,47.643206,22.0
365588,2022-06-29-11-48-37,AB09094c,3.554099,20.0,34.469666,127.294792,11.661909,70.0


In [9]:
'''
Add Grid to the df after preprocessing
'''
def add_grid_col(x):
    """Return the grid index of one record.

    ``x`` is anything subscriptable by ``'dLat'`` and ``'dLon'`` (for example a
    DataFrame row); both values are forwarded to ``indexer.get_index``.
    """
    return indexer.get_index(x['dLat'], x['dLon'])
    
# Map every (dLat, dLon) pair to its grid index. Iterating the two coordinate
# columns directly avoids DataFrame.apply(axis=1), which builds a Series of all
# columns for each row just to read two values from it.
final['Grid'] = [
    indexer.get_index(lat, lon)
    for lat, lon in zip(final['dLat'], final['dLon'])
]

In [10]:
# Show the preprocessed frame, now including the Grid column.
final

Unnamed: 0,szMsgSendDT,SHIP_CODE,dSOG,dCOG,dLat,dLon,dist,dCOG_diff,Grid
0,2022-06-29-04-27-27,AB110b5d,9.900000,41.0,37.918968,128.953674,55.238613,24.0,34367333
1,2022-06-29-04-27-30,AB110b5d,10.600000,34.0,37.919079,128.953796,16.308807,7.0,34367333
2,2022-06-29-04-27-42,AB110b5d,10.900000,40.0,37.919556,128.954269,67.327630,6.0,34367334
3,2022-06-29-04-28-09,AB110b5d,10.500000,52.0,37.920368,128.955643,150.580022,12.0,34374497
4,2022-06-29-04-28-12,AB110b5d,10.700000,55.0,37.920456,128.955795,16.562920,3.0,34374497
...,...,...,...,...,...,...,...,...,...
365585,2022-06-29-11-52-37,AB090842,10.400000,70.0,35.977085,126.527885,15.938311,5.0,20456303
365586,2022-06-29-11-52-40,AB090842,10.100000,71.0,35.977135,126.528053,16.078794,1.0,20456304
365587,2022-06-29-11-52-49,AB090842,10.200000,93.0,35.977180,126.528580,47.643206,22.0,20456304
365588,2022-06-29-11-48-37,AB09094c,3.554099,20.0,34.469666,127.294792,11.661909,70.0,9663936


In [14]:
'''
Grid per ship (duplicates removed)
'''
# Ship codes in order of first appearance.
aships = final.SHIP_CODE.unique()

# Collect each ship's distinct grid indices, in order of first occurrence, in a
# single groupby pass rather than one boolean scan of the whole frame per ship.
grids_by_ship = {
    ship_code: grids.unique().tolist()
    for ship_code, grids in final.groupby('SHIP_CODE', sort=False, observed=True)['Grid']
}

# Same order as aships. A code with no group (e.g. a missing SHIP_CODE, which
# groupby drops) gets an empty list, matching what a per-ship mask would yield.
ships_grid = [grids_by_ship.get(aship, []) for aship in aships]

grid_dict = {'SHIP_CODE': aships, 'Grid': ships_grid}   # kept as a dictionary

grid_df = pd.DataFrame(grid_dict)  # dict to DataFrame
grid_df

Unnamed: 0,SHIP_CODE,Grid
0,AB110b5d,"[34367333, 34367334, 34374497, 34374498, 34381..."
1,AB110b8e,"[38520770, 38527933, 38527934, 38535097, 38535..."
2,AB0908a0,"[10359062, 10366224, 10373386, 10380548, 10387..."
3,AB09096e,"[29136771, 29129608, 29129607, 29129606, 29122..."
4,AB080684,"[10925688, 10932849, 10940011, 10947173, 10954..."
...,...,...
882,AB0204c1,"[36895123, 36895124, 36887963, 36880801, 36880..."
883,AB080615,"[6663073, 6663072]"
884,AB090a4e,"[20176988, 20169826, 20176987, 20169825]"
885,AB090842,"[20141172, 20148334, 20148333, 20155495, 20155..."


In [21]:
# Save the per-ship grid table as a CSV file in the current working directory.
grid_df.to_csv('격자.csv')

In [57]:
# Ship codes whose positions are collected below.
selected = 'AB110b5d', 'AB110b8e', 'AB0908a0', 'AB09096e', 'AB080684'

# One [dLat Series, dLon Series] pair per selected ship, in the order above.
pos_lst = [
    [ship_rows.dLat, ship_rows.dLon]
    for ship_rows in (final[final.SHIP_CODE.values == code] for code in selected)
]

pos_lst

[[0      37.918968
  1      37.919079
  2      37.919556
  3      37.920368
  4      37.920456
           ...    
  446    37.908386
  447    37.908188
  448    37.908100
  449    37.907856
  450    37.907776
  Name: dLat, Length: 451, dtype: float64,
  0      128.953674
  1      128.953796
  2      128.954269
  3      128.955643
  4      128.955795
            ...    
  446    128.906296
  447    128.906021
  448    128.905884
  449    128.905548
  450    128.905411
  Name: dLon, Length: 451, dtype: float64],
 [451    38.498974
  452    38.499073
  453    38.499184
  454    38.499310
  455    38.499443
           ...    
  701    38.517792
  702    38.517368
  703    38.517246
  704    38.517143
  705    38.517021
  Name: dLat, Length: 255, dtype: float64,
  451    128.429993
  452    128.430176
  453    128.430344
  454    128.430527
  455    128.430756
            ...    
  701    128.458130
  702    128.457626
  703    128.457443
  704    128.457245
  705    128.457031
  Name: dLon

In [98]:
# Rows of the preprocessed frame that belong to ship 'AB110b5d'.
is_target_ship = final.SHIP_CODE.values == 'AB110b5d'
AB110b5d = final[is_target_ship]
AB110b5d

Unnamed: 0,szMsgSendDT,SHIP_CODE,dSOG,dCOG,dLat,dLon,dist,dCOG_diff,Grid
0,2022-06-29-04-27-27,AB110b5d,9.9,41.0,37.918968,128.953674,55.238613,24.0,34367333
1,2022-06-29-04-27-30,AB110b5d,10.6,34.0,37.919079,128.953796,16.308807,7.0,34367333
2,2022-06-29-04-27-42,AB110b5d,10.9,40.0,37.919556,128.954269,67.327630,6.0,34367334
3,2022-06-29-04-28-09,AB110b5d,10.5,52.0,37.920368,128.955643,150.580022,12.0,34374497
4,2022-06-29-04-28-12,AB110b5d,10.7,55.0,37.920456,128.955795,16.562920,3.0,34374497
...,...,...,...,...,...,...,...,...,...
446,2022-06-29-11-02-15,AB110b5d,11.2,221.0,37.908386,128.906296,17.524017,1.0,34288504
447,2022-06-29-11-02-21,AB110b5d,10.3,225.0,37.908188,128.906021,32.667330,4.0,34288504
448,2022-06-29-11-02-24,AB110b5d,10.4,234.0,37.908100,128.905884,15.502910,9.0,34288503
449,2022-06-29-11-02-32,AB110b5d,9.3,216.0,37.907856,128.905548,40.054360,18.0,34288503


In [99]:
def AB110b5d_():
    """Convert the grid indices in the first row of ``grid_df`` to coordinates.

    Reads the module-level ``grid_df`` and ``indexer``.

    Returns
    -------
    list
        ``indexer.get_M_coord(g)`` for every grid index ``g`` in
        ``grid_df.iloc[0].Grid``, in the same order.
    """
    # NOTE(review): the row is selected by position, not by ship code. The
    # function name implies ship 'AB110b5d', which is only correct while that
    # ship is row 0 of grid_df - confirm if grid_df is ever rebuilt or reordered.
    # The former unused locals were dropped: a `selected` tuple and a full
    # filter of `final` whose result was never used.
    return [indexer.get_M_coord(coord) for coord in grid_df.iloc[0].Grid]

In [100]:
# Coordinates converted back from the grid indices in row 0 of grid_df.
pos_lst2 = AB110b5d_()

In [101]:
'''
Compare whether the Grid conversion worked properly
'''
# Already imported in the first cell; repeated here so the cell also runs alone.
import folium as f

# Centre the map on the first record of the preprocessed frame.
first_record = final.iloc[0]
m = f.Map(location = [first_record.dLat, first_record.dLon], zoom_start = 6)

# Recorded positions of the ship, drawn with the default marker colour.
for lat, lon in zip(AB110b5d['dLat'], AB110b5d['dLon']):
    f.CircleMarker(location = (lat, lon), radius = 3).add_to(m)

# Positions recovered from the grid indices, drawn in red.
for grid_pos in pos_lst2:
    f.CircleMarker(location = grid_pos, radius = 3, color = 'red').add_to(m)

# Record counts before and after reducing the track to unique grid cells.
print('Grid 변환 전 데이터 수 : {0}'.format(len(AB110b5d)))
print('Grid 변환 후 데이터 수 : {0}'.format(len(pos_lst2)))

m

Grid 변환 전 데이터 수 : 451
Grid 변환 후 데이터 수 : 188


In [104]:
# Show the per-ship table of unique grid indices.
grid_df

Unnamed: 0,SHIP_CODE,Grid
0,AB110b5d,"[34367333, 34367334, 34374497, 34374498, 34381..."
1,AB110b8e,"[38520770, 38527933, 38527934, 38535097, 38535..."
2,AB0908a0,"[10359062, 10366224, 10373386, 10380548, 10387..."
3,AB09096e,"[29136771, 29129608, 29129607, 29129606, 29122..."
4,AB080684,"[10925688, 10932849, 10940011, 10947173, 10954..."
...,...,...
882,AB0204c1,"[36895123, 36895124, 36887963, 36880801, 36880..."
883,AB080615,"[6663073, 6663072]"
884,AB090a4e,"[20176988, 20169826, 20176987, 20169825]"
885,AB090842,"[20141172, 20148334, 20148333, 20155495, 20155..."
