In [226]:
import pandas as pd
import numpy as np
import glob
from scipy import stats
from scipy.stats import kurtosis, skew

In [125]:
def spherical_dist_populate(lat_lis, lon_lis, r=3958.75):
    lat_mtx = np.array([lat_lis]).T * np.pi / 180
    lon_mtx = np.array([lon_lis]).T * np.pi / 180

    cos_lat_i = np.cos(lat_mtx)
    cos_lat_j = np.cos(lat_mtx)
    cos_lat_J = np.repeat(cos_lat_j, len(lat_mtx), axis=1).T

    lat_Mtx = np.repeat(lat_mtx, len(lat_mtx), axis=1).T
    cos_lat_d = np.cos(lat_mtx - lat_Mtx)

    lon_Mtx = np.repeat(lon_mtx, len(lon_mtx), axis=1).T
    cos_lon_d = np.cos(lon_mtx - lon_Mtx)

    mtx = r * np.arccos(cos_lat_d - cos_lat_i*cos_lat_J*(1 - cos_lon_d))
    return mtx

In [126]:
# Get list of files in folder
files = []
for file in glob.glob("/home/server/pi/homes/woodilla/Data/GFW_point/Patagonia_Shelf/feather" + "/*.feather"):
    files.append(file)
nfiles = sorted(files)[0:365]
nfiles[0]

'/home/server/pi/homes/woodilla/Data/GFW_point/Patagonia_Shelf/feather/2016-01-02.feather'

In [238]:
dist_dat = pd.DataFrame(columns = ['date', 'mean', 'std', 'kurtosis'])
count = 0
for i in nfiles[0:365]:
    indat = pd.read_feather(i)
    indat = indat.sort_values('mmsi')
    outdat = indat
    outdat['lat_avg'] = outdat.groupby('mmsi').lat.transform('mean')
    outdat['lon_avg'] = outdat.groupby('mmsi').lon.transform('mean')
    outdat = outdat.groupby('mmsi').first().reset_index()

    outdat = outdat[['timestamp', 'year', 'month', 'day', 'mmsi', 'lat', 'lon', \
                    'segment_id', 'message_id', 'type', 'speed', 'course', 'heading', 'shipname', 'callsign', \
                     'destination', 'elevation_m', 'distance_from_shore_m', 'distance_from_port_m', 'nnet_score', \
                     'logistic_score', 'flag', 'geartype', 'length', 'tonnage', 'engine_power', 'active_2012', \
                     'active_2013', 'active_2014', 'active_2015', 'active_2016']]
    date = f"{outdat['year'][1]}-" + f"{outdat['month'][1]}".zfill(2) + f"-" + f"{outdat['day'][1]}".zfill(2)

    matdat = pd.DataFrame(spherical_dist_populate(outdat['lat'], outdat['lon']))
    matdat = matdat.rename(index=outdat.mmsi, columns = outdat.mmsi)    
    tmatdat = matdat.where(np.triu(np.ones(matdat.shape)).astype(np.bool))
    tmatdat = tmatdat.stack().reset_index()
    tmatdat.columns = ['vessel_A','vessel_B','distance']
    newdat = pd.DataFrame({'date': date, 'mean': np.mean(tmatdat['distance']), 'std': np.std(tmatdat['distance']), \
                         'kurtosis': kurtosis(tmatdat['distance']), 'skewness': skew(tmatdat['distance'])}, index=[0])
    dist_dat = dist_dat.append(newdat, ignore_index=True)
    dist_dat.to_feather('~/Projects/Patagonia-EDA/distance_sample.feather')
    

In [237]:
dist_dat

Unnamed: 0,date,kurtosis,mean,skewness,std
0,2016-01-02,-0.331482,1014.807438,0.439289,612.917789


In [165]:
newDF = pd.DataFrame() #creates a new dataframe that's empty
newDF = newDF.append(dist_dat, ignore_index = True) # ignoring index is optional
# try printing some data from newDF
print(newDF.head()) #again optional 

  date mean  std    0
0  NaN  NaN  NaN  1.0
1  NaN  NaN  NaN  1.0


In [128]:
keep = np.triu(np.ones(matdat.shape)).astype('bool').reshape(matdat.size)
keep
tmatdat = matdat.stack()[keep]

In [145]:
df = matdat.where(np.triu(np.ones(matdat.shape)).astype(np.bool))
df = df.stack().reset_index()
df.columns = ['vessel_A','vessel_B','distance']
print(df)

          vessel_A   vessel_B     distance
0                9          9     0.000000
1                9         11   541.122736
2                9        372   255.331914
3                9       1942   402.017503
4                9       1957   559.728170
5                9       2409   167.194879
6                9       2411    17.185609
7                9       2829   140.236311
8                9       2902    18.879051
9                9       2940   140.220199
10               9       3098   559.908751
11               9       3099   559.912177
12               9       3340    13.648176
13               9       3496   112.078258
14               9       3604    25.153479
15               9       3887   126.880149
16               9       5625   607.717577
17               9      36407   541.187309
18               9     905000   115.449384
19               9    1129435  2371.993911
20               9    1193046   607.789131
21               9    1193047   629.862906
22         

In [65]:
outdat.head()

Unnamed: 0,235108697,412331035,412217993,412331035.1,412100000,412331077,100000000,412420908,412329693,412420908.1,...,412000000,225372000,440282000,412329693.1,352941000,351199000,209889000,412371204,412420908.2,477535400
235108697,0.0,541.122736,255.331914,402.017503,559.72817,167.194879,17.185609,140.236311,18.879051,140.220199,...,567.394959,569.162535,572.172355,572.903112,575.776624,560.922757,1287.916608,446.062608,140.885851,1005.769123
412331035,541.122736,0.0,785.963393,229.188199,96.086971,687.943234,523.956234,666.092687,552.658178,666.323356,...,98.354627,102.36288,108.471753,110.347614,116.181029,91.850607,756.804708,164.188973,666.914217,771.155718
412217993,255.331914,785.963393,0.0,657.183536,791.480713,99.112619,271.790248,119.928999,239.754582,119.718872,...,799.068955,800.233219,802.329573,802.767455,804.755145,793.422846,1538.977938,700.950541,119.116688,1120.474025
412331035,402.017503,229.188199,657.183536,0.0,307.803363,567.425833,385.903522,541.342363,418.385881,541.384523,...,313.235577,317.005061,322.884156,324.598477,330.194138,305.327444,903.101027,67.12244,542.043037,960.150328
412100000,559.72817,96.086971,791.480713,307.803363,0.0,692.383626,542.963713,673.160015,568.553058,673.468165,...,7.667884,10.050056,15.24708,16.862627,22.391869,4.920142,776.187208,248.066332,674.008563,675.178159


235108697  235108697       0.000000
           412331035     541.121257
           412217993     278.741196
           412331035     402.017643
           412100000     559.729366
           412331077     223.365543
           100000000      41.054941
           412420908     140.220947
           412329693      18.879051
           412420908     140.220199
           412100000     559.910873
           412271490     559.908630
           412420908      10.288225
           412331036     112.075526
           412331088      33.006864
           412217992     126.889719
           565486000     607.717528
           636016305     541.186183
           412331036     115.449980
           412329693    2371.993911
           636016305     607.789131
           293800000     632.824357
           412329693    2187.224782
           412331037      21.116960
           412331036     139.384336
           412271490     520.264974
           412217992     761.869845
           412217993     421

In [45]:
outdat.head()

Unnamed: 0,235108697,412331035,412217993,412331035.1,412100000,412331077,100000000,412420908,412329693,412420908.1,...,412000000,225372000,440282000,412329693.1,352941000,351199000,209889000,412371204,412420908.2,477535400
235108697,0.0,541.121257,278.741196,402.017643,559.729366,223.365543,41.054941,140.220947,18.879051,140.220199,...,567.389642,569.160533,572.172763,572.901809,575.772608,560.920956,1287.916608,459.173247,140.886118,1004.784628
412331035,541.121257,0.0,813.131114,229.186365,96.088449,748.925988,524.899576,666.080661,552.656692,666.321854,...,98.348787,102.361842,108.470512,110.345649,116.177865,91.846416,756.806215,123.895543,666.913058,770.005932
412217993,278.741196,813.131114,0.0,679.499308,820.302241,70.340454,288.562956,147.415344,264.040606,147.133884,...,827.900454,829.124109,831.30686,831.769118,833.832024,822.173647,1564.935945,737.312951,146.567448,1150.000786
412331035,402.017643,229.186365,679.499308,0.0,307.804172,625.199158,402.170821,541.328019,418.385999,541.384631,...,313.227825,317.002399,322.882452,324.595461,330.189029,305.322575,903.100559,105.353018,542.043434,959.008994
412100000,559.729366,96.088449,820.302241,307.804172,0.0,753.049737,537.147553,673.152627,568.554214,673.469171,...,7.661467,10.046475,15.244808,16.85885,22.385964,4.924325,776.186875,206.760523,674.009924,674.026647


In [15]:
dat = pd.read_feather(nfiles[0])
dat.head()

Unnamed: 0,timestamp,year,month,day,hour,minute,second,mmsi,lat,lon,...,flag,geartype,length,tonnage,engine_power,active_2012,active_2013,active_2014,active_2015,active_2016
0,2016-01-02 16:28:15,2016,1,2,16,28,15,235108697,-44.355,-46.551667,...,,,,,,,,,,
1,2016-01-02 16:32:20,2016,1,2,16,32,20,412331035,-45.706667,-60.595,...,CHN,squid_jigger,66.115778,1330.921203,1873.000284,False,True,True,True,True
2,2016-01-02 16:34:03,2016,1,2,16,34,3,412217993,-45.853333,-60.506667,...,CHN,squid_jigger,67.65289,1664.021383,2325.908576,False,False,True,True,True
3,2016-01-02 15:47:15,2016,1,2,15,47,15,412331035,-45.705,-60.598333,...,CHN,squid_jigger,66.115778,1330.921203,1873.000284,False,True,True,True,True
4,2016-01-02 04:40:14,2016,1,2,4,40,14,412100000,-45.521667,-60.373333,...,,,,,,,,,,
