In [27]:
import pandas as pd
import numpy as np

In [2]:
#!pip install haversine

In [28]:
from azureml.core import Datastore,Workspace,Dataset
# Get a named datastore from the current workspace
ws = Workspace.from_config()
datastore = Datastore.get(ws, datastore_name='aisdatastore')

In [29]:
rotte_dataset = ws.datasets["rotte"]

In [30]:
from datetime import datetime

def preprocessRotte(df):

    df = df.to_pandas_dataframe()
    #drop NA values
    df=df.dropna()
    #build timestamp column to be added to rotte_092021 dataframe
    stamp=df['stamp'].astype(float)
    #stamp    
    timestamp_column = stamp.apply(lambda x: datetime.fromtimestamp(x))
    #timestamp_column
    #add timestamp column to rotte_092021 dataframe
    df['timestamp'] = timestamp_column.values
    #sort_dataset by mmsi,stamp,lng,lat
    df = df.sort_values(by=['mmsi','stamp','lng','lat'],ascending=[True,True,True,True])
    
    return df

In [31]:
rotte_dataset = preprocessRotte(rotte_dataset)

Resolving access token for scope "https://datalake.azure.net//.default" using identity of type "MANAGED".
Getting data access token with Assigned Identity (client_id=clientid) and endpoint type based on configuration


In [32]:
ais_dataset =  ws.datasets["ais"]

In [33]:
shiptypeDict = {
    "0": "Unspecified",
    "1": "NULL",
    "2": "Fishing",
    "3": "Search and Rescue/Special Craft/Tug/Passenger/Other",
    "4": "High Speed Craft",
    "6": "Passenger",
    "7": "Cargo",
    "8": "Tanker",
    "9": "Sailing Vessel/Pleasure Craft"
}


In [34]:
def setShipTypeDesc (row):
    if row['type_summary'] == 'NULL':
        row['shiptype_desc'] = shiptypeDict[str(row['shiptype'])]
    else:
        row['shiptype_desc'] = row['type_summary']

In [35]:
def preprocessAIS(df):

    df = df.to_pandas_dataframe()
    #set type when is NULL
    df['shiptype_desc'] = df.apply(lambda x: (shiptypeDict[str(x.shiptype)] if x.type_summary == 'NULL' else x.type_summary), axis=1)
    #keep vessels whose type != Fishing
    df = df[df['shiptype']!=2]
    
    return df

In [36]:
ais_dataset = preprocessAIS(ais_dataset)

In [37]:
#CHECK
#ais_dataset[(ais_dataset['type_summary'] != 'NULL')  & (ais_dataset['type_summary'] != ais_dataset['shiptype_desc'])]

ais_dataset[(ais_dataset['type_summary'] != ais_dataset['shiptype_desc'])]

Unnamed: 0,lat,lng,speed,course,heading,elapsed,destination,flag,length,rot,...,dwt,gt_shiptype,shiptype,time,imo,mmsi,porto,type_summary,cod_porto,shiptype_desc
25,40.83993,8.398233,0.0,0,,3,PORTOTORES,IT,35.0,,...,100,54.0,3,2021-12-14,7512507,24717000,Porto Torres,,ITPTO,Search and Rescue/Special Craft/Tug/Passenger/...
286,37.23005,15.204770,1.0,36,340,1,ITAUG,CY,147.0,0,...,3250,116.0,6,2021-12-13,7602120,209510000,Augusta,,IT00ITAUG,Passenger
406,45.63348,13.776620,0.0,57,49,2,TRIESTE,CY,38.0,0,...,100,93.0,3,2021-10-26,9492995,209910000,Trieste,,IT00ITTRS,Search and Rescue/Special Craft/Tug/Passenger/...
528,44.40485,8.896432,0.0,284,103,1,ITGOA,DE,294.0,0,...,66971,11.0,7,2021-10-23,9193305,211331640,Genova,,IT00ITGOA,Cargo
593,44.41770,8.791890,0.0,70,255,4,IT LIV>IT GOA,DE,336.0,0,...,103691,11.0,7,2021-10-02,9295268,211839000,Genova,,IT00ITGOA,Cargo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60387,38.44803,15.902660,0.0,0,199,4,IT GIT,LR,147.0,-8,...,13619,11.0,7,2021-10-22,9330238,636092525,Gioia Tauro,,IT00ITGIT,Cargo
60418,44.40464,8.896410,0.0,297,296,1,IT LIV>IT GOA,LR,259.0,2,...,58027,11.0,7,2021-09-23,9495765,636092697,Genova,,IT00ITGOA,Cargo
60477,44.41850,8.786523,0.0,85,29,2,ITGOA,LR,332.0,-720,...,108448,11.0,7,2021-09-04,9330070,636092986,Genova,,IT00ITGOA,Cargo
60545,45.45181,12.254980,0.0,,,429,,--,0.0,,...,,,0,2021-10-08,9086526,667001963,Venezia,,IT00ITVCE,Unspecified


In [130]:
#ais_dataset[ais_dataset['mmsi']== 209910000]

In [38]:
vessels_type = ais_dataset['type_summary'].unique()
vessels_flag = ais_dataset['flag'].unique()
print(vessels_type)
print(vessels_flag)

['Cargo' 'Tug' 'Pleasure Craft' 'NULL' 'Passenger' 'Tanker'
 'Special Craft' 'High Speed Craft' 'Sailing Vessel' 'Search and Rescue'
 'Other' 'Unspecified']
['--' 'KY' 'IT' 'GB' 'DZ' 'MH' 'AL' 'BE' 'BG' 'CY' 'DE' 'MD' 'MT' 'DK'
 'ES' 'FR' 'GI' 'GR' 'HR' 'MA' 'NL' 'PA' 'KI' 'RU' 'BS' 'LU' 'MC' 'PT'
 'NO' 'SE' 'CH' 'TR' 'UA' 'LV' 'LT' 'AG' 'BM' 'BZ' 'BB' 'HN' 'US' 'JM'
 'KN' 'VC' 'VG' 'SA' 'CN' 'TW' 'AZ' 'IL' 'JP' 'KZ' 'KR' 'KW' 'LB' 'QA'
 'HK' 'PW' 'CK' 'NU' 'PH' 'SG' 'TV' 'VN' 'VU' 'WF' 'CM' 'KM' 'EG' 'LR'
 'PL' 'LY' 'SL' 'TG' 'TN']


In [39]:
vessels_mmsi = ais_dataset['mmsi'].unique().astype(int)
vessels_imo = ais_dataset['imo'].unique().astype(int)

print(len(vessels_mmsi))
print(len(vessels_imo))

3150
3122


In [40]:
world_porti = ws.datasets["porti1"]

In [41]:
def preprocessPorts(df):

    try:
        df = df.to_pandas_dataframe()
    except Exception as e:
        print(e)
    df = df[['Country','Name','Latitude','Longitude','UNLocode']]
    df.reset_index(drop=True,inplace=True)
    return df


In [42]:
world_porti = preprocessPorts(world_porti)

In [21]:
#Analysis of data
#rotte_dataset_218158000 = rotte_dataset[rotte_dataset['mmsi']=='218158000']

#rotte_dataset_201100115 = rotte_dataset[rotte_dataset['mmsi']=='201100115']

#rotte_dataset_247046800 = rotte_dataset[rotte_dataset['mmsi']==247046800]
#rotte_dataset_247046800.to_csv("df_rotte_dataset_247046800_set2021.csv",index=False,sep=',')

rotte_dataset_247043300 = rotte_dataset[rotte_dataset['mmsi']==247043300]
rotte_dataset_247043300.to_csv("df_rotte_dataset_247043300_set2021.csv",index=False,sep=',')



In [24]:
ais_dataset[ais_dataset['mmsi']==247043300]


Unnamed: 0,lat,lng,speed,course,heading,elapsed,destination,flag,length,rot,...,w_left,dwt,gt_shiptype,shiptype,time,imo,mmsi,porto,type_summary,cod_porto
7878,40.84383,14.26333,0.0,,,4,,IT,1.0,,...,0.0,,,1,2021-09-06,5258482,247043300,Napoli,,IT00ITNAP
7879,40.83452,14.26462,0.0,32.0,,3,MARE,IT,57.0,,...,6.0,25.0,23.0,6,2021-09-07,5258482,247043300,Napoli,Passenger,IT00ITNAP
7880,40.83451,14.26461,0.0,27.0,,493,MARE,IT,57.0,,...,6.0,25.0,23.0,6,2021-09-08,5258482,247043300,Napoli,Passenger,IT00ITNAP
7881,40.83452,14.26463,0.0,29.0,,3,MARE,IT,57.0,,...,6.0,25.0,23.0,6,2021-09-14,5258482,247043300,Napoli,Passenger,IT00ITNAP
7882,40.83451,14.26464,0.0,30.0,,469,MARE,IT,57.0,,...,6.0,25.0,23.0,6,2021-09-15,5258482,247043300,Napoli,Passenger,IT00ITNAP
7883,40.84383,14.26333,0.0,,,4,,IT,1.0,,...,0.0,,,1,2021-09-19,5258482,247043300,Napoli,,IT00ITNAP
7884,40.83512,14.2653,46.0,109.0,,10,MARE,IT,57.0,,...,6.0,25.0,23.0,6,2021-09-24,5258482,247043300,Napoli,Passenger,IT00ITNAP
7885,40.8431,14.2637,0.0,109.0,,822,MARE,IT,57.0,,...,6.0,25.0,23.0,6,2021-09-25,5258482,247043300,Napoli,Passenger,IT00ITNAP
7886,40.83451,14.26462,0.0,23.0,,8,MARE,IT,57.0,,...,6.0,25.0,23.0,6,2021-09-28,5258482,247043300,Napoli,Passenger,IT00ITNAP


In [22]:
#Analysis of data
#rotte_dataset_218158000.to_csv("DCME\\AIS\\rotte_with_speed_2021\\rotte_arrivi_porti_set2021_218158000.csv",index=False,sep=',')
#rotte_dataset_201100115.to_csv("DCME\\AIS\\rotte_with_speed_2021\\rotte_arrivi_porti_set2021_201100115.csv",index=False,sep=',')

In [43]:
rotte_dataset_mmsi =  rotte_dataset['mmsi'].unique().astype(int)
print(len(rotte_dataset_mmsi))

2840


In [44]:
#Drop rows whose mmsi is not in rotte_092021_simplified(rotte_092021_mmsi,vessels_mmsi) from rotte_092021,
# i.e. get only rows whos mmsi is in mmsi_intersection

rotte_dataset_simplified = rotte_dataset[rotte_dataset['mmsi'].astype(int).isin(vessels_mmsi)]
#rotte_dataset_simplified.head()
print(len(rotte_dataset_simplified))
rotte_dataset_simplified = rotte_dataset_simplified.reset_index()
#rotte_dataset_simplified.head()

33334193


ELABORAZIONE DELLE ROTTE

In [48]:
from datetime import datetime
import haversine as hs
from haversine import Unit

In [49]:
#elaborazione degli arrivi (df)
def arrival_elaboration(df_rotte):
    #df_rotte = rottesort

    dim=len(df_rotte)
    print("df_rotte len before: ",dim)

    df_arrival = pd.DataFrame(columns = ['row','mmsi','arrival','departure','lng','lat','lng_orig','lat_orig','speed','status'])

    oldmmsi=0
    sumrec=0
    start=0
    lat_orig=0
    lng_orig=0
    oldlng=0
    oldlat=0
    start = None
    i=0
    status=0 ###0-nuovo 1-arrivato 2-partito
    end = None
    speed = None

    start_time = datetime.now()

    for item in df_rotte.itertuples():
        try:
            #item = df_rotte.iloc[i]
            mmsi,time_voyage,lng,lat,speed = item.mmsi,item.timestamp,float(item.lng),float(item.lat),int(item.speed)

            if(mmsi!=oldmmsi):
                if (status==1):#si riferisce alla old ship
                    df_arrival=df_arrival.append({'row':i,'mmsi':oldmmsi,'arrival':start,
                           'departure':end,'lng':oldlng,'lat':oldlat,'lng_orig':lng_orig,'lat_orig':lat_orig,
                            'speed':speed,'status':status},ignore_index=True)
                start,status,lng_orig,lat_orig,oldlng,oldlat,oldmmsi=None,0,0,0,0,0,mmsi
                oldmmsi = mmsi
    
            if(speed==0):          
                if (status==0):
                    start,oldlng,oldlat = time_voyage,lng,lat
                
                #To calculate distance in meters
                if(status<2):
                    loc1=(lat,lng)
                    loc2=(oldlat,oldlng)
            
                    distance = hs.haversine(loc1,loc2,unit=Unit.METERS)
            
                    if(distance > 3000.0):
                        #print(distance)
                        #if ((abs(oldlng-lng)+abs(oldlat-lat))>0.3):
                        df_arrival=df_arrival.append({'row':i,'mmsi':oldmmsi,'arrival':start,
                           'departure':end,'lng':oldlng,'lat':oldlat,
                                       'lng_orig':lng_orig,'lat_orig':lat_orig,'speed':speed,'status':status},ignore_index=True)
                        start,lng_orig,lat_orig,oldlng,oldlat = time_voyage,oldlng,oldlat,lng,lat
                    
                if(status==2):
                    start,lng_orig,lat_orig,oldlng,oldlat = time_voyage,oldlng,oldlat,lng,lat
                    
                end=time_voyage
                status=1
                
            if (speed>0):
                if(status==1):
                    loc1=(lat,lng)
                    loc2=(oldlat,oldlng)
                    distance = hs.haversine(loc1,loc2,unit=Unit.METERS)
                    if(distance > 3000.0):
                        df_arrival=df_arrival.append({'row':i,'mmsi':oldmmsi,'arrival':start,
                           'departure':end,'lng':oldlng,'lat':oldlat,
                            'lng_orig':lng_orig,'lat_orig':lat_orig,'speed':speed,'status':2},ignore_index=True)
                        
                        status,lng_orig,lat_orig = 2,oldlng,oldlat
                if(status==0):
                    status=2
                   
            #stampa di controllo
            if(i%1000000 == 0):
               print(i)
        except Exception as e:
            print(i," ")
            print(e)
        i+=1
    #end for
    
    #scrive l'ultimo record
    if(status==1):
            df_arrival=df_arrival.append({'row':i,'mmsi':oldmmsi,'arrival':start,
                        'departure':end,'lng':oldlng,'lat':oldlat,
                        'lng_orig':lng_orig,'lat_orig':lat_orig,'speed':speed,'status':status},ignore_index=True)

 
    end_time = datetime.now()
    print('Duration: {}'.format(end_time - start_time))
    print("df_arrival len after: ",len(df_arrival))
    
    return(df_arrival)

In [46]:
df_rotte = rotte_dataset_simplified[['mmsi','stamp','timestamp','lng','lat','speed']]
df_rotte = df_rotte.sort_values(by=['mmsi','stamp'],ascending=[True,True])
df_rotte = df_rotte.reset_index()

In [50]:
df_arrival= arrival_elaboration(df_rotte)

df_rotte len before:  33334193
0
1000000
2000000
3000000
4000000
5000000
6000000
7000000
8000000
9000000
10000000
11000000
12000000
13000000
14000000
15000000
16000000
17000000
18000000
19000000
20000000
21000000
22000000
23000000
24000000
25000000
26000000
27000000
28000000
29000000
30000000
31000000
32000000
33000000
Duration: 0:18:42.514214
df_arrival len after:  82808


In [34]:
#ita_porti = world_porti[world_porti['Country']=='ITA']
#len(ita_porti)

457

In [51]:
def port_assign(df_rotte,df_porti):
    
    n=len(df_rotte)
    df = df_rotte
    df["port"] = ["null"]*n
    df["port_orig"] = ["null"]*n
    df["UNLocode"] = ["NA"]*n 
    df["UNLocode_orig"] = ["NA"]*n

    start_time = datetime.now()
    
    i=0
    for rotta in df_rotte.itertuples():
        found_orig,found=0,0
        #departure port position
        loc_orig=(float(rotta.lat_orig),float(rotta.lng_orig))
        #arrival port position
        loc=(float(rotta.lat),float(rotta.lng))
        
        for porto in df_porti.itertuples():            
            loc_port=(float(porto.Latitude),float(porto.Longitude))            
            distance1 = hs.haversine(loc,loc_port,unit=Unit.METERS)
            if(distance1 < 5000.0):
                df.loc[i,'port']=porto.Name
                df.loc[i,'UNLocode'] = porto.UNLocode
                found=1
                break
        if (found==0):
            df.loc[i,'port']='Not found'
        
        for porto in df_porti.itertuples():             
            loc_port=(float(porto.Latitude),float(porto.Longitude))
            distance2 = hs.haversine(loc_orig,loc_port,unit=Unit.METERS)
            if(distance2 < 5000.0):
                df.loc[i,'port_orig']=porto.Name
                df.loc[i,'UNLocode_orig'] = porto.UNLocode
                found_orig=1
                break    

        if (found_orig==0):
            df.loc[i,'port_orig']='Not found'
            
        #stampa di controllo
        if(i%10000 == 0):
               print(i)
        i+=1
    
    end_time = datetime.now()
    print('Duration: {}'.format(end_time - start_time))
    print("df_arrival len after: ",len(df_arrival))
        
    return(df)

In [52]:
df_rotte_arrivi_porti = port_assign(df_arrival,world_porti)

0
10000
20000
30000
40000
50000
60000
70000
80000
Duration: 0:44:02.455540
df_arrival len after:  82808


In [53]:
#export all records 1st and 2nd step
df_rotte_arrivi_porti.to_csv("df_rotte_arrivi_porti_set2021_after_portAssign.csv",index=False,sep=',')
#df_rotte_arrivi_porti.head()

ARRIVI PARTENZE

In [154]:
#df_rotte_arrivi_porti[["arrival", "departure"]] = df_rotte_arrivi_porti[["arrival", "departure"]].apply(pd.to_datetime)
#df_rotte_arrivi_porti.dtypes

In [54]:
def calc_tratte(df_arrivi):

    dim=len(df_arrivi)
    print("dim arrivals: ",dim)

    df_arrivi[["arrival", "departure"]] = df_arrivi[["arrival", "departure"]].apply(pd.to_datetime)


    df_tratte = pd.DataFrame(columns = ['mmsi','partenza','porto_orig','cod_orig','arrivo','porto_dest','cod_dest','status'])

    prevmmsi=0

    i=0
    status=0 ###
    prevpartenza=""
    prevport=""
    prevcode=""

    start_time = datetime.now()

    for current in df_arrivi.itertuples():
        i=i+1

        if (current.mmsi!=prevmmsi):
            if (prevpartenza!=""):
                df_tratte=df_tratte.append({'mmsi':prevmmsi,'partenza':prevpartenza,
                    'porto_orig':prevport,'cod_orig':prevcode,'arrivo':"",
                    'porto_dest':"",'cod_dest':"",'status':status},ignore_index=True)
            prevpartenza=""
            prevport=""
            prevcode=""

        time_approdo=(current.departure-current.arrival).seconds
        if (time_approdo>=900 and current.port!="Not found"):
            #print("rec ",i," : ",current)
            df_tratte=df_tratte.append({'mmsi':current.mmsi,'partenza':prevpartenza,
                'porto_orig':prevport,'cod_orig':prevcode,'arrivo':current.arrival,
                'porto_dest':current.port,'cod_dest':current.UNLocode,'status':status},ignore_index=True)
            prevmmsi=current.mmsi
            prevpartenza=current.departure
            prevport=current.port
            prevcode=current.UNLocode
        
        if(i%10000 == 0):
               print(i)

    if (prevpartenza!=""):
        df_tratte=df_tratte.append({'mmsi':prevmmsi,'partenza':prevpartenza,
            'porto_orig':prevport,'cod_orig':prevcode,'arrivo':"",
            'porto_dest':"",'cod_dest':"",'status':status},ignore_index=True)


    end_time = datetime.now()
    print('Duration: {}'.format(end_time - start_time))
    print("df_arrival len after: ",len(df_arrival))
    
    return(df_tratte)

In [55]:
df_rotte_arrivi_porti=calc_tratte(df_rotte_arrivi_porti)

dim arrivals:  82808
10000
20000
30000
40000
50000
60000
70000
80000
Duration: 0:04:51.142784
df_arrival len after:  82808


ADD INFORMATION TO THE DATASET

In [56]:
vessels_mmsi_imo_dict = pd.Series(ais_dataset.imo.values,index=ais_dataset.mmsi).to_dict()
#vessels_mmsi_callsign_dict = pd.Series(ais_dataset.callsign.values,index=ais_dataset.mmsi).to_dict()
vessels_mmsi_shiptype_desc_dict = pd.Series(ais_dataset.shiptype_desc.values,index=ais_dataset.mmsi).to_dict()
vessels_mmsi_type_summary_dict = pd.Series(ais_dataset.type_summary.values,index=ais_dataset.mmsi).to_dict()

vessels_mmsi_imo_clean_dict = {k: vessels_mmsi_imo_dict[k] for k in vessels_mmsi_imo_dict if not pd.isna(vessels_mmsi_imo_dict[k])}
#vessels_mmsi_callsign_clean_dict = {k: vessels_mmsi_callsign_dict[k] for k in vessels_mmsi_callsign_dict if not pd.isna(vessels_mmsi_callsign_dict[k])}
vessels_mmsi_shiptype_desc_clean_dict = {k: vessels_mmsi_shiptype_desc_dict[k] for k in vessels_mmsi_shiptype_desc_dict if not pd.isna(vessels_mmsi_shiptype_desc_dict[k])}
vessels_mmsi_type_summary_clean_dict = {k: vessels_mmsi_type_summary_dict[k] for k in vessels_mmsi_type_summary_dict if not pd.isna(vessels_mmsi_type_summary_dict[k])}

In [57]:
df_rotte_arrivi_porti["imo"] = df_rotte_arrivi_porti["mmsi"].astype(int)
df_rotte_arrivi_porti['imo'].replace(vessels_mmsi_imo_clean_dict ,inplace=True)

#df_rotte_arrivi_porti["callsign"] = df_rotte_arrivi_porti["mmsi"].astype(int)
#df_rotte_arrivi_porti['callsign'].replace(vessels_mmsi_callsign_clean_dict ,inplace=True)

df_rotte_arrivi_porti['shiptype_desc'] = df_rotte_arrivi_porti["mmsi"].astype(int)
df_rotte_arrivi_porti['shiptype_desc'].replace(vessels_mmsi_shiptype_desc_clean_dict ,inplace=True)

df_rotte_arrivi_porti['type_summary'] = df_rotte_arrivi_porti["mmsi"].astype(int)
df_rotte_arrivi_porti['type_summary'].replace(vessels_mmsi_type_summary_clean_dict ,inplace=True)

In [58]:
#export all records
df_rotte_arrivi_porti.to_csv("df_rotte_arrivi_porti_set2021_final.csv",index=False,sep=',')
#df_rotte_arrivi_porti

In [None]:
#export only records such that a port has been found
#df_rotte_arrivi_porti_world_found = df_rotte_arrivi_porti[(df_rotte_arrivi_porti['port'] != 'Not found')]
#df_rotte_arrivi_porti_world_found.to_csv(rotte_arrivi_porti_world_fname,index=False,sep=',')

COMPUTE STATISTICS OF ARRIVALS IN THE PORTS

In [4]:
#df_rotte_arrivi_porti = pd.read_csv("df_rotte_arrivi_porti_set2021.csv", sep=',')

In [59]:
df_rotte_arrivi_porti.head()

Unnamed: 0,mmsi,partenza,porto_orig,cod_orig,arrivo,porto_dest,cod_dest,status,imo,shiptype_desc,type_summary
0,1193046.0,,,,2021-09-01 01:29:26,Porto Torres,ITPTO,0,7512507,Tug,Tug
1,1193046.0,2021-09-01 01:50:28,Porto Torres,ITPTO,2021-09-01 02:26:27,Porto Torres,ITPTO,0,7512507,Tug,Tug
2,1193046.0,2021-09-01 02:41:28,Porto Torres,ITPTO,2021-09-01 04:23:29,Porto Torres,ITPTO,0,7512507,Tug,Tug
3,1193046.0,2021-09-01 04:44:31,Porto Torres,ITPTO,2021-09-01 05:35:31,Porto Torres,ITPTO,0,7512507,Tug,Tug
4,1193046.0,2021-09-01 20:20:53,Porto Torres,ITPTO,2021-09-01 20:38:53,Porto Torres,ITPTO,0,7512507,Tug,Tug


In [60]:
df_rotte_arrivi_porti[df_rotte_arrivi_porti['cod_orig'].str.startswith('IT', na=False)].groupby(['cod_orig','porto_orig']).size().to_csv('rotte_arrivi_porti_set2021_tot_port_orig.csv',index=False,sep=',')

In [61]:
df_rotte_arrivi_porti[df_rotte_arrivi_porti['cod_dest'].str.startswith('IT', na=False)].groupby(['cod_dest','porto_dest']).size().to_csv('rotte_arrivi_porti_set2021_tot_port_dest.csv',index=False,sep=',')

In [63]:
#imo==5271367


df_rotte_arrivi_porti[df_rotte_arrivi_porti['mmsi'] == 247056900].groupby(['cod_dest','porto_dest']).size()

cod_dest  porto_dest
                        1
ITCLF     Carloforte    6
ITPMO     Palermo       1
dtype: int64

In [None]:
#df_rotte_all = pd.read_csv(rotte_arrivi_porti_all_fname, sep=',', low_memory=False)

In [None]:
#df_rotte_passengers_cargo = df_rotte_all[(df_rotte_all['type_summary']=='Passenger') | (df_rotte_all['type_summary']=='Cargo')]


#df_rotte_arrivi_porti_pass_cargo = df_rotte_arrivi_porti[(df_rotte_arrivi_porti['type_summary']=='Passenger') | (df_rotte_arrivi_porti['type_summary']=='Cargo')]
#len(df_rotte_arrivi_porti_pass_cargo)

In [None]:
#df_rotte_arrivi_porti_pass_cargo.to_csv("DCME\\AIS\\rotte_with_speed_2021\\rotte_arrivi_porti_passengers_cargo_set2021.csv",index=False,sep=',')