# Extract Dense Observation Drift Tracks

In [2]:
from icedef import iceberg

import pandas as pd

In [26]:
iip_season = 2015  # must be 2015 (more to come later)
max_hours = 12

In [36]:
def extract_dense_tracks(iip_season, max_hours, save=False):

    iip_df = iceberg.add_datetime_column(iceberg.get_iip_df(iip_season))

    iip_df = iip_df.sort_values(['ICEBERG_NUMBER', 'TIMESTAMP'], ascending=[True, True])

    iip_df = iip_df.reset_index(drop=True).drop(labels=['ICEBERG_YEAR', 'SIGHTING_DATE','SIGHTING_TIME','SIGHTING_METHOD'], axis=1)

    berg_nums = iip_df['ICEBERG_NUMBER'].tolist()
    berg_times = iip_df['TIMESTAMP'].tolist()

    berg_num = berg_nums[0]

    good_indices = []


    for i, row in iip_df.iterrows():

        if i+1 >= len(iip_df):
            break

        berg_num0 = berg_nums[i]
        berg_num1 = berg_nums[i+1]

        if berg_num0 == berg_num1:

            time0 = berg_times[i]
            time1 = berg_times[i+1]
            dtime = time1 - time0
            dt_hours = dtime.days*24 + dtime.seconds/3600

            if dt_hours < max_hours:
                good_indices.append(i)
                good_indices.append(i+1)

    good_indices = sorted(list(set(good_indices)))

    iip_df2 = iip_df[iip_df.index.isin(good_indices)]

    iip_df2['count'] = iip_df2.groupby('ICEBERG_NUMBER')['ICEBERG_NUMBER'].transform('count')

    iip_df2 = iip_df2.sort_values(['ICEBERG_NUMBER','TIMESTAMP'], ascending=[True, True])

    iip_df2 = iip_df2.reset_index(drop=True)

    berg_nums = iip_df2['ICEBERG_NUMBER'].tolist()
    berg_times = iip_df2['TIMESTAMP'].tolist()

    track_num = 0
    iip_df2['track_num'] = pd.Series(dtype=int)

    for i, row in iip_df2.iterrows():
        if i+2 > len(iip_df2):
            break
        berg_num0 = berg_nums[i]
        berg_num1 = berg_nums[i+1]
        time0 = berg_times[i]
        time1 = berg_times[i+1]
        dtime = time1 - time0
        dt_hours = dtime.days*24 + dtime.seconds/3600
        if berg_num0 == berg_num1 and dt_hours < max_hours:
            iip_df2.loc[i, 'track_num'] = track_num
            iip_df2.loc[i+1, 'track_num'] = track_num
        else:
            track_num += 1


    iip_df2 = iip_df2.sort_values(['track_num'], ascending=[True]).reset_index(drop=True)
    
    if save:
        iip_df2.to_csv(f'csvs/{iip_season}_max{max_hours}hr_tracks')

    return iip_df2

In [37]:
iip_df2 = extract_dense_tracks(iip_season, max_hours)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [38]:
iip_df2.head()

Unnamed: 0,ICEBERG_NUMBER,SIGHTING_LATITUDE,SIGHTING_LONGITUDE,SIZE,SHAPE,SOURCE,TIMESTAMP,count,track_num
0,22,48.38,-48.15,MED,GEN,GMRS,2015-03-19 12:16:00,2,0.0
1,22,48.44,-48.05,MED,GEN,GTJZ,2015-03-19 18:21:00,2,0.0
2,20029,47.84,-48.88,GEN,GEN,GTJZ,2015-04-18 10:51:00,4,1.0
3,20029,47.84,-48.62,LG,GEN,GTJZ,2015-04-18 18:28:00,4,1.0
4,20029,47.68,-48.37,MED,GEN,GMRS,2015-04-20 13:20:00,4,2.0


In [86]:
df_20668 = iip_df2.loc[iip_df2['ICEBERG_NUMBER']==20668].reset_index(drop=True)
df_20668 = df_20668[2:8].reset_index(drop=True)
df_20668

Unnamed: 0,ICEBERG_NUMBER,SIGHTING_LATITUDE,SIGHTING_LONGITUDE,SIZE,SHAPE,SOURCE,TIMESTAMP,count
0,20668,47.55,-49.51,SM,GEN,GTJZ,2015-06-25 11:18:00,10
1,20668,47.43,-49.5,SM,GEN,GPGR,2015-06-25 19:18:00,10
2,20668,47.43,-49.6,GEN,GEN,GTJZ,2015-06-26 10:46:00,10
3,20668,47.44,-49.6,LG,GEN,GMRS,2015-06-26 16:46:00,10
4,20668,47.45,-49.61,LG,GEN,GPGR,2015-06-27 11:04:00,10
5,20668,47.35,-49.58,GEN,GEN,2005,2015-06-27 13:31:00,10


In [87]:
df_23496 = iip_df2.loc[iip_df2['ICEBERG_NUMBER']==23496].reset_index(drop=True)
df_23496[2:8]

Unnamed: 0,ICEBERG_NUMBER,SIGHTING_LATITUDE,SIGHTING_LONGITUDE,SIZE,SHAPE,SOURCE,TIMESTAMP,count
2,23496,47.44,-48.96,MED,GEN,GTJZ,2015-06-25 11:30:00,10
3,23496,47.38,-49.01,LG,TAB,GPGR,2015-06-25 19:24:00,10
4,23496,47.35,-49.01,GEN,GEN,GTJZ,2015-06-26 11:00:00,10
5,23496,47.34,-49.0,MED,GEN,GMRS,2015-06-26 16:57:00,10
6,23496,47.31,-48.97,LG,GEN,GPGR,2015-06-27 10:51:00,10
7,23496,47.27,-49.01,GEN,GEN,2005,2015-06-27 13:35:00,10


In [91]:
df_21511 = iip_df2.loc[iip_df2['ICEBERG_NUMBER']==21511].reset_index(drop=True)
df_21511[2:6]

Unnamed: 0,ICEBERG_NUMBER,SIGHTING_LATITUDE,SIGHTING_LONGITUDE,SIZE,SHAPE,SOURCE,TIMESTAMP,count
2,21511,47.56,-49.41,LG,GEN,GTJZ,2015-06-25 11:18:00,8
3,21511,47.51,-49.44,LG,GEN,GPGR,2015-06-25 19:08:00,8
4,21511,47.46,-49.48,GEN,GEN,GTJZ,2015-06-26 10:46:00,8
5,21511,47.44,-49.48,MED,GEN,GMRS,2015-06-26 16:51:00,8


In [105]:
grps = iip_df2.groupby('ICEBERG_NUMBER')

In [112]:
for k,v in grps.groups.items():
    bob = iip_df2[iip_df2.index.isin(v)]

In [113]:
bob

Unnamed: 0,ICEBERG_NUMBER,SIGHTING_LATITUDE,SIGHTING_LONGITUDE,SIZE,SHAPE,SOURCE,TIMESTAMP,count
13554,25753,46.11,-46.77,MED,GEN,GPGR,2015-07-12 13:52:00,2
13555,25753,46.04,-46.78,GEN,GEN,2006,2015-07-12 19:54:00,2


In [117]:
df_dict = {k: iip_df2[iip_df2.index.isin(v)].reset_index(drop=True) for k,v in grps.groups.items()}

In [118]:
len(df_dict)

325

In [119]:
df_dict

{22:    ICEBERG_NUMBER  SIGHTING_LATITUDE  SIGHTING_LONGITUDE SIZE SHAPE SOURCE  \
 0              22              48.38              -48.15  MED   GEN   GMRS   
 1              22              48.44              -48.05  MED   GEN   GTJZ   
 
             TIMESTAMP  count  
 0 2015-03-19 12:16:00      2  
 1 2015-03-19 18:21:00      2  ,
 20029:    ICEBERG_NUMBER  SIGHTING_LATITUDE  SIGHTING_LONGITUDE SIZE SHAPE SOURCE  \
 0           20029              47.84              -48.88  GEN   GEN   GTJZ   
 1           20029              47.84              -48.62   LG   GEN   GTJZ   
 2           20029              47.68              -48.37  MED   GEN   GMRS   
 3           20029              47.64              -48.41   SM   DOM   GTJZ   
 
             TIMESTAMP  count  
 0 2015-04-18 10:51:00      4  
 1 2015-04-18 18:28:00      4  
 2 2015-04-20 13:20:00      4  
 3 2015-04-20 18:46:00      4  ,
 20030:    ICEBERG_NUMBER  SIGHTING_LATITUDE  SIGHTING_LONGITUDE SIZE SHAPE SOURCE  \
 0       