In [1]:
import os.path
import shutil

import numpy as np
import pandas as pd

# Read the survey export; the file is encoded as Windows-1252, not UTF-8.
df = pd.read_csv(
    filepath_or_buffer="snow_crab_survey.csv",
    encoding="windows-1252",
)
# Show the column labels so later cells can refer to them by name.
df.columns

Index(['year__annee', 'month__mois', 'day__jour', 'tow__trait', 'longitude',
       'latitude', 'start__depart', 'comment__commentaire',
       'snow_crab_males__males_de_crabe_des_neiges',
       'snow_crab_females__females_de_crabe_des_neiges',
       'Atlantic_cod__morue_franche', 'American_plaice__plie_americaine',
       'Yellowtail_flounder__limande_a_queue_jaune',
       'winter_flounder__plie_rouge', 'thorny_skate__raie_epineuset',
       'smooth_skate__raie_lisse',
       'longhorn_sculpin__chaboisseau_a_dix_huit_epines',
       'sea_potato__patate_de_mer', 'Hyas_coarctatus'],
      dtype='object')

In [2]:
def combine_date(row):
    """Build a date or date-time string from the date parts of one row.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the keys ``year__annee``, ``month__mois``,
        ``day__jour`` and ``start__depart``.

    Returns
    -------
    str
        ``"YYYY-MM-DD"`` when ``start__depart`` is missing, otherwise
        ``"YYYY-MM-DD <start__depart>"``.

    Raises
    ------
    ValueError, TypeError
        If the year, month or day cannot be converted to an integer
        (for example when one of them is NaN).
    """
    # Go through int() so float-typed parts (2019.0) do not leak a ".0"
    # suffix into the string before zero-padding.
    year = str(int(row["year__annee"])).zfill(4)
    month = str(int(row["month__mois"])).zfill(2)
    day = str(int(row["day__jour"])).zfill(2)
    time = row["start__depart"]
    # pd.isna recognises NaN, None, pd.NA and NaT; np.isnan would raise
    # TypeError on None and other non-float missing markers.
    if pd.isna(time):
        return f"{year}-{month}-{day}"
    return f"{year}-{month}-{day} {time}"

In [3]:
# Turn every row's date/time parts into text, then parse that text into
# timestamps stored in a new "datetime" column on df.
timestamp_text = df.apply(combine_date, axis=1)
df["datetime"] = pd.to_datetime(timestamp_text)

# Keep only the year, position and timestamp columns, ordered by timestamp,
# and drop rows that are exact repeats of an earlier row.
track_columns = ["year__annee", "latitude", "longitude", "datetime"]
df_trunc = df.sort_values("datetime")[track_columns].drop_duplicates()
df_trunc

Unnamed: 0,year__annee,latitude,longitude,datetime
0,2019,47.38342,-60.3894,2019-07-12 05:12:29
1,2019,47.43065,-60.4655,2019-07-12 06:55:00
2,2019,47.52278,-60.4038,2019-07-12 09:50:55
3,2019,47.55585,-60.4394,2019-07-12 11:10:07
4,2019,47.51007,-60.4835,2019-07-12 12:32:42
...,...,...,...,...
1393,2022,46.80943,-61.5015,2022-09-07 11:38:20
1394,2022,46.73471,-61.3558,2022-09-07 12:52:26
1395,2022,46.69394,-61.2367,2022-09-07 14:02:29
1396,2022,46.63319,-61.2400,2022-09-07 15:01:27


In [4]:
# Helper imported from the stock_assessment_surveys package; its
# implementation is not visible in this notebook.
from stock_assessment_surveys.utils.xy2line import xy2line
# Hand the de-duplicated year/latitude/longitude/datetime frame to the helper.
# NOTE(review): presumably this joins the tow positions into line geometry --
# confirm against xy2line's own documentation.
xy2line(df_trunc)