In [None]:
from io import StringIO

from dask.dataframe import from_pandas
from pandas import read_csv

# Inline CSV sample: one start timestamp and one free-text duration per row.
# The literal is kept exactly as written, including the indentation in front
# of every data row.
sample_csv = """timestamp_start,time_worked
    2021-01-01  9:25 AM,3 hours 12 minutes
    2021-02-03  4:25 PM,2 hours
    2021-03-05  1:25 PM,15 minutes
    2021-03-05 11:25 PM,55 minutes
    """
data = StringIO(sample_csv)

# Load with pandas first, then hand the frame to dask split into two partitions.
df = read_csv(data)
ddf = from_pandas(df, npartitions=2)

# Nothing has been parsed yet: both columns are reported as plain objects.
column_types = ddf.dtypes
print(column_types)
# timestamp_start    object
# time_worked        object
# dtype: object

In [None]:
from dask.dataframe import to_datetime

# Parse the string column with dask's to_datetime and keep the result as a
# new column next to the raw text.
parsed_start = to_datetime(ddf["timestamp_start"])
ddf["converted_timestamp_start"] = parsed_start

# The new column is reported as datetime64[ns]; the originals are untouched.
print(ddf.dtypes)
# timestamp_start                      object
# time_worked                          object
# converted_timestamp_start    datetime64[ns]
# dtype: object

In [None]:
# Derive the weekday number through the .dt accessor of the parsed column.
# In the output below 2021-01-01 (a Friday) is shown as 4 and 2021-02-03
# (a Wednesday) as 2, i.e. the numbering starts at Monday = 0.
start_times = ddf["converted_timestamp_start"]
ddf["day_of_week"] = start_times.dt.dayofweek

columns_to_show = ["converted_timestamp_start", "day_of_week"]
print(ddf[columns_to_show].compute())
#   converted_timestamp_start  day_of_week
# 0       2021-01-01 09:25:00            4
# 1       2021-02-03 16:25:00            2
# 2       2021-03-05 13:25:00            4
# 3       2021-03-05 23:25:00            4

In [None]:
from pandas import Timedelta, to_timedelta

# Convert the free-text durations with pandas' to_timedelta, one partition at
# a time. The result is left as a lazy dask Series (no .compute() here) so it
# can be assigned back onto the dask DataFrame like any other column.
# ``meta`` declares the (name, dtype) of the Series each partition returns.
ddf["converted_time_worked"] = ddf["time_worked"].map_partitions(
    to_timedelta, meta=("time_worked", "timedelta64[ns]")
)

# Computation only happens here, when the selected columns are materialized.
print(ddf[["converted_timestamp_start", "converted_time_worked"]].compute())
#   converted_timestamp_start converted_time_worked
# 0       2021-01-01 09:25:00       0 days 03:12:00
# 1       2021-02-03 16:25:00       0 days 02:00:00
# 2       2021-03-05 13:25:00       0 days 00:15:00
# 3       2021-03-05 23:25:00       0 days 00:55:00

In [None]:
# Start time plus time worked gives the moment the work finished.
started_at = ddf["converted_timestamp_start"]
time_spent = ddf["converted_time_worked"]
ddf["work_completed"] = started_at + time_spent

# Show the two inputs next to the derived column; the last row rolls over
# into the next calendar day.
columns_to_show = [
    "converted_timestamp_start",
    "converted_time_worked",
    "work_completed",
]
print(ddf[columns_to_show].compute())
#   converted_timestamp_start converted_time_worked      work_completed
# 0       2021-01-01 09:25:00       0 days 03:12:00 2021-01-01 12:37:00
# 1       2021-02-03 16:25:00       0 days 02:00:00 2021-02-03 18:25:00
# 2       2021-03-05 13:25:00       0 days 00:15:00 2021-03-05 13:40:00
# 3       2021-03-05 23:25:00       0 days 00:55:00 2021-03-06 00:20:00

In [None]:
# Round every start time down to the enclosing 15-minute boundary
# (09:25 becomes 09:15 in the output below). The bare expression on the last
# line is what the cell displays.
quarter_hour_starts = ddf["converted_timestamp_start"].dt.floor("15 min")
quarter_hour_starts.compute()
# 0   2021-01-01 09:15:00
# 1   2021-02-03 16:15:00
# 2   2021-03-05 13:15:00
# 3   2021-03-05 23:15:00
# Name: converted_timestamp_start, dtype: datetime64[ns]

In [None]:
from io import StringIO

from pandas import read_csv

# Same layout as the first sample, but two of the start timestamps cannot be
# parsed: one is the word "missing", the other has "??" for the minutes.
malformed_csv = """timestamp_start,time_worked
    2021-01-01  9:25 AM,3 hours 12 minutes
    2021-02-03  4:25 PM,2 hours
    missing            ,15 minutes
    2021-03-05 11:?? PM,55 minutes
    """
data = StringIO(malformed_csv)

# Rebuild both the pandas frame and the two-partition dask frame from it.
df = read_csv(data)
ddf = from_pandas(df, npartitions=2)

column_types = ddf.dtypes
print(column_types)
# timestamp_start    object
# time_worked        object
# dtype: object

In [None]:
# errors="coerce": values that cannot be parsed come back as NaT instead of
# raising, while the parseable rows are converted normally (4:25 PM -> 16:25).
print(to_datetime(ddf["timestamp_start"], errors="coerce").compute())
# 0   2021-01-01 09:25:00
# 1   2021-02-03 16:25:00
# 2                   NaT
# 3                   NaT
# dtype: datetime64[ns]

In [None]:
# errors="ignore": per the output below, the rows that parse are converted
# and the unparseable ones are passed through as their original text, so the
# result ends up with the object dtype.
# NOTE(review): newer pandas releases are reported to deprecate
# errors="ignore" — confirm against the installed version before relying on it.
best_effort_start = to_datetime(ddf["timestamp_start"], errors="ignore")
print(best_effort_start.compute())
# 0        2021-01-01 09:25:00
# 1        2021-02-03 16:25:00
# 2        missing
# 3        2021-03-05 11:?? PM
# dtype: object

In [None]:
from io import StringIO

from dask.dataframe import from_pandas, to_datetime
from pandas import read_csv

# Three rows, each writing its timestamp in a different textual format.
mixed_format_csv = """timestamp_start,time_worked
2021-01-01  9:25 AM,3 hours 12 minutes
"Thursday, October 9, 2022 14:25",2 hours
"January 12, 2022 14:25",15 minutes
    """
data = StringIO(mixed_format_csv)

df = read_csv(data)
ddf = from_pandas(df, npartitions=2)

# No explicit format is given, so the parser has to work each row out itself.
# NOTE(review): how mixed formats are inferred may differ between pandas
# versions — confirm the recorded output against the installed version.
parsed_start = to_datetime(ddf["timestamp_start"])
ddf["converted_timestamp_start"] = parsed_start

columns_to_show = ["timestamp_start", "converted_timestamp_start"]
print(ddf[columns_to_show].compute())
#                    timestamp_start converted_timestamp_start
# 0              2021-01-01  9:25 AM       2021-01-01 09:25:00
# 1  Thursday, October 9, 2022 14:25       2022-10-09 14:25:00
# 2           January 12, 2022 14:25       2022-01-12 14:25:00

In [None]:
from io import StringIO

from dask.dataframe import from_pandas, to_datetime
from pandas import read_csv

# Timestamps written in a non-standard layout ("year YYYY: MM/DD H:MM AM/PM").
custom_layout_csv = """timestamp_start,time_worked
year 2021: 01/01 9:25 AM,3 hours 12 minutes
year 2021: 01/03 3:25 PM,2 hours
year 2021: 01/05 11:25 AM,2 hours
"""
data = StringIO(custom_layout_csv)
df = read_csv(data)
ddf = from_pandas(df, npartitions=2)

# Spell the layout out as a strptime-style format string: the literal text
# "year " and ": " is matched verbatim, %I/%p handle the 12-hour clock.
custom_format = "year %Y: %m/%d %I:%M %p"
ddf["converted_timestamp_start"] = to_datetime(
    ddf["timestamp_start"], format=custom_format
)

columns_to_show = ["timestamp_start", "converted_timestamp_start"]
print(ddf[columns_to_show].compute())
#              timestamp_start converted_timestamp_start
# 0   year 2021: 01/01 9:25 AM       2021-01-01 09:25:00
# 1   year 2021: 01/03 3:25 PM       2021-01-03 15:25:00
# 2  year 2021: 01/05 11:25 AM       2021-01-05 11:25:00

In [None]:
from io import StringIO

from dask.dataframe import from_pandas, to_datetime
from pandas import read_csv

# The same wall-clock time recorded in three different places; the zone name
# travels in a separate "location" column.
local_times_csv = """timestamp_local,location
2021-01-01 09:01:12,Asia/Almaty
2021-01-01 09:01:12,Europe/London
2021-01-01 09:01:12,America/New_York
"""
data = StringIO(local_times_csv)
df = read_csv(data)
ddf = from_pandas(df, npartitions=2)

# utc=False: the parsed values carry no offset in the output below, so all
# three rows still look identical at this point.
naive_local = to_datetime(ddf["timestamp_local"], utc=False)
ddf["converted_date"] = naive_local

columns_to_show = ["timestamp_local", "converted_date"]
print(ddf[columns_to_show].compute())
#        timestamp_local      converted_date
# 0  2021-01-01 09:01:12 2021-01-01 09:01:12
# 1  2021-01-01 09:01:12 2021-01-01 09:01:12
# 2  2021-01-01 09:01:12 2021-01-01 09:01:12

In [None]:
def convert_tz(datetime_object, local_timezone, target_timezone="America/New_York"):
    """Attach a time zone to a naive timestamp and express it in another zone.

    Parameters
    ----------
    datetime_object
        Timezone-naive value exposing ``tz_localize`` (for example a
        ``pandas.Timestamp``).
    local_timezone
        Zone the naive value was recorded in, e.g. ``"Europe/London"``.
    target_timezone
        Zone to convert the localized value to. Defaults to
        ``"America/New_York"``, the zone this helper always used before the
        parameter existed, so existing two-argument calls are unaffected.

    Returns
    -------
    The result of ``tz_convert``: the same instant, expressed in
    ``target_timezone``.
    """
    # Step 1: declare which zone the wall-clock value belongs to.
    timezone_aware = datetime_object.tz_localize(local_timezone)
    # Step 2: re-express that instant in the requested zone.
    return timezone_aware.tz_convert(target_timezone)


# Row-wise conversion: each naive timestamp is localized to the zone named in
# its own "location" value and then expressed in America/New_York by
# convert_tz. ``meta`` has to describe what the apply really returns, which is
# a column of timezone-aware datetimes (see the output below), not floats.
ddf["converted_date_tz_aware"] = ddf[["converted_date", "location"]].apply(
    lambda row: convert_tz(row["converted_date"], row["location"]),
    axis=1,
    meta=("converted_date_tz_aware", "datetime64[ns, America/New_York]"),
)

print(ddf[["location", "converted_date_tz_aware"]].compute())
#            location   converted_date_tz_aware
# 0       Asia/Almaty 2020-12-31 22:01:12-05:00
# 1     Europe/London 2021-01-01 04:01:12-05:00
# 2  America/New_York 2021-01-01 09:01:12-05:00