In [None]:
from io import StringIO

from dask.dataframe import from_pandas
from pandas import read_csv

# Small in-memory CSV used as the sample data set. The row indentation and the
# trailing whitespace-only line are part of the literal exactly as written.
raw_csv = """timestamp_start,time_worked
    2021-01-01  9:25 AM,3 hours 12 minutes
    2021-02-03  4:25 PM,2 hours
    2021-03-05  1:25 PM,15 minutes
    2021-03-05 11:25 PM,55 minutes
    """
data = StringIO(raw_csv)

# Load with pandas first, then split the frame into two Dask partitions.
df = read_csv(data)
ddf = from_pandas(df, npartitions=2)

# Column dtypes as loaded, before any conversion is applied.
print(ddf.dtypes)

In [None]:
from dask.dataframe import to_datetime

# Parse the raw timestamp strings and keep the result in a new column next to
# the original one.
parsed_start = to_datetime(ddf["timestamp_start"])
ddf["converted_timestamp_start"] = parsed_start

# Show the dtypes again now that the parsed column has been added.
print(ddf.dtypes)

In [None]:
# Derive the weekday number from the parsed start timestamps via the .dt
# accessor.
start_times = ddf["converted_timestamp_start"]
ddf["day_of_week"] = start_times.dt.dayofweek

# Materialise just the two relevant columns for display.
columns_to_show = ["converted_timestamp_start", "day_of_week"]
print(ddf[columns_to_show].compute())

In [None]:
from datetime import timedelta

from pandas import to_timedelta

# Parse the free-text durations in "time_worked" into timedeltas.
#
# pandas' to_timedelta is vectorised, so it is handed each partition as a
# whole Series through map_partitions instead of being called once per element.
# `meta` declares the output as (column name, dtype) so Dask knows the result
# is timedelta64[ns] without running anything; the column therefore stays lazy
# and nothing is pulled into memory until the .compute() in the print below.
ddf["converted_time_worked"] = ddf["time_worked"].map_partitions(
    to_timedelta, meta=("converted_time_worked", "timedelta64[ns]")
)

print(ddf[["converted_timestamp_start", "converted_time_worked"]].compute())

In [None]:
# Completion time is the start timestamp plus the time worked.
started_at = ddf["converted_timestamp_start"]
time_spent = ddf["converted_time_worked"]
ddf["work_completed"] = started_at + time_spent

# Display the two inputs alongside the derived column.
columns_to_show = [
    "converted_timestamp_start",
    "converted_time_worked",
    "work_completed",
]
print(ddf[columns_to_show].compute())

In [None]:
from io import StringIO

from pandas import read_csv

# Same column layout as the first sample, but two of the timestamp_start
# entries are not valid timestamps ("missing" and one with "??" for minutes).
raw_csv = """timestamp_start,time_worked
    2021-01-01  9:25 AM,3 hours 12 minutes
    2021-02-03  4:25 PM,2 hours
    missing            ,15 minutes
    2021-03-05 11:?? PM,55 minutes
    """
data = StringIO(raw_csv)

# Rebuild both the pandas frame and its two-partition Dask counterpart.
df = read_csv(data)
ddf = from_pandas(df, npartitions=2)

# Column dtypes as loaded, before any conversion is attempted.
print(ddf.dtypes)

In [None]:
from io import StringIO

from dask.dataframe import from_pandas, to_datetime
from pandas import read_csv

# Every row writes its start time in a different textual layout; no explicit
# format is passed to to_datetime below.
raw_csv = """timestamp_start,time_worked
2021-01-01  9:25 AM,3 hours 12 minutes
"Thursday, October 9, 2022 14:25",2 hours
"January 12, 2022 14:25",15 minutes
    """
data = StringIO(raw_csv)

df = read_csv(data)
ddf = from_pandas(df, npartitions=2)

# Parse without a format hint and store the result beside the raw strings.
parsed_start = to_datetime(ddf["timestamp_start"])
ddf["converted_timestamp_start"] = parsed_start

# Show raw and parsed values side by side.
columns_to_show = ["timestamp_start", "converted_timestamp_start"]
print(ddf[columns_to_show].compute())

In [None]:
from io import StringIO

from dask.dataframe import from_pandas, to_datetime
from pandas import read_csv

# Timestamps in a custom layout ("year YYYY: MM/DD H:MM AM/PM") that is parsed
# with an explicit strptime-style format string.
raw_csv = """timestamp_start,time_worked
year 2021: 01/01 9:25 AM,3 hours 12 minutes
year 2021: 01/03 3:25 PM,2 hours
year 2021: 01/05 11:25 AM,2 hours
"""
data = StringIO(raw_csv)
df = read_csv(data)
ddf = from_pandas(df, npartitions=2)

# The literal "year " prefix and ":" are matched verbatim by the format.
timestamp_format = "year %Y: %m/%d %I:%M %p"
parsed_start = to_datetime(ddf["timestamp_start"], format=timestamp_format)
ddf["converted_timestamp_start"] = parsed_start

# Show raw and parsed values side by side.
columns_to_show = ["timestamp_start", "converted_timestamp_start"]
print(ddf[columns_to_show].compute())

In [None]:
from io import StringIO

from dask.dataframe import from_pandas, to_datetime
from pandas import read_csv

# The same wall-clock time recorded in three locations; the timezone name is
# carried in a separate "location" column rather than in the timestamp itself.
raw_csv = """timestamp_local,location
2021-01-01 09:01:12,Asia/Almaty
2021-01-01 09:01:12,Europe/London
2021-01-01 09:01:12,America/New_York
"""
data = StringIO(raw_csv)
df = read_csv(data)
ddf = from_pandas(df, npartitions=2)

# Parse the local timestamps with utc=False passed explicitly.
parsed_local = to_datetime(ddf["timestamp_local"], utc=False)
ddf["converted_date"] = parsed_local

# Show raw and parsed values side by side.
columns_to_show = ["timestamp_local", "converted_date"]
print(ddf[columns_to_show].compute())

In [None]:
def convert_tz(datetime_object, local_timezone, target_timezone="America/New_York"):
    """Attach a local timezone to a naive timestamp and convert it to another zone.

    Parameters
    ----------
    datetime_object
        Timezone-naive object exposing ``tz_localize`` (for example a
        ``pandas.Timestamp``).
    local_timezone
        Timezone the naive value was recorded in; passed to ``tz_localize``.
    target_timezone
        Timezone to express the result in; passed to ``tz_convert``. Defaults
        to ``"America/New_York"``, so existing two-argument calls behave
        exactly as before.

    Returns
    -------
    The timezone-aware value returned by ``tz_convert``.
    """
    # First state which zone the wall-clock time belongs to, then shift that
    # same instant into the target zone.
    localized = datetime_object.tz_localize(local_timezone)
    return localized.tz_convert(target_timezone)

In [None]:
# Localise each naive timestamp to the timezone named in its own row and
# express it in New York time. The source timezone differs per row, so the
# conversion runs row-wise (axis=1) through convert_tz.
#
# `meta` tells Dask the name and dtype of the resulting column. convert_tz
# returns tz-aware values in America/New_York, so the matching tz-aware
# datetime dtype is declared here (nanosecond resolution assumed).
ddf["converted_date_tz_aware"] = ddf[["converted_date", "location"]].apply(
    lambda row: convert_tz(row["converted_date"], row["location"]),
    axis=1,
    meta=("converted_date_tz_aware", "datetime64[ns, America/New_York]"),
)