In [1]:
import datetime as dt

Python official [documentation](https://docs.python.org/fr/3/library/datetime.html)

In [2]:
import numpy as np
from numpy import datetime64

In [3]:
import pandas as pd
from pandas import (date_range, DataFrame)

Pandas [time series documentation](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html)

https://arrow.apache.org/docs/python/timestamps.html

In [4]:
d = np.arange('2002-10-27T04:30', 10, 1, dtype='M8[ns]')

17261550

In [52]:
date2int = lambda elt: np.datetime64(elt).astype(np.int64)
dates = np.sort(np.random.randint(date2int('2002-10-27T04:30'), date2int('2003-10-27T04:30'), 10, dtype='i8')).astype('M8[ns]')

In [53]:
dates

array(['1970-01-01T00:00:00.017267398', '1970-01-01T00:00:00.017346836',
       '1970-01-01T00:00:00.017371345', '1970-01-01T00:00:00.017402394',
       '1970-01-01T00:00:00.017528378', '1970-01-01T00:00:00.017531413',
       '1970-01-01T00:00:00.017551833', '1970-01-01T00:00:00.017553291',
       '1970-01-01T00:00:00.017752672', '1970-01-01T00:00:00.017752882'],
      dtype='datetime64[ns]')

In [22]:
d[0].astype(int).astype('M8[ns]')

numpy.datetime64('2002-10-27T04:30:00.000000000')

In [5]:
# equals d.size * d.itemsize
d.nbytes

80

# Types

- All these types are immuables
- `date` objects are always *naives*
- `time` or `datetime` types can be either *naive* or *aware* (such an object d must success `(d.tzinfo != None) & (d.tzinfo.utcoffset(d) ! None)`

Python/Pandas timestamp types without a associated time zone are referred to as “Time Zone Naive”. Python/Pandas timestamp types with an associated time zone are referred to as “Time Zone Aware”.

In [6]:
dt.date?

[0;31mInit signature:[0m [0mdt[0m[0;34m.[0m[0mdate[0m[0;34m([0m[0mself[0m[0;34m,[0m [0;34m/[0m[0;34m,[0m [0;34m*[0m[0margs[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m      date(year, month, day) --> date object
[0;31mFile:[0m           /opt/conda/lib/python3.7/datetime.py
[0;31mType:[0m           type
[0;31mSubclasses:[0m     datetime


In [7]:
dt.time?

[0;31mInit signature:[0m [0mdt[0m[0;34m.[0m[0mtime[0m[0;34m([0m[0mself[0m[0;34m,[0m [0;34m/[0m[0;34m,[0m [0;34m*[0m[0margs[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
time([hour[, minute[, second[, microsecond[, tzinfo]]]]]) --> a time object

All arguments are optional. tzinfo may be None, or an instance of
a tzinfo subclass. The remaining arguments may be ints.
[0;31mFile:[0m           /opt/conda/lib/python3.7/datetime.py
[0;31mType:[0m           type
[0;31mSubclasses:[0m     


In [8]:
dt.datetime?

[0;31mInit signature:[0m [0mdt[0m[0;34m.[0m[0mdatetime[0m[0;34m([0m[0mself[0m[0;34m,[0m [0;34m/[0m[0;34m,[0m [0;34m*[0m[0margs[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
datetime(year, month, day[, hour[, minute[, second[, microsecond[,tzinfo]]]]])

The year, month and day arguments are required. tzinfo may be None, or an
instance of a tzinfo subclass. The remaining arguments may be ints.
[0;31mFile:[0m           /opt/conda/lib/python3.7/datetime.py
[0;31mType:[0m           type
[0;31mSubclasses:[0m     _NaT, _Timestamp


In [9]:
dt.timedelta?

[0;31mInit signature:[0m [0mdt[0m[0;34m.[0m[0mtimedelta[0m[0;34m([0m[0mself[0m[0;34m,[0m [0;34m/[0m[0;34m,[0m [0;34m*[0m[0margs[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
Difference between two datetime values.

timedelta(days=0, seconds=0, microseconds=0, milliseconds=0, minutes=0, hours=0, weeks=0)

All arguments are optional and default to 0.
Arguments may be integers or floats, and may be positive or negative.
[0;31mFile:[0m           /opt/conda/lib/python3.7/datetime.py
[0;31mType:[0m           type
[0;31mSubclasses:[0m     _Timedelta


In [10]:
dt.timedelta?

[0;31mInit signature:[0m [0mdt[0m[0;34m.[0m[0mtimedelta[0m[0;34m([0m[0mself[0m[0;34m,[0m [0;34m/[0m[0;34m,[0m [0;34m*[0m[0margs[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
Difference between two datetime values.

timedelta(days=0, seconds=0, microseconds=0, milliseconds=0, minutes=0, hours=0, weeks=0)

All arguments are optional and default to 0.
Arguments may be integers or floats, and may be positive or negative.
[0;31mFile:[0m           /opt/conda/lib/python3.7/datetime.py
[0;31mType:[0m           type
[0;31mSubclasses:[0m     _Timedelta


In [11]:
dt.timezone?

[0;31mInit signature:[0m [0mdt[0m[0;34m.[0m[0mtimezone[0m[0;34m([0m[0mself[0m[0;34m,[0m [0;34m/[0m[0;34m,[0m [0;34m*[0m[0margs[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m      Fixed offset from UTC implementation of tzinfo.
[0;31mFile:[0m           /opt/conda/lib/python3.7/datetime.py
[0;31mType:[0m           type
[0;31mSubclasses:[0m     


# `datetime`

Constructors are:
- `datetime.datetime(*pargs, **kwargs)`
- `datetime.today(*pargs, **kwargs)`
- `datetime.now(*pargs, **kwargs)`
- `datetime.utcnow(*pargs, **kwargs)`
- `datetime.fromtimestamp(*pargs, **kwargs)`
- `datetime.fromordinal(*pargs, **kwargs)`
- `datetime.combine(*pargs, **kwargs)`
- `datetime.fromisoformat(*pargs, **kwargs)`
- `datetime.strptime(*pargs, **kwargs)`

`Datetime` objects have:
- class attributes `min`, `max` an `resolution`
- instance attributes `year`, `month`, `day`, `hour`, `minute`, `second`, `microsecond`, `tzinfo`, `fold`


See the documentation for usage.

In [12]:
t1 = dt.datetime(2019, 1, 1, 0, 0, 0, 0)
t1

datetime.datetime(2019, 1, 1, 0, 0)

In [13]:
now = dt.datetime.now()
now

datetime.datetime(2019, 8, 8, 20, 51, 46, 689050)

In [14]:
now - t1

datetime.timedelta(days=219, seconds=75106, microseconds=689050)

In [16]:
def generate_date():
    pass

# Pandas 

In [75]:
dti = date_range('2018-01-01', periods=3, freq='H')

In [80]:
date_range('1/1/2012', freq='0.1ms', periods=1000)

DatetimeIndex([       '2012-01-01 00:00:00', '2012-01-01 00:00:00.000100',
               '2012-01-01 00:00:00.000200', '2012-01-01 00:00:00.000300',
               '2012-01-01 00:00:00.000400', '2012-01-01 00:00:00.000500',
               '2012-01-01 00:00:00.000600', '2012-01-01 00:00:00.000700',
               '2012-01-01 00:00:00.000800', '2012-01-01 00:00:00.000900',
               ...
               '2012-01-01 00:00:00.099000', '2012-01-01 00:00:00.099100',
               '2012-01-01 00:00:00.099200', '2012-01-01 00:00:00.099300',
               '2012-01-01 00:00:00.099400', '2012-01-01 00:00:00.099500',
               '2012-01-01 00:00:00.099600', '2012-01-01 00:00:00.099700',
               '2012-01-01 00:00:00.099800', '2012-01-01 00:00:00.099900'],
              dtype='datetime64[ns]', length=1000, freq='100U')

In [None]:
pdf = pd.DataFrame({'naive': [datetime(2019, 1, 1, 0)],
                    'aware': [Timestamp(year=2019, month=1, day=1,
                    nanosecond=500, tz=timezone(timedelta(hours=+1)))]})

In [135]:
>>> pdf = DataFrame({'naive': [dt.datetime(2019, 1, 1, 0)],
...                  'aware': [Timestamp(year=2019, month=1, day=1,
...                            nanosecond=500, tz=dt.timezone(dt.timedelta(hours=-8)))]})
>>> pdf

NameError: name 'Timestamp' is not defined

# Spark

Spark stores timestamps as 64-bit integers representing microseconds since the UNIX epoch. It does not store any metadata about time zones with its timestamps.

Spark interprets timestamps with the session local time zone, (i.e. `spark.sql.session.timeZone`). If that time zone is undefined, Spark turns to the default system time zone.

In [136]:
from pyspark.sql import SparkSession
from pyarrow import TimestampValue

In [129]:
spark = ( SparkSession.builder
         .master("local")
         .appName("time")
         .getOrCreate())

In [137]:
TimestampValue?

[0;31mInit signature:[0m [0mTimestampValue[0m[0;34m([0m[0mself[0m[0;34m,[0m [0;34m/[0m[0;34m,[0m [0;34m*[0m[0margs[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m      Concrete class for timestamp array elements.
[0;31mFile:[0m           /opt/conda/lib/python3.7/site-packages/pyarrow/lib.cpython-37m-x86_64-linux-gnu.so
[0;31mType:[0m           type
[0;31mSubclasses:[0m     
