In [1]:
import numpy as np
import pandas as pd

# `Timestamp`

    The timestamp object is used to create an instance of time. 
    
    We create timestamp object using pd.Timestamp() method.
    
    The timestamp object is very similiar to python datetime object but with expanded functionality.

In [2]:
pd.Timestamp('2024/3/24') # year/month/date

Timestamp('2024-03-24 00:00:00')

In [3]:
pd.Timestamp('2024-03-24')

Timestamp('2024-03-24 00:00:00')

In [4]:
pd.Timestamp(2024, 3, 24)

Timestamp('2024-03-24 00:00:00')

In [5]:
pd.Timestamp('24 March 2024')

Timestamp('2024-03-24 00:00:00')

In [6]:
pd.Timestamp('2024, 03, 24')

Timestamp('2024-03-24 00:00:00')

#### `We can also just pass the year and pandas is smart enough to understand that.`

In [7]:
pd.Timestamp('2024')

Timestamp('2024-01-01 00:00:00')

In [8]:
t = pd.Timestamp('2024')
t

Timestamp('2024-01-01 00:00:00')

In [9]:
type(t)

pandas._libs.tslibs.timestamps.Timestamp

# `datetime index object`

    We can also create an index object using multiple timestamps. This datetime index object can then be used to
    index a series or a dataframe inside pandas.

In [10]:
index = pd.DatetimeIndex(['2024/03/05', '2024/03/23', '2024/01/14'])
index

DatetimeIndex(['2024-03-05', '2024-03-23', '2024-01-14'], dtype='datetime64[ns]', freq=None)

    We can also provide timestamp objects directly inside the DatetimeIndex constructor.

In [11]:
index = pd.DatetimeIndex([pd.Timestamp(2024, 3, 5), pd.Timestamp(2024, 3, 23), pd.Timestamp(2024, 1, 14)])
index

DatetimeIndex(['2024-03-05', '2024-03-23', '2024-01-14'], dtype='datetime64[ns]', freq=None)

    We can also pass in the python's datetime objects inside the DatetimeIndex() constructor.

In [12]:
import datetime as dt

index = pd.DatetimeIndex([dt.datetime(2024, 3, 5), dt.datetime(2024, 3, 23), dt.datetime(2024, 1, 14)])
index

DatetimeIndex(['2024-03-05', '2024-03-23', '2024-01-14'], dtype='datetime64[ns]', freq=None)

#### `We can now use this datatime index object as a index in a series or a dataframe or as a column.`

In [13]:
ser = pd.Series([100, 900, 850], index = index)
ser

2024-03-05    100
2024-03-23    900
2024-01-14    850
dtype: int64

In [14]:
df = ser.reset_index()
df

Unnamed: 0,index,0
0,2024-03-05,100
1,2024-03-23,900
2,2024-01-14,850


In [15]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   index   3 non-null      datetime64[ns]
 1   0       3 non-null      int64         
dtypes: datetime64[ns](1), int64(1)
memory usage: 180.0 bytes


### `We can now get various details from df['index'] using the dt accessor.`

In [16]:
df['index'].dt.year
df['index'].dt.month
df['index'].dt.day

0     5
1    23
2    14
Name: index, dtype: int32

In [17]:
df['index'].dt.minute
df['index'].dt.hour
df['index'].dt.second

0    0
1    0
2    0
Name: index, dtype: int32

In [18]:
dir(df['index'].dt)

['__annotations__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__frozen',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_accessors',
 '_add_delegate_accessors',
 '_constructor',
 '_delegate_method',
 '_delegate_property_get',
 '_delegate_property_set',
 '_dir_additions',
 '_dir_deletions',
 '_freeze',
 '_get_values',
 '_hidden_attrs',
 '_parent',
 '_reset_cache',
 'as_unit',
 'ceil',
 'date',
 'day',
 'day_name',
 'day_of_week',
 'day_of_year',
 'dayofweek',
 'dayofyear',
 'days_in_month',
 'daysinmonth',
 'floor',
 'freq',
 'hour',
 'is_leap_year',
 'is_month_end',
 'is_month_start',
 'is_quarter_end',
 'is_quarter_start',
 'is_year_end',
 'is_year_start',
 'isocalendar',
 'microsecond',


In [19]:
df['index'].dt.day_of_week

0    1
1    5
2    6
Name: index, dtype: int32

In [20]:
df['index'].dt.month_name()

0      March
1      March
2    January
Name: index, dtype: object

In [21]:
df['index'].dt.day_name()

0     Tuesday
1    Saturday
2      Sunday
Name: index, dtype: object

### `We can also create boolean masks using dt accessor.`

In [22]:
df['index'].dt.is_leap_year

0    True
1    True
2    True
Name: index, dtype: bool

In [23]:
df['index'].dt.is_month_end

0    False
1    False
2    False
Name: index, dtype: bool

In [24]:
df['index'].dt.is_month_start

0    False
1    False
2    False
Name: index, dtype: bool

In [25]:
df['index'].dt.is_year_end

0    False
1    False
2    False
Name: index, dtype: bool

In [26]:
df['index'].dt.is_year_start

0    False
1    False
2    False
Name: index, dtype: bool

# `date_range()`

    The date_range() object is used to create a range of pandas timestampes.

In [27]:
pd.date_range(start = '24 Jan 2024', end = '19 March 2024', freq = 'D')

DatetimeIndex(['2024-01-24', '2024-01-25', '2024-01-26', '2024-01-27',
               '2024-01-28', '2024-01-29', '2024-01-30', '2024-01-31',
               '2024-02-01', '2024-02-02', '2024-02-03', '2024-02-04',
               '2024-02-05', '2024-02-06', '2024-02-07', '2024-02-08',
               '2024-02-09', '2024-02-10', '2024-02-11', '2024-02-12',
               '2024-02-13', '2024-02-14', '2024-02-15', '2024-02-16',
               '2024-02-17', '2024-02-18', '2024-02-19', '2024-02-20',
               '2024-02-21', '2024-02-22', '2024-02-23', '2024-02-24',
               '2024-02-25', '2024-02-26', '2024-02-27', '2024-02-28',
               '2024-02-29', '2024-03-01', '2024-03-02', '2024-03-03',
               '2024-03-04', '2024-03-05', '2024-03-06', '2024-03-07',
               '2024-03-08', '2024-03-09', '2024-03-10', '2024-03-11',
               '2024-03-12', '2024-03-13', '2024-03-14', '2024-03-15',
               '2024-03-16', '2024-03-17', '2024-03-18', '2024-03-19'],
     

In [28]:
pd.date_range(start = '14 Jan 2024', end = '14 March 2024', freq = '3D')

DatetimeIndex(['2024-01-14', '2024-01-17', '2024-01-20', '2024-01-23',
               '2024-01-26', '2024-01-29', '2024-02-01', '2024-02-04',
               '2024-02-07', '2024-02-10', '2024-02-13', '2024-02-16',
               '2024-02-19', '2024-02-22', '2024-02-25', '2024-02-28',
               '2024-03-02', '2024-03-05', '2024-03-08', '2024-03-11',
               '2024-03-14'],
              dtype='datetime64[ns]', freq='3D')

In [29]:
pd.date_range(start = '14 Jan 2024', end = '12 March 2024', freq = 'B') # B stands for business days -- excluding weekend

DatetimeIndex(['2024-01-15', '2024-01-16', '2024-01-17', '2024-01-18',
               '2024-01-19', '2024-01-22', '2024-01-23', '2024-01-24',
               '2024-01-25', '2024-01-26', '2024-01-29', '2024-01-30',
               '2024-01-31', '2024-02-01', '2024-02-02', '2024-02-05',
               '2024-02-06', '2024-02-07', '2024-02-08', '2024-02-09',
               '2024-02-12', '2024-02-13', '2024-02-14', '2024-02-15',
               '2024-02-16', '2024-02-19', '2024-02-20', '2024-02-21',
               '2024-02-22', '2024-02-23', '2024-02-26', '2024-02-27',
               '2024-02-28', '2024-02-29', '2024-03-01', '2024-03-04',
               '2024-03-05', '2024-03-06', '2024-03-07', '2024-03-08',
               '2024-03-11', '2024-03-12'],
              dtype='datetime64[ns]', freq='B')

In [30]:
pd.date_range(start = '12 Jan 2024', end = '13 March 2024', freq = 'W') # gives first day of each week --by default sunday

DatetimeIndex(['2024-01-14', '2024-01-21', '2024-01-28', '2024-02-04',
               '2024-02-11', '2024-02-18', '2024-02-25', '2024-03-03',
               '2024-03-10'],
              dtype='datetime64[ns]', freq='W-SUN')

### `We can also specify which day we want to be considered as the first day of the week.`

In [31]:
pd.date_range(start = '12 Jan 2024', end = '13 March 2024', freq = 'W-MON') # considers MONDAY as the first day of the week

DatetimeIndex(['2024-01-15', '2024-01-22', '2024-01-29', '2024-02-05',
               '2024-02-12', '2024-02-19', '2024-02-26', '2024-03-04',
               '2024-03-11'],
              dtype='datetime64[ns]', freq='W-MON')

In [32]:
pd.date_range(start = '12 Jan 2024', end = '13 March 2024', freq = 'M') # consider only the month end

DatetimeIndex(['2024-01-31', '2024-02-29'], dtype='datetime64[ns]', freq='M')

In [33]:
pd.date_range(start = '12 Jan 2024', end = '13 March 2024', freq = 'MS') # consider only the month start

DatetimeIndex(['2024-02-01', '2024-03-01'], dtype='datetime64[ns]', freq='MS')

### `The date_range() method expects us to pass exactly three parameters out of four.`

      The 4 parameters inside the date_range() are start, end, freq, and periods.
      
      We need to specify exactly three of them.

In [34]:
pd.date_range(start = '12 Jan 2024', periods = 25, freq = '3D') # periods = 25 means give me 25 such days

DatetimeIndex(['2024-01-12', '2024-01-15', '2024-01-18', '2024-01-21',
               '2024-01-24', '2024-01-27', '2024-01-30', '2024-02-02',
               '2024-02-05', '2024-02-08', '2024-02-11', '2024-02-14',
               '2024-02-17', '2024-02-20', '2024-02-23', '2024-02-26',
               '2024-02-29', '2024-03-03', '2024-03-06', '2024-03-09',
               '2024-03-12', '2024-03-15', '2024-03-18', '2024-03-21',
               '2024-03-24'],
              dtype='datetime64[ns]', freq='3D')

In [35]:
pd.date_range(start = '12 Jan 2024 9:00AM', periods = 25, freq = 'H') # 'H' means hourly

DatetimeIndex(['2024-01-12 09:00:00', '2024-01-12 10:00:00',
               '2024-01-12 11:00:00', '2024-01-12 12:00:00',
               '2024-01-12 13:00:00', '2024-01-12 14:00:00',
               '2024-01-12 15:00:00', '2024-01-12 16:00:00',
               '2024-01-12 17:00:00', '2024-01-12 18:00:00',
               '2024-01-12 19:00:00', '2024-01-12 20:00:00',
               '2024-01-12 21:00:00', '2024-01-12 22:00:00',
               '2024-01-12 23:00:00', '2024-01-13 00:00:00',
               '2024-01-13 01:00:00', '2024-01-13 02:00:00',
               '2024-01-13 03:00:00', '2024-01-13 04:00:00',
               '2024-01-13 05:00:00', '2024-01-13 06:00:00',
               '2024-01-13 07:00:00', '2024-01-13 08:00:00',
               '2024-01-13 09:00:00'],
              dtype='datetime64[ns]', freq='H')

# to_datetime() method

In [36]:
expense = pd.read_csv('expense_data.csv')
expense.head()

Unnamed: 0,Date,Account,Category,Subcategory,Note,INR,Income/Expense,Note.1,Amount,Currency,Account.1
0,3/2/2022 10:11,CUB - online payment,Food,,Brownie,50.0,Expense,,50.0,INR,50.0
1,3/2/2022 10:11,CUB - online payment,Other,,To lended people,300.0,Expense,,300.0,INR,300.0
2,3/1/2022 19:50,CUB - online payment,Food,,Dinner,78.0,Expense,,78.0,INR,78.0
3,3/1/2022 18:56,CUB - online payment,Transportation,,Metro,30.0,Expense,,30.0,INR,30.0
4,3/1/2022 18:22,CUB - online payment,Food,,Snacks,67.0,Expense,,67.0,INR,67.0


In [37]:
expense.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 277 entries, 0 to 276
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Date            277 non-null    object 
 1   Account         277 non-null    object 
 2   Category        277 non-null    object 
 3   Subcategory     0 non-null      float64
 4   Note            273 non-null    object 
 5   INR             277 non-null    float64
 6   Income/Expense  277 non-null    object 
 7   Note.1          0 non-null      float64
 8   Amount          277 non-null    float64
 9   Currency        277 non-null    object 
 10  Account.1       277 non-null    float64
dtypes: float64(5), object(6)
memory usage: 23.9+ KB


In [38]:
expense['Date'] = pd.to_datetime(expense['Date'])
expense.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 277 entries, 0 to 276
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Date            277 non-null    datetime64[ns]
 1   Account         277 non-null    object        
 2   Category        277 non-null    object        
 3   Subcategory     0 non-null      float64       
 4   Note            273 non-null    object        
 5   INR             277 non-null    float64       
 6   Income/Expense  277 non-null    object        
 7   Note.1          0 non-null      float64       
 8   Amount          277 non-null    float64       
 9   Currency        277 non-null    object        
 10  Account.1       277 non-null    float64       
dtypes: datetime64[ns](1), float64(5), object(5)
memory usage: 23.9+ KB


In [39]:
expense['day'] = expense['Date'].dt.day_name()
expense.head()

Unnamed: 0,Date,Account,Category,Subcategory,Note,INR,Income/Expense,Note.1,Amount,Currency,Account.1,day
0,2022-03-02 10:11:00,CUB - online payment,Food,,Brownie,50.0,Expense,,50.0,INR,50.0,Wednesday
1,2022-03-02 10:11:00,CUB - online payment,Other,,To lended people,300.0,Expense,,300.0,INR,300.0,Wednesday
2,2022-03-01 19:50:00,CUB - online payment,Food,,Dinner,78.0,Expense,,78.0,INR,78.0,Tuesday
3,2022-03-01 18:56:00,CUB - online payment,Transportation,,Metro,30.0,Expense,,30.0,INR,30.0,Tuesday
4,2022-03-01 18:22:00,CUB - online payment,Food,,Snacks,67.0,Expense,,67.0,INR,67.0,Tuesday
