In [2]:
import pandas as pd

In [12]:
# Daily DatetimeIndex covering 1 Jan 2024 through 28 Aug 2024 (both ends inclusive).
date_range = pd.date_range('2024-01-01', '2024-08-28', freq='D')
print(date_range)

DatetimeIndex(['2024-01-01', '2024-01-02', '2024-01-03', '2024-01-04',
               '2024-01-05', '2024-01-06', '2024-01-07', '2024-01-08',
               '2024-01-09', '2024-01-10',
               ...
               '2024-08-19', '2024-08-20', '2024-08-21', '2024-08-22',
               '2024-08-23', '2024-08-24', '2024-08-25', '2024-08-26',
               '2024-08-27', '2024-08-28'],
              dtype='datetime64[ns]', length=241, freq='D')


In [16]:
# Sample dates as ISO-formatted strings; they are parsed to datetimes below.
data = {'Date': ['2024-01-01', '2024-02-10', '2024-03-20']}

# Parse the strings, then derive the calendar components via the .dt accessor.
# assign() evaluates its keyword arguments in order, so the later lambdas
# already see the parsed 'Date' column.
df = pd.DataFrame(data).assign(
    Date=lambda frame: pd.to_datetime(frame['Date']),
    Day=lambda frame: frame['Date'].dt.day,
    Month=lambda frame: frame['Date'].dt.month,
    Year=lambda frame: frame['Date'].dt.year,
)

print(df[['Date', 'Day', 'Month', 'Year']])

        Date  Day  Month  Year
0 2024-01-01    1      1  2024
1 2024-02-10   10      2  2024
2 2024-03-20   20      3  2024


In [22]:
# Five consecutive days starting 2024-08-28, so the sample runs across a weekend.
df = pd.DataFrame({'Date': pd.date_range(start='2024-08-28', periods=5)})

# .dt.weekday numbers the days Monday=0 ... Sunday=6.
df['Weekday'] = df['Date'].dt.weekday

# Saturday (5) and Sunday (6) are the only weekday numbers >= 5. The explicit
# comparison replaces the earlier `weekday // 5 == 1` trick, which gave the
# same result but hid the intent.
df['IsWeekend'] = df['Weekday'] >= 5

print(df[['Date', 'Weekday', 'IsWeekend']])

        Date  Weekday  IsWeekend
0 2024-08-28        2      False
1 2024-08-29        3      False
2 2024-08-30        4      False
3 2024-08-31        5       True
4 2024-09-01        6       True


In [28]:
# Relies on `df` (with a 'Date' column) created by an earlier cell.
# Make sure 'Date' is datetime-typed before doing date arithmetic.
df['Date'] = df['Date'].pipe(pd.to_datetime)

# Shift each date one day back and one day forward.
df['PreviousDate'] = df['Date'].sub(pd.Timedelta(days=1))
df['NextDate'] = df['Date'].add(pd.Timedelta(days=1))

print(df[['Date', 'PreviousDate', 'NextDate']])

        Date PreviousDate   NextDate
0 2024-08-28   2024-08-27 2024-08-29
1 2024-08-29   2024-08-28 2024-08-30
2 2024-08-30   2024-08-29 2024-08-31
3 2024-08-31   2024-08-30 2024-09-01
4 2024-09-01   2024-08-31 2024-09-02


In [30]:
# A fixed three-day duration; the bare name on the last line shows its repr.
delta = pd.Timedelta(days=3)
delta

Timedelta('3 days 00:00:00')

In [32]:
# Relies on `df` (with a 'Date' column) created by an earlier cell.
# Make sure 'Date' is datetime-typed before doing date arithmetic.
df['Date'] = df['Date'].pipe(pd.to_datetime)

# A single Timedelta can combine several units: 2 weeks + 3 days + 12 hours.
df['FutureDate'] = df['Date'].add(pd.Timedelta(weeks=2, days=3, hours=12))

print(df[['Date', 'FutureDate']])

        Date          FutureDate
0 2024-08-28 2024-09-14 12:00:00
1 2024-08-29 2024-09-15 12:00:00
2 2024-08-30 2024-09-16 12:00:00
3 2024-08-31 2024-09-17 12:00:00
4 2024-09-01 2024-09-18 12:00:00


In [38]:
# Category labels listed from lowest to highest; the order matters because the
# categorical is declared as ordered.
categories = ['Low', 'Medium', 'High']
values = ['Low', 'Medium', 'High', 'Low', 'Low']

# Describe the categories and their ordering with an explicit dtype object.
cat_variable = pd.Categorical(
    values,
    dtype=pd.CategoricalDtype(categories=categories, ordered=True),
)
print(cat_variable)

['Low', 'Medium', 'High', 'Low', 'Low']
Categories (3, object): ['Low' < 'Medium' < 'High']


In [40]:
# One-column frame of category labels, built from a named Series.
df = pd.Series(['A', 'B', 'A', 'C', 'B', 'A'], name='Category').to_frame()

# Count how many times each label occurs (most frequent first).
category_counts = df['Category'].value_counts()
print(category_counts)

Category
A    3
B    2
C    1
Name: count, dtype: int64


In [42]:
# One-column frame of category labels.
df = pd.DataFrame({'Category': ['A', 'B', 'A', 'C', 'B', 'A']})

# One-hot encode the labels: one indicator column per distinct label, each
# named with the 'Category' prefix.
dummy_variables = df['Category'].pipe(pd.get_dummies, prefix='Category')
print(dummy_variables)

   Category_A  Category_B  Category_C
0        True       False       False
1       False        True       False
2        True       False       False
3       False       False        True
4       False        True       False
5        True       False       False


In [44]:
# One-column frame of category labels.
df = pd.DataFrame({'Category': ['A', 'B', 'A', 'C', 'B', 'A']})

# Label-encode: each distinct label is replaced by its integer category code.
df['Category_LabelEncoded'] = pd.Categorical(df['Category']).codes

print(df[['Category', 'Category_LabelEncoded']])

  Category  Category_LabelEncoded
0        A                      0
1        B                      1
2        A                      0
3        C                      2
4        B                      1
5        A                      0
