In [1]:
import pandas as pd
from datetime import datetime

In [2]:
# Build a small DataFrame whose 'Date' column holds ISO-formatted date strings
data = {'Date': ['2022-01-01', '2022-02-01', '2022-03-01']}
df = pd.DataFrame(data)
fmt='%Y-%m-%d'
# Three ways to convert the 'Date' column from strings to datetimes.
# Each one rebinds `dates`; `df` itself is left unchanged.
# option 1: element-wise datetime.strptime through .apply
dates=df['Date'].apply(lambda x: datetime.strptime(x, fmt))
print(dates)
# option 2: .astype -- the unit must be spelled out; a bare 'datetime64'
# is rejected with a TypeError by pandas >= 2.0
dates=df['Date'].astype('datetime64[ns]')
print(dates)
# option 3: pd.to_datetime with an explicit format
dates=pd.to_datetime(df['Date'], format=fmt)
print(dates)

0   2022-01-01
1   2022-02-01
2   2022-03-01
Name: Date, dtype: datetime64[ns]
0   2022-01-01
1   2022-02-01
2   2022-03-01
Name: Date, dtype: datetime64[ns]
0   2022-01-01
1   2022-02-01
2   2022-03-01
Name: Date, dtype: datetime64[ns]


In [3]:
# Layout of the rendered strings: month/day/year separated by slashes
output_format = '%m/%d/%Y'
# Render every datetime in `dates` as text using that layout
dates.dt.strftime(date_format=output_format)

0    01/01/2022
1    02/01/2022
2    03/01/2022
Name: Date, dtype: object

In [4]:
# Convert the 'Date' column to datetimes so the .dt accessor is available
df['Date'] = pd.to_datetime(df['Date'], format=fmt)
# Add one column per calendar component, in the order year, month, day
df = df.assign(
    **{part: getattr(df['Date'].dt, part) for part in ('year', 'month', 'day')}
)
print(df)

        Date  year  month  day
0 2022-01-01  2022      1    1
1 2022-02-01  2022      2    1
2 2022-03-01  2022      3    1


In [5]:
# Three timestamped readings; the timestamps start out as plain strings
data = {
    'Timestamp': ['2022-01-01 10:00:00', '2022-01-01 12:00:00', '2022-01-01 14:00:00'],
    'Value': [10, 20, 30],
}
# Build the frame and parse the 'Timestamp' column into datetimes
df = pd.DataFrame(data).assign(Timestamp=lambda frame: pd.to_datetime(frame['Timestamp']))
# Inclusive lower and upper bounds of the window to keep
start_time, end_time = (
    pd.to_datetime(bound) for bound in ('2022-01-01 11:00:00', '2022-01-01 13:00:00')
)
# option 1: boolean mask combining both bounds
filtered_df1 = df.loc[(start_time <= df['Timestamp']) & (df['Timestamp'] <= end_time)]
print(filtered_df1)
# option 2: the same condition written as a query string (@ refers to Python variables)
filtered_df2 = df.query("@start_time<=Timestamp<=@end_time")
print(filtered_df2)

            Timestamp  Value
1 2022-01-01 12:00:00     20
            Timestamp  Value
1 2022-01-01 12:00:00     20


In [6]:
# Build 60 consecutive daily observations (values 1..60) indexed by date
data = {'Date': pd.date_range(start='2022-01-01', periods=60, freq='D'),
        'Value': range(1, 61)}
df = pd.DataFrame(data).set_index('Date')

# Downsample by taking the first observation of each month, labelled with the
# first day of that month. The month-start rule 'MS' produces those labels
# directly, so no manual relabelling of the index is needed.
monthly_begin = df.resample('MS').first()
print(monthly_begin)
# Downsample to monthly frequency (month-end labels), taking the average value.
# An offset object is used instead of the 'M' string alias, which pandas 2.2
# deprecated in favour of 'ME'; the object form works on both sides of that change.
monthly_data = df.resample(pd.offsets.MonthEnd()).mean()
# Print the downsampled DataFrame
print(monthly_data)

            Value
Date             
2022-01-01      1
2022-02-01     32
2022-03-01     60
            Value
Date             
2022-01-31   16.0
2022-02-28   45.5
2022-03-31   60.0


In [7]:
# Build 24 hourly observations for 2022-01-01, indexed by timestamp.
# The lowercase 'h' alias is used because the uppercase 'H' spelling is
# deprecated since pandas 2.2.
data = {'Timestamp': pd.date_range(start='2022-01-01', periods=24, freq='h'),
        'Value': [10, 20, 15, 30, 25, 35, 40, 50, 45, 55, 60, 70, 65, 80, 75, 90, 85, 100, 95, 110, 105, 120, 115, 130]}
df = pd.DataFrame(data).set_index('Timestamp')
# Resample into 4-hour bins and keep the maximum value of each bin
resampled_data = df.resample('4h').max()
# Print the resampled DataFrame
print(resampled_data)

                     Value
Timestamp                 
2022-01-01 00:00:00     30
2022-01-01 04:00:00     50
2022-01-01 08:00:00     70
2022-01-01 12:00:00     90
2022-01-01 16:00:00    110
2022-01-01 20:00:00    130


In [8]:
# Ten consecutive daily observations, indexed by their date
data = {
    'Date': pd.date_range(start='2022-01-01', periods=10, freq='D'),
    'Value': [10, 15, 20, 25, 30, 35, 40, 45, 50, 55],
}
df = pd.DataFrame(data).set_index('Date')

# Running total of 'Value': each row holds the sum of all rows up to and including it
df = df.assign(ExpandingSum=df['Value'].expanding().sum())
# Show the frame together with its running total
print(df)

            Value  ExpandingSum
Date                           
2022-01-01     10          10.0
2022-01-02     15          25.0
2022-01-03     20          45.0
2022-01-04     25          70.0
2022-01-05     30         100.0
2022-01-06     35         135.0
2022-01-07     40         175.0
2022-01-08     45         220.0
2022-01-09     50         270.0
2022-01-10     55         325.0


In [9]:
# Build 24 hourly observations for 2022-01-01, indexed by timestamp.
# The lowercase 'h' alias is used because the uppercase 'H' spelling is
# deprecated since pandas 2.2.
data = {'Timestamp': pd.date_range(start='2022-01-01', periods=24, freq='h'),
        'Value': [10, 20, 15, 30, 25, 35, 40, 50, 45, 55, 60, 70, 65, 80, 75, 90, 85, 100, 95, 110, 105, 120, 115, 130]}
df = pd.DataFrame(data).set_index('Timestamp')
# Defining a custom function for rolling max
def custom_rolling_max(series, window=3):
    """Return the rolling maximum of ``series`` over a trailing window.

    Parameters
    ----------
    series : pandas.Series
        Values to scan; the result keeps the same index.
    window : int, default 3
        Number of consecutive observations in each window. The default
        matches the previously hard-coded window size.

    Returns
    -------
    pandas.Series
        Maximum of each window. The first ``window - 1`` entries are NaN
        because a full window is not yet available for them.
    """
    return series.rolling(window=window).max()
# Apply the helper to the 'Value' column. The helper already operates on a
# whole Series, so it is called directly rather than routed through
# Series.transform.
df['CustomRollingMax'] = custom_rolling_max(df['Value'])
# Print the DataFrame with the rolling-max column added
print(df)

                     Value  CustomRollingMax
Timestamp                                   
2022-01-01 00:00:00     10               NaN
2022-01-01 01:00:00     20               NaN
2022-01-01 02:00:00     15              20.0
2022-01-01 03:00:00     30              30.0
2022-01-01 04:00:00     25              30.0
2022-01-01 05:00:00     35              35.0
2022-01-01 06:00:00     40              40.0
2022-01-01 07:00:00     50              50.0
2022-01-01 08:00:00     45              50.0
2022-01-01 09:00:00     55              55.0
2022-01-01 10:00:00     60              60.0
2022-01-01 11:00:00     70              70.0
2022-01-01 12:00:00     65              70.0
2022-01-01 13:00:00     80              80.0
2022-01-01 14:00:00     75              80.0
2022-01-01 15:00:00     90              90.0
2022-01-01 16:00:00     85              90.0
2022-01-01 17:00:00    100             100.0
2022-01-01 18:00:00     95             100.0
2022-01-01 19:00:00    110             110.0
2022-01-01