<a href="https://colab.research.google.com/github/d0k7/Data-Wrangling-Lab/blob/main/DW_Lab_9_Date_Time_Processing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

# Demo: date-time processing on a tiny frame of five consecutive daily
# readings -- component extraction, elapsed-time columns, monthly roll-up.
data = {
    "id": [1, 2, 3, 4, 5],
    "datetime": pd.date_range(start="2024-01-01", periods=5, freq="D"),
    "value": [10, 20, 15, 25, 30],
}
df = pd.DataFrame(data)

# Coerce the column to datetime64 so the `.dt` accessor is available
# (already the case for a date_range, but kept as an explicit guard).
df["datetime"] = pd.to_datetime(df["datetime"])

# Calendar components, appended in the order year, month, day, weekday.
df = df.assign(
    year=lambda frame: frame["datetime"].dt.year,
    month=lambda frame: frame["datetime"].dt.month,
    day=lambda frame: frame["datetime"].dt.day,
    weekday=lambda frame: frame["datetime"].dt.weekday,  # Monday=0 ... Sunday=6
)

# Whole days elapsed since the fixed reference date 2024-01-01.
df["days_since_start"] = df["datetime"].sub(pd.Timestamp("2024-01-01")).dt.days

# Whole days between each row and the previous one; the first row has no
# predecessor, so it is NaN and the column is float.
df["time_diff"] = (df["datetime"] - df["datetime"].shift(1)).dt.days

# Monthly roll-up: group rows by calendar month (as a Period) and total
# the `value` column.
monthly_agg = (
    df.groupby(df["datetime"].dt.to_period("M"))[["value"]]
    .sum()
    .reset_index()
)
# Replace each month Period with the timestamp at the start of that month.
monthly_agg["datetime"] = monthly_agg["datetime"].dt.to_timestamp()

# Report both tables.
print(
    "Original DataFrame with Date-Time Processing:",
    df,
    "\nMonthly Aggregated Data:",
    monthly_agg,
    sep="\n",
)


Original DataFrame with Date-Time Processing:
   id   datetime  value  year  month  day  weekday  days_since_start  \
0   1 2024-01-01     10  2024      1    1        0                 0   
1   2 2024-01-02     20  2024      1    2        1                 1   
2   3 2024-01-03     15  2024      1    3        2                 2   
3   4 2024-01-04     25  2024      1    4        3                 3   
4   5 2024-01-05     30  2024      1    5        4                 4   

   time_diff  
0        NaN  
1        1.0  
2        1.0  
3        1.0  
4        1.0  

Monthly Aggregated Data:
    datetime  value
0 2024-01-01    100
