In [1]:
import pandas as pd

In [3]:
# Example: Parsing a list of ISO-formatted date strings into a DatetimeIndex
date_strings = [
    '2024-01-01',
    '2024-02-01',
    '2024-03-01',
]
dates = pd.to_datetime(date_strings)

# Bare last expression so the notebook renders the resulting index
dates

DatetimeIndex(['2024-01-01', '2024-02-01', '2024-03-01'], dtype='datetime64[ns]', freq=None)

In [4]:
# Example: A fixed-length duration expressed as a Timedelta
delta = pd.Timedelta(days=5)
print(delta)

# Example: Subtracting one Timestamp from another yields a Timedelta
diff = (
    pd.Timestamp('2024-01-01')
    - pd.Timestamp('2023-12-25')
)
print(diff)


5 days 00:00:00
7 days 00:00:00


In [5]:
# Example: A Period represents a whole span of time (here the month of
# January 2024) rather than a single instant
period = pd.Period(year=2024, month=1, freq='M')
print(period)


2024-01


In [9]:
# Example: Building a DataFrame indexed by ten consecutive daily timestamps
date_range = pd.date_range(start='2024-01-01', periods=10, freq='D')
df = pd.DataFrame(
    {'value': range(10)},
    index=date_range,
)

# Label-based slicing on the DatetimeIndex. The start label (Jan 8) is later
# than the end label (Jan 7), so no rows fall inside the slice and an empty
# frame is printed.
print(df.loc['2024-01-08':'2024-01-07'])






Empty DataFrame
Columns: [value]
Index: []


In [15]:
# Two frames whose daily indexes only partly overlap (Jan 1-5 vs. Jan 3-7)
df1 = pd.DataFrame({'value': range(5)}, index=pd.date_range('2024-01-01', periods=5))
df2 = pd.DataFrame({'value': range(5, 10)}, index=pd.date_range('2024-01-03', periods=5))

# Addition aligns rows on the index labels; dates present in only one of the
# frames have no partner and come out as NaN.
aligned_sum = df1 + df2

# Display the result's dtypes: the NaN rows turn 'value' from int into float64
aligned_sum.dtypes



value    float64
dtype: object

In [16]:
import pandas as pd

def group_by_time_proximity(df, time_column, time_threshold='1h'):
    """
    Label runs of rows whose timestamps lie close together.

    Rows are ordered by ``time_column``; a new group starts whenever the gap to
    the previous row is strictly greater than ``time_threshold``. A gap exactly
    equal to the threshold stays in the current group.

    Parameters:
    - df: Pandas DataFrame containing the data. It is not modified.
    - time_column: The name of the column with the time information. Its
      values are parsed with ``pd.to_datetime``.
    - time_threshold: Largest gap allowed inside a group; anything accepted by
      ``pd.Timedelta`` (default '1h' for 1 hour).

    Returns:
    A new DataFrame sorted by ``time_column`` with a fresh 0..n-1 index and an
    additional integer 'group' column holding the group ID (starting at 0).
    An empty input yields an empty result that still has the 'group' column.
    """
    threshold = pd.Timedelta(time_threshold)

    # Work on a copy: converting the column on `df` itself would silently
    # change the caller's DataFrame.
    ordered = df.copy()
    ordered[time_column] = pd.to_datetime(ordered[time_column])

    # Sorting is required because groups are defined on consecutive timestamps
    ordered = ordered.sort_values(by=time_column).reset_index(drop=True)

    # Gap to the previous row. The first row has no predecessor, so its gap is
    # NaT, which compares as False below and keeps that row in group 0.
    gaps = ordered[time_column].diff()

    # Each gap above the threshold opens a new group, so the running count of
    # such gaps is exactly the group ID.
    ordered['group'] = (gaps > threshold).cumsum().astype('int64')

    return ordered
    
# Example usage: five readings separated by gaps of 30, 90, 15 and 165 minutes
data = {
    'timestamp': ['2024-01-01 00:00:00', '2024-01-01 00:30:00', 
                  '2024-01-01 02:00:00', '2024-01-01 02:15:00', 
                  '2024-01-01 05:00:00'],
    'value': [10, 20, 30, 40, 50]
}
df = pd.DataFrame(data)

# Group by time proximity (e.g., within 1 hour). The lowercase 'h' unit is
# used because recent pandas releases deprecate the uppercase 'H' alias and
# emit a FutureWarning for it.
result = group_by_time_proximity(df, 'timestamp', time_threshold='1h')

print(result)


            timestamp  value  group
0 2024-01-01 00:00:00     10      0
1 2024-01-01 00:30:00     20      0
2 2024-01-01 02:00:00     30      1
3 2024-01-01 02:15:00     40      1
4 2024-01-01 05:00:00     50      2


  if time_diff > pd.Timedelta(time_threshold):
