In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the pretraining CSV (path is relative to the working directory);
# later cells read its 'text' column.
df = pd.read_csv(filepath_or_buffer="ChatTime-1-Pretrain-1M.csv")

In [19]:
import random

def describe_transition(v_t, v_t1, t):
    """
    Generates a structured description of the transition between two time series values.

    Parameters:
        v_t (float): Value at time step t
        v_t1 (float): Value at time step t+1
        t (int): Current time step

    Returns:
        dict: A structured representation of the transition with the keys
            "time_step", "raw_values", "change", "change_magnitude",
            "change_normalized", "trend" and "paraphrased_descriptions"
            (three sentences picked at random from six templates).
            Numeric values are returned as strings wrapped in '###' markers.
    """
    delta_v = round(v_t1 - v_t, 4)

    if v_t != 0:
        delta_percent = round((delta_v / abs(v_t)) * 100, 4)
    elif v_t1 == v_t:
        # Zero baseline and no movement: report 0% rather than infinity.
        delta_percent = 0.0
    else:
        # Relative change from a zero baseline is unbounded; keep the sign
        # so that a decrease is not reported as a positive infinity.
        delta_percent = float('inf') if v_t1 > v_t else float('-inf')

    # Halving maps the difference into [-1, 1]; this assumes the inputs
    # themselves are already scaled to [-1, 1] -- TODO confirm with the data.
    change_normalized = round(delta_v / 2, 4)
    abs_change = abs(change_normalized)

    if v_t1 > v_t:
        change_type = "Increase"
        trend = "Upward Trend"
    elif v_t1 < v_t:
        change_type = "Decrease"
        trend = "Downward Trend"
    else:
        change_type = "No Change"
        trend = "Stable"

    # Noun phrase used inside sentences. "Stable" alone would produce
    # fragments such as "a minimal stable", so it is given a noun here;
    # the "trend" value returned to the caller is left unchanged.
    trend_phrase = trend.lower()
    if not trend_phrase.endswith("trend"):
        trend_phrase = f"{trend_phrase} trend"
    # Indefinite article matching the phrase ("an upward" vs "a downward").
    article = "an" if trend_phrase[0] in "aeiou" else "a"

    # Formatting values for tokenization
    delta_v = f'###{delta_v}###'
    delta_percent = f'###{delta_percent}###'
    change_normalized = f'###{change_normalized}###'
    v_t = f'###{v_t}###'
    v_t1 = f'###{v_t1}###'

    # Categorizing magnitude for normalized data (-1 to 1)
    if abs_change < 0.05:
        magnitude = "Minimal"
    elif abs_change < 0.15:
        magnitude = "Small"
    elif abs_change < 0.3:
        magnitude = "Moderate"
    else:
        magnitude = "Significant"
    magnitude_word = magnitude.lower()

    # Generating diverse paraphrased descriptions
    descriptions = [
        f"At time step {t}, the value changed from {v_t} to {v_t1}, a difference of {delta_v} ({delta_percent}%). This reflects a {magnitude_word} {trend_phrase}.",
        f"Between steps {t} and {t+1}, the metric shifted from {v_t} to {v_t1}, showing a change of {delta_v} ({delta_percent}%), categorized as a {magnitude_word} {trend_phrase}.",
        f"A {magnitude_word} {trend_phrase} was observed as the value moved from {v_t} to {v_t1}, a difference of {delta_v} ({delta_percent}%).",
        f"The measurement experienced {article} {trend_phrase} from {v_t} to {v_t1}, corresponding to a shift of {delta_v} ({delta_percent}%).",
        f"Data indicates a {magnitude_word} {trend_phrase} as the metric evolved from {v_t} to {v_t1}, with a difference of {delta_v} ({delta_percent}%).",
        f"An evident {trend_phrase} was noted as the value transitioned from {v_t} to {v_t1}, with a shift of {delta_v} ({delta_percent}%).",
    ]
    random.shuffle(descriptions)

    return {
        "time_step": {"t": t, "t+1": t + 1},
        "raw_values": {"t": v_t, "t+1": v_t1},
        "change": change_type,
        "change_magnitude": magnitude,
        "change_normalized": change_normalized,
        "trend": trend,
        "paraphrased_descriptions": descriptions[:3]  # Return a random subset for diversity
    }

def process_time_series(series, smooth_text=True):
    """
    Applies a sliding window of size 2 over a time series clip and generates structured transition descriptions.

    Parameters:
        series (list of str): Time series tokens. Each token is a number
            wrapped in a three-character marker on both sides
            (e.g. '###-0.4159###'); the markers are sliced off before parsing.
        smooth_text (bool): Whether to prepend, for every transition after
            the first, a sentence that links it to the previous trend.

    Returns:
        list: One dict per consecutive pair of values, as returned by
            describe_transition. Empty when fewer than two tokens are given.

    Raises:
        ValueError: If the inner text of a token cannot be parsed as a float.
    """
    if len(series) < 2:
        return []

    # Parse each token once instead of twice (as the right and then the left
    # element of neighbouring windows).
    values = [float(token[3:-3]) for token in series]

    transitions = []
    prev_trend = None

    for i, (v_t, v_t1) in enumerate(zip(values, values[1:])):  # Sliding window of size 2
        transition = describe_transition(v_t, v_t1, i)

        if smooth_text and prev_trend:
            descriptions = transition["paraphrased_descriptions"]
            lead = descriptions[0]

            # "Stable" carries no noun; add one so the clause reads
            # "the previous stable trend" instead of "the previous stable".
            previous = prev_trend.lower()
            if not previous.endswith("trend"):
                previous = f"{previous} trend"

            # The lead sentence now continues a clause, so its first letter
            # must no longer be capitalised.
            descriptions.insert(
                0,
                f"Following the previous {previous}, {lead[:1].lower()}{lead[1:]}"
            )

        transitions.append(transition)
        prev_trend = transition["trend"]

    return transitions

# Example usage: take the first five space-separated tokens of the first
# row's 'text' field and print the leading description of every transition.
time_series_clip = df["text"][0].split(" ", 5)[:5]  # splitting past the 5th token is unnecessary
transitions = process_time_series(series=time_series_clip, smooth_text=True)
for t in transitions:
    print(t["paraphrased_descriptions"][0])


A minimal downward trend was observed as the value moved from ###-0.4159### to ###-0.4731###, a difference of ###-0.0572### (###-13.7533###%).
Following the previous downward trend, An evident upward trend was noted as the value transitioned from ###-0.4731### to ###-0.4343###, with a shift of ###0.0388### (###8.2012###%).
Following the previous upward trend, The measurement experienced a upward trend from ###-0.4343### to ###-0.3805###, corresponding to a shift of ###0.0538### (###12.3878###%).
Following the previous upward trend, An evident upward trend was noted as the value transitioned from ###-0.3805### to ###-0.3787###, with a shift of ###0.0018### (###0.4731###%).
