In [6]:
import pandas as pd
import numpy as np
from datetime import timedelta

# Read the raw ticket export into a DataFrame.
df = pd.read_csv('customer_support_tickets.csv')

# Convert both event columns to datetimes; values that cannot be parsed
# become NaT (errors='coerce') instead of raising.
for column in ('First Response Time', 'Time to Resolution'):
    df[column] = pd.to_datetime(df[column], errors='coerce')

# Complaint dates are kept on or after this floor whenever the earliest event
# lies at least one hour after it.
_EARLIEST_ALLOWED = pd.Timestamp('2020-01-01')
# Hard cap on how far before the earliest event a complaint may be placed
# (720 hours = 30 days).
_MAX_LOOKBACK_HOURS = 720
# Candidate "long delay" offsets, in hours (1 to 7 days, plus the 30-day cap).
_LONG_DELAY_HOURS = (24, 36, 48, 72, 96, 120, 144, 168, 720)


def make_complaint_date(row):
    """Synthesize a complaint timestamp that precedes the row's earliest event.

    The earliest non-null value of ``row['First Response Time']`` and
    ``row['Time to Resolution']`` is taken as the anchor, and a random offset
    is subtracted from it:

    * 50% of the time a uniform 1-6 hour delay,
    * otherwise one of the fixed delays in ``_LONG_DELAY_HOURS``,

    plus 0-59 random minutes and 0-59 random seconds of jitter.

    The total offset (jitter included) never exceeds the lookback limit,
    which is the smaller of ``_MAX_LOOKBACK_HOURS`` and the time elapsed
    between ``_EARLIEST_ALLOWED`` and the anchor. If the anchor is less than
    one hour after ``_EARLIEST_ALLOWED`` (or before it), the limit cannot be
    honoured; the offset is then one hour plus jitter so that the result is
    still strictly before the anchor.

    Randomness comes from NumPy's global generator, so call
    ``np.random.seed(...)`` beforehand for reproducible output.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) that provides the
            keys ``'First Response Time'`` and ``'Time to Resolution'``.

    Returns:
        The complaint time formatted as ``"%Y-%m-%d %H:%M:%S"``, or ``""``
        when both event times are null.
    """
    event_times = [
        t
        for t in (row['First Response Time'], row['Time to Resolution'])
        if pd.notnull(t)
    ]
    if not event_times:
        return ""
    earliest = min(event_times)

    # Lookback limit in hours: at most 30 days, and not before the floor date.
    max_hours = min(
        _MAX_LOOKBACK_HOURS,
        (earliest - _EARLIEST_ALLOWED).total_seconds() / 3600,
    )

    if max_hours < 1:
        # Not enough room above the floor; fall back to a one-hour delay.
        delta_hours = 1
    elif np.random.rand() < 0.5:
        delta_hours = np.random.uniform(1, 6)
    else:
        delta_hours = int(np.random.choice(_LONG_DELAY_HOURS))

    # Minutes/seconds jitter so the timestamps do not sit on round hours.
    random_minutes = np.random.randint(0, 60)
    random_seconds = np.random.randint(0, 60)
    jitter = timedelta(minutes=random_minutes, seconds=random_seconds)
    offset = timedelta(
        hours=delta_hours, minutes=random_minutes, seconds=random_seconds
    )

    if max_hours >= 1:
        limit = timedelta(hours=max_hours)
        if offset > limit:
            # The jitter must not push past the limit, so apply it towards the
            # anchor instead. jitter < 1h <= limit, so the offset stays > 0.
            offset = limit - jitter

    # The offset is always positive, so the result is strictly before the anchor.
    return (earliest - offset).strftime("%Y-%m-%d %H:%M:%S")

# Generate one synthetic complaint timestamp per ticket (row-wise apply),
# then attach the result as a new column.
complaint_dates = df.apply(make_complaint_date, axis=1)
df['Complaint Date'] = complaint_dates

# Write the augmented table to a new CSV, leaving out the DataFrame index.
df.to_csv(
    'customer_support_tickets_with_complaint_date.csv',
    index=False,
)

print("File saved as customer_support_tickets_with_complaint_date.csv")


File saved as customer_support_tickets_with_complaint_date.csv
