In [1]:
import pandas as pd
import glob

# Collect every CSV in the current folder. glob returns matches in arbitrary
# (filesystem-dependent) order, so sort them to make the combined row order
# reproducible across runs and machines.
files = sorted(glob.glob("./*.csv"))

# Fail with an explicit message instead of pandas' generic
# "No objects to concatenate" when the folder contains no CSV files.
if not files:
    raise ValueError("No CSV files found matching './*.csv'")

# Read each file and stack them into a single frame with a fresh 0..n-1 index
df = pd.concat((pd.read_csv(f) for f in files), ignore_index=True)

In [6]:
import json

# Tiebreaker ranks: when two messages share a timestamp, the user message sorts first
role_order = {"user": 0, "assistant": 1}

df = (
    # drop every row whose role is not user/assistant (e.g. tool calls)
    df.loc[df["role"].isin(["user", "assistant"])]
      .copy()
      .assign(
          # timestamps parsed as timezone-aware UTC
          date=lambda d: pd.to_datetime(d["date"], utc=True),
          # numeric rank per role; 9 is the fallback for any unmapped role
          _role_order=lambda d: d["role"].map(role_order).fillna(9).astype(int),
      )
      # oldest message first within each conversation
      .sort_values(by=["conversation_id", "date", "_role_order"], ascending=True)
)

# build a per-conversation "full_conversation" JSONL-style string (easy to parse)
def build_full_conversation(group: pd.DataFrame) -> str:
    """Serialize one conversation's messages as a JSONL string.

    Args:
        group: Rows of a single conversation; the "role" and "content"
            columns are read in the frame's current row order.

    Returns:
        One JSON object per message ({"role": ..., "content": ...}), joined
        with newlines. Non-ASCII characters are written as-is, not escaped.
    """
    serialized = []
    for role, content in zip(group["role"], group["content"]):
        message = {"role": role, "content": content}
        serialized.append(json.dumps(message, ensure_ascii=False))
    return "\n".join(serialized)

def _first_assistant_id(group: pd.DataFrame):
    """Return the first non-null assistant_id among the group's assistant rows.

    Returns None when the group has no assistant rows, or when every
    assistant row has a missing assistant_id (taking .iloc[0] on the empty
    result would otherwise raise IndexError).
    """
    ids = group.loc[group["role"] == "assistant", "assistant_id"].dropna()
    return None if ids.empty else ids.iloc[0]


# Collapse each conversation into one row: the full message history plus the
# assistant_id taken from its assistant messages.
# Only the columns the aggregation reads are selected, so apply() does not
# operate on the grouping column itself (newer pandas versions emit a
# DeprecationWarning for that).
df_conversations = (
    df.groupby("conversation_id")[["role", "content", "assistant_id"]]
      .apply(lambda g: pd.Series({
          "full_conversation": build_full_conversation(g),
          "assistant_id": _first_assistant_id(g),
      }))
      .reset_index()
)

  .apply(lambda g: pd.Series({


In [10]:
# Lookup table: assistant_id -> human-readable assistant name
assistant_id_to_name = {
    "copilot-RZMEQxdnWT6p8tnvBebo": "Official | TCS | FFH Retention",
    # TODO: add the remaining ids with their corresponding names once they are known
}

# Derive assistant_name from the lookup (ids missing from the table keep the
# raw id as their name), then drop the id column, which is no longer needed.
df_conversations = (
    df_conversations
      .assign(
          assistant_name=lambda d: d["assistant_id"]
                                    .map(assistant_id_to_name)
                                    .fillna(d["assistant_id"])
      )
      .drop(columns=["assistant_id"])
)


In [11]:
# Display the per-conversation table (bare last expression -> rich notebook output)
df_conversations

Unnamed: 0,conversation_id,full_conversation,assistant_name
0,chat-002PTro68yVyTCgqQgX1,"{""role"": ""user"", ""content"": ""Start Save - I'll...",Official | TCS | FFH Retention
1,chat-004AykuA8aAetpBFdLPF,"{""role"": ""user"", ""content"": ""Free Mode - I'll ...",Official | TCS | FFH Retention
2,chat-004yyEnJIBt3xGgl6RSN,"{""role"": ""user"", ""content"": ""Price Increase - ...",Official | TCS | FFH Retention
3,chat-005ZGc92bop08ZVuG3B3,"{""role"": ""user"", ""content"": ""The customer call...",Official | TCS | FFH Retention
4,chat-006jBSJxhXOprTC3bNpO,"{""role"": ""user"", ""content"": ""Probe for Value""}...",Official | TCS | FFH Retention
...,...,...,...
11417,chat-3sNCMrkGLE8maYrRMza1,"{""role"": ""user"", ""content"": ""please provide th...",Official | TCS | FFH Retention
11418,chat-3sPOjwve1Y2mBxn30aGF,"{""role"": ""user"", ""content"": ""Xinyu Wang called...",Official | TCS | FFH Retention
11419,chat-3sRPNVw4eMm7prGAqALm,"{""role"": ""user"", ""content"": ""renew contract""}\...",Official | TCS | FFH Retention
11420,chat-3sV1YFmFkloMqi4AZBkt,"{""role"": ""user"", ""content"": ""Free Mode - I'll ...",Official | TCS | FFH Retention
