<a href="https://colab.research.google.com/github/abay-qkt/chatgpt-exported-conversations-loader/blob/main/ChatGPT_ExportDataLoader.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 準備

In [None]:
# Mount Google Drive into the Colab filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
import html
import json
from pathlib import Path

import numpy as np
import pandas as pd

import ipywidgets as widgets
from IPython.display import HTML

import plotly.express as px
import plotly.graph_objs as go

from google.colab import output
# Switch on Colab's custom widget manager before any widget is displayed.
# NOTE(review): presumably required for the plotly FigureWidget created later
# in this notebook to render in Colab -- confirm against the Colab docs.
output.enable_custom_widget_manager()

In [None]:
# Directory from which conversations.json is read.
parent_path = Path("/content/drive/MyDrive/")

In [None]:
# Read the exported conversations. The encoding is pinned to UTF-8 so that
# decoding does not depend on the platform's default locale.
with open(parent_path/'conversations.json', encoding='utf-8') as f:
  conv = json.load(f)

conv_df = pd.DataFrame(conv)
# Keep only the values of each "mapping" dict. Per the original author's note,
# the keys are also stored inside the values, so dropping them loses nothing.
# list(...) hands explode() a real list instead of a dict view.
conv_df["mapping"] = conv_df["mapping"].map(lambda x: list(x.values()))
# One row per mapping entry; ignore_index=True already yields a fresh 0..n-1 index.
conv_df = conv_df.explode("mapping", ignore_index=True)
conv_df = pd.concat([
    conv_df.drop(["mapping"],axis=1),
    pd.json_normalize(conv_df["mapping"].tolist())  # nested keys become dotted columns, e.g. "message.create_time"
],axis=1)
conv_df = conv_df.sort_values(["create_time","message.create_time"]).reset_index(drop=True)

# Convert the time columns to datetimes; the values are treated as seconds
# since the Unix epoch (unit='s').
time_cols = ['create_time', 'update_time', 'message.create_time','message.update_time']
for col in time_cols:
  conv_df[col]=pd.to_datetime(conv_df[col],unit='s')

# Text extraction
def get_text_from_parts(parts):
  """Collapse a message's ``parts`` value into a single text.

  Args:
    parts: Value taken from the "message.content.parts" column.

  Returns:
    The ``str`` elements of ``parts`` joined with a blank line between them
    (non-string elements are skipped), or NaN when ``parts`` is not a list.
  """
  if type(parts) is not list:
    return np.nan
  # Only plain strings contribute to the text.
  return "\n\n".join(piece for piece in parts if type(piece) is str)
# Derive one text value per row from the message's content parts.
message_parts = conv_df["message.content.parts"]
conv_df["message_text"] = message_parts.map(get_text_from_parts)

In [None]:
def get_hyperlink(conversation_id,title):
  """Build an HTML anchor that links to a conversation on chatgpt.com.

  Args:
    conversation_id: Identifier appended to "https://chatgpt.com/c/".
    title: Link text. Converted with ``str()`` and HTML-escaped, so a title
      containing ``<``, ``>``, ``&`` or quotes is shown literally instead of
      being interpreted as markup.

  Returns:
    The ``<a href="...">...</a>`` string.
  """
  url = f"https://chatgpt.com/c/{conversation_id}"
  # Escape both the attribute value and the visible text.
  link = f'<a href="{html.escape(url)}">{html.escape(str(title))}</a>'
  return link

def get_conv_html_str(conversation_id,title,create_time,message_head):
  """Build the HTML snippet shown for one conversation.

  The snippet is the creation time, a line break, the conversation link and
  the beginning of a message.

  Args:
    conversation_id: Passed to ``get_hyperlink``.
    title: Passed to ``get_hyperlink``.
    create_time: Object with ``strftime`` (e.g. a pandas Timestamp).
    message_head: Leading text of a message. A non-string value (such as NaN
      for a conversation without any text) is rendered as an empty string
      instead of raising ``TypeError`` on concatenation.

  Returns:
    The HTML string.
  """
  link=get_hyperlink(conversation_id,title)
  # Message text is free-form; escape it so pasted markup is shown literally.
  head = html.escape(message_head) if isinstance(message_head, str) else ""
  html_str = create_time.strftime("%Y-%m-%d %H:%M:%S")+"<br>"
  html_str += link+": "
  html_str += head
  return html_str

In [None]:
# Label every row with the Monday ("YYYY-MM-DD") of the week of its create_time.
conv_df["week_monday"] = (conv_df["create_time"]-pd.to_timedelta(conv_df["create_time"].dt.weekday, unit="D")).dt.strftime("%Y-%m-%d")

# Conversation-level metadata: distinct combinations of these columns.
conv_info1 = conv_df[["conversation_id","week_monday","create_time","update_time","title"]].drop_duplicates()

# Per conversation: the first non-null user message text (in message time
# order) and the number of user rows. Named aggregation plus reset_index()
# produces the final column names directly, without relying on how
# as_index=False interacts with list-style agg().
conv_info2 = (
    conv_df[conv_df["message.author.role"]=='user']
    .sort_values("message.create_time")
    .groupby("conversation_id")["message_text"]
    .agg(first_message="first", message_count="size")
    .reset_index()
)
conv_info = pd.merge(
    conv_info1,
    conv_info2,
    on='conversation_id'
)
conv_info["create_time_"]=conv_info["create_time"].dt.round('s')
# Plain comprehensions instead of DataFrame.apply(axis=1): same values, and
# they also work when conv_info has no rows.
conv_info["hyperlink"]=[
    get_hyperlink(cid, title)
    for cid, title in zip(conv_info["conversation_id"], conv_info["title"])
]
# "first" yields a missing value when a conversation has no user text at all;
# use an empty head in that case so the HTML builder always receives a string.
conv_info["message_head"]=conv_info["first_message"].str[:60].fillna("")
conv_info["html_str"]=[
    get_conv_html_str(cid, title, created, head)
    for cid, title, created, head in zip(
        conv_info["conversation_id"],
        conv_info["title"],
        conv_info["create_time_"],
        conv_info["message_head"],
    )
]

# Weekly totals: number of user messages and number of conversations.
conv_info_weekly = (
    conv_info.groupby("week_monday")["message_count"]
    .agg(message_count="sum", conversation_count="size")
    .reset_index()
)

In [None]:
# Output area
out = widgets.Output()

# Build the weekly bar chart and wrap it in a FigureWidget
weekly_bar = px.bar(conv_info_weekly, x="week_monday", y="message_count")
fig = go.FigureWidget(weekly_bar)

xaxis_options = {
    "tickformat": "%Y/%m/%d",
    "rangeslider": {"visible": True},
    "type": "date",
}
margin_options = {"l": 10, "r": 10, "t": 30, "b": 10}
fig.update_layout(xaxis=xaxis_options, margin=margin_options)

# Click callback
def on_bar_click(trace, points, selector):
    """Show the conversations of the clicked week in the output area.

    Registered through the trace's ``on_click``; ``trace`` and ``selector``
    are accepted but not used.

    Args:
        trace: The trace that was clicked.
        points: Click information; only ``points.point_inds`` is read.
        selector: Unused.
    """
    if not points.point_inds:
        return
    idx = points.point_inds[0]
    # idx is the clicked point's position, so look the row up positionally
    # rather than by index label.
    selected_date = conv_info_weekly["week_monday"].iloc[idx]

    # Single .loc selection instead of chained indexing.
    filtered = conv_info.loc[conv_info["week_monday"] == selected_date, "html_str"]

    with out:
        out.clear_output(wait=True)  # remove the previous output
        display(HTML("<br><br>".join(filtered)))

# Register the click handler on the figure's first trace
bar_trace = fig.data[0]
bar_trace.on_click(on_bar_click)

# The figure and the output area are displayed in the next cell.

In [None]:
# Clicking a bar shows links to that week's chats.
display(fig, out)