In [57]:
import datetime
import re

import pandas as pd
import pytz

In [64]:
# Load the raw issue dump and parse both timestamp columns into datetimes so
# that issue ages can be computed from them.
data = pd.read_json("../data/issues_v1.json", encoding="ISO-8859-1").assign(
    created_at=lambda frame: pd.to_datetime(frame["created_at"]),
    updated_at=lambda frame: pd.to_datetime(frame["updated_at"]),
)

# Reference point for the age columns: the current time in Europe/Prague.
today = datetime.datetime.now(tz=pytz.timezone("Europe/Prague"))

# Derived columns are appended in this order: days since creation, days since
# the last update, and the character length of the issue body.
data = data.assign(
    days_since_created=lambda frame: (today - frame["created_at"]).dt.days,
    days_since_updated=lambda frame: (today - frame["updated_at"]).dt.days,
    body_length=lambda frame: frame["body"].str.len(),
)

# With the ages computed, reduce the timestamps to plain YYYY-MM-DD strings.
data = data.assign(
    created_at=lambda frame: frame["created_at"].dt.strftime("%Y-%m-%d"),
    updated_at=lambda frame: frame["updated_at"].dt.strftime("%Y-%m-%d"),
)

# Keep only the rows whose "pull_request" flag is False.
data = data.loc[~data["pull_request"]]

# Attach the columns from the second JSON file; the inner join keeps only
# issues whose number is present in both files.
data_gpt = pd.read_json("../data/issues_v1_gpt.json", encoding="ISO-8859-1")
data = pd.merge(
    data,
    data_gpt[["number", "summary", "short_label", "label"]],
    on="number",
    how="inner",
)


In [65]:
# Display the column names of the merged frame as a plain list.
list(data.columns)

['number',
 'state',
 'title',
 'body',
 'comments',
 'created_at',
 'updated_at',
 'author_association',
 'html_url',
 'pull_request',
 'days_since_created',
 'days_since_updated',
 'body_length',
 'summary',
 'short_label',
 'label']

In [68]:
# Column layout of the exported tables. "id" carries the row label of `data`
# and "number" carries the issue number rendered as a link to the issue page.
columns = [
    "id",
    "number",
    "state",
    "title",
    "created_at",
    "updated_at",
    "days_since_created",
    "days_since_updated",
    "author_association",
    "comments",
    "body_length",
    "summary",
    "short_label",
    "label",
]


def write_issue_table(frame, links, path):
    """Render ``frame`` as an HTML table, write it to ``path`` and return the HTML.

    ``DataFrame.to_html`` escapes cell contents by default, so the "number"
    column holds plain-text placeholders (``link_<n>_``) while rendering.
    They are swapped for the real ``<a>`` tags afterwards, in a single regex
    pass over the document instead of one ``str.replace`` scan per issue.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table to export; its "number" column contains placeholder names.
    links : dict[str, str]
        Maps each placeholder name to the anchor markup that replaces it.
        Placeholder-looking text without an entry is left untouched.
    path : str
        Destination file. Written as UTF-8 so non-ASCII text in the table
        does not depend on the platform's default encoding.

    Returns
    -------
    str
        The HTML that was written.
    """
    html_str = frame.to_html(index=False)
    html_str = re.sub(
        r"link_\d+_",
        lambda match: links.get(match.group(0), match.group(0)),
        html_str,
    )
    with open(path, "w", encoding="utf-8") as f:
        f.write(html_str)
    return html_str


# One placeholder per row, numbered by position. The trailing underscore keeps
# "link_1_" from being a prefix of "link_10_".
link_names = [f"link_{position}_" for position in range(len(data))]
links = {
    link_name: '<a href="{}">{}</a>'.format(html_url, number)
    for link_name, html_url, number in zip(
        link_names, data["html_url"], data["number"]
    )
}

# Build the export table in one step rather than appending row by row.
df = data.assign(id=data.index, number=link_names)[columns]

# All issues.
html_str = write_issue_table(df, links, "../issues_v1.html")

# Open issues only; the "state" column is constant there, so it is dropped.
df = df[df["state"] == "open"].drop(columns=["state"])
html_str = write_issue_table(df, links, "../issues_v1_open.html")

# sort by body length
df = df.sort_values(by=["body_length"], ascending=True)
html_str = write_issue_table(df, links, "../issues_v1_open_ordered.html")

# sort by number of comments and then by days since updated
df = df.sort_values(by=["comments", "days_since_updated"], ascending=[False, True])
html_str = write_issue_table(df, links, "../issues_v1_open_ordered_c.html")