In [1]:
import json
import pandas as pd
import requests

### Load and join comments

In [2]:
with open('comments.json') as f:
    comments = json.load(f)

In [3]:
df = pd.DataFrame.from_dict(comments['comments'])
commenters = pd.DataFrame.from_dict(comments['commenters'])

In [4]:
df = pd.merge(df, commenters, how='left')

In [5]:
df.columns

Index(['commentHex', 'domain', 'url', 'commenterHex', 'markdown', 'html',
       'parentHex', 'score', 'state', 'creationDate', 'direction', 'deleted',
       'email', 'name', 'link', 'photo', 'provider', 'joinDate',
       'isModerator'],
      dtype='object')

In [6]:
df = df[['url', 'markdown', 'creationDate', 'name']]

### Clean comments

In [7]:
df.drop_duplicates(subset=['name', 'creationDate'], inplace=True)
df.sort_values(by='creationDate', ascending=True)
df.reset_index(inplace=True, drop=True)

In [8]:
df['url'] = df['url'].apply(lambda x: x[:-1] + x[-1].replace('/', ''))

In [9]:
df['url'] = df['url'].apply(lambda x: x.split('/')[-1])

In [10]:
df['url'] = 'https://eugeneyan.com/writing/' + df['url'] + '/'

In [13]:
df['name'].fillna('Anonymous', inplace=True)

In [14]:
df = df[df['markdown'] != '[deleted]'].copy()

In [15]:
df['creationDate'] = df['creationDate'].apply(lambda x: x.split('.')[0])

In [16]:
def format_date(creationDate):
    if creationDate[-1] == 'Z':
        return creationDate
    else:
        return creationDate + 'Z'

In [17]:
df['creationDate'] = df['creationDate'].apply(format_date)

In [18]:
df.sort_values(by='creationDate', inplace=True)

In [19]:
df['updated_markdown'] = df['markdown'] + '\n \nComment by **' + df['name'] + '** on ' + df['creationDate']

In [20]:
df['url'].value_counts()

https://eugeneyan.com/writing/note-taking-zettelkasten/                                      12
https://eugeneyan.com/writing/psych-grad-to-data-science-lead/                                8
https://eugeneyan.com/writing/end-to-end-data-science/                                        7
https://eugeneyan.com/writing/what-i-love-about-scrum-for-data-science/                       7
https://eugeneyan.com/writing/thoughts-on-cs7646-machine-learning-for-trading/                4
https://eugeneyan.com/writing/setting-up-python-project-for-automation-and-collaboration/     4
https://eugeneyan.com/writing/recommender-systems-baseline-pytorch/                           4
https://eugeneyan.com/writing/data-science-and-agile-what-works-and-what-doesnt/              4
https://eugeneyan.com/writing/what-i-do-during-a-data-science-project-to-ensure-success/      3
https://eugeneyan.com/writing/how-to-set-up-html-app-with-fastapi-jinja-forms-templates/      3
https://eugeneyan.com/writing/why-you-ne

In [21]:
df[df['url'] == 'https://eugeneyan.com/writing/how-to-set-up-html-app-with-fastapi-jinja-forms-templates/']

Unnamed: 0,url,markdown,creationDate,name,updated_markdown
66,https://eugeneyan.com/writing/how-to-set-up-ht...,"Hi Eugene, thanks a lot for this informative a...",2020-07-27T14:12:54Z,Dominik Haitz,"Hi Eugene, thanks a lot for this informative a..."
64,https://eugeneyan.com/writing/how-to-set-up-ht...,"Hey Dominik, thanks for sharing the comparison...",2020-07-27T14:50:42Z,Eugene Yan,"Hey Dominik, thanks for sharing the comparison..."
68,https://eugeneyan.com/writing/how-to-set-up-ht...,"great, thanks for your answer! :-)",2020-07-28T12:03:34Z,Dominik Haitz,"great, thanks for your answer! :-)\n \nComment..."


### Create issues

In [22]:
REPO_OWNER = ''
REPO_NAME = ''
TOKEN = ''

In [23]:
# Expected payload
# {
#   "issue": {
#     "title": "Imported from some other system",
#     "body": "..."
#   },
#   "comments": [
#     {
#       "body": "talk talk"
#     }
#   ]
# }

In [24]:
list(df['url'].value_counts().index)

['https://eugeneyan.com/writing/note-taking-zettelkasten/',
 'https://eugeneyan.com/writing/psych-grad-to-data-science-lead/',
 'https://eugeneyan.com/writing/end-to-end-data-science/',
 'https://eugeneyan.com/writing/what-i-love-about-scrum-for-data-science/',
 'https://eugeneyan.com/writing/thoughts-on-cs7646-machine-learning-for-trading/',
 'https://eugeneyan.com/writing/setting-up-python-project-for-automation-and-collaboration/',
 'https://eugeneyan.com/writing/recommender-systems-baseline-pytorch/',
 'https://eugeneyan.com/writing/data-science-and-agile-what-works-and-what-doesnt/',
 'https://eugeneyan.com/writing/what-i-do-during-a-data-science-project-to-ensure-success/',
 'https://eugeneyan.com/writing/how-to-set-up-html-app-with-fastapi-jinja-forms-templates/',
 'https://eugeneyan.com/writing/why-you-need-to-follow-up-after-your-data-science-project/',
 'https://eugeneyan.com/writing/reading-note-taking-writing/',
 'https://eugeneyan.com/writing/recommender-systems-graph-and-

In [25]:
urls = list(df['url'].value_counts().index)

In [26]:
github_url = 'https://api.github.com/repos/%s/%s/import/issues' % (REPO_OWNER, REPO_NAME)
    
# Headers
headers = {
    "Authorization": "token %s" % TOKEN,
    "Accept": "application/vnd.github.golden-comet-preview+json"
}

for url in urls:
    comment_list = []
    created_at = '2010-01-02'
    for _, row in df[df['url'] == url].iterrows():
        comment_list.append({'body': row['updated_markdown'], 'created_at': row['creationDate']})
        if row['creationDate'] > created_at:
            created_at = row['creationDate']
        
    payload = {'issue': {'title': url,
                          'body': 'Migrated from json into utteranc.es',
                          'created_at': created_at},
               'comments': comment_list}
    
    payload = json.dumps(payload)

    # Add the issue to our repository
    response = requests.request("POST", github_url, data=payload, headers=headers)
    if response.status_code == 202:
        print('Successfully created Issue "%s"' % url)
    else:
        print('Could not create Issue "%s"' % url)
        print('Response:', response.content)

Successfully created Issue "https://eugeneyan.com/writing/note-taking-zettelkasten/"
Successfully created Issue "https://eugeneyan.com/writing/psych-grad-to-data-science-lead/"
Successfully created Issue "https://eugeneyan.com/writing/end-to-end-data-science/"
Successfully created Issue "https://eugeneyan.com/writing/what-i-love-about-scrum-for-data-science/"
Successfully created Issue "https://eugeneyan.com/writing/thoughts-on-cs7646-machine-learning-for-trading/"
Successfully created Issue "https://eugeneyan.com/writing/setting-up-python-project-for-automation-and-collaboration/"
Successfully created Issue "https://eugeneyan.com/writing/recommender-systems-baseline-pytorch/"
Successfully created Issue "https://eugeneyan.com/writing/data-science-and-agile-what-works-and-what-doesnt/"
Successfully created Issue "https://eugeneyan.com/writing/what-i-do-during-a-data-science-project-to-ensure-success/"
Successfully created Issue "https://eugeneyan.com/writing/how-to-set-up-html-app-with-