# Issue Mining
Use GitHub API to mine all issues and PRs into a JSON file.

In [1]:
import requests, json
from util import authHeader

In [2]:
baseUrl = 'https://api.github.com/'

def apiCall(endpoint, **kwargs):
  return requests.get(baseUrl + endpoint, headers=authHeader, **kwargs)
def issues(page=1):
  return apiCall('repos/lampepfl/dotty/issues?page={0}'.format(page)).json()

In [None]:
allIssues = []
def processPage(page):
  pageIssues = issues(page)
  if pageIssues:
    print('Obtained {num} issues for page {page}'.format(num=len(pageIssues), page=page))
    allIssues.extend(pageIssues)
    processPage(page+1)
  else:
    print('No issues obtained for page {page}, we are done'.format(page=page))
processPage(1)

Obtained 30 issues for page 1
Obtained 30 issues for page 2
Obtained 30 issues for page 3
Obtained 30 issues for page 4
Obtained 30 issues for page 5


In [48]:
with open('issues.json', 'w') as f:
  json.dump(allIssues, f, indent=2)

# Working with mined issues

In [5]:
import re, json

In [6]:
with open('issues.json', 'r') as f:
  data = json.load(f)

## Filter standardised issues
These are the ones which follow pattern:

```
## Minimized code
...
## Output
...
```

Minimized code must contain a Scala snippet, and Output must contain an error snippet

**Note**: Can be refactored with `mistune` markdown parser:

```python
markdown = mistune.create_markdown(renderer=mistune.AstRenderer())
markdown(data[3]['body'])
```

In [67]:
def between(target, start, end):
  result = re.search('{start}(.*){end}'.format(start=start, end=end), target, re.DOTALL)
  if result:
    return result.group(1)
  
regions = {
  'minimized_code': ['## Minimized code', '## Output'],
  'output': ['## Output', '## Expectation'],
  'scala_block': ['```[Ss]cala', '```'],
  'code_block': ['```', '```']
}

def region(target, name):
  return between(target, regions[name][0], regions[name][1])

def issue_code(iss):
  body = iss['body']
  min_code = region(body, 'minimized_code')
  if min_code:
    scala_block = region(min_code, 'scala_block')
    return scala_block
filtered = [d for d in data if issue_code(d)]
'Found {0} issues'.format(len(filtered))

'Found 245 issues'

In [53]:
with open('filtered.json', 'w') as f:
  json.dump(filtered, f, indent=2)

# Create a Playground issue from spec

In [70]:
import os, shutil, textwrap

In [71]:
playground = '/Users/kmetiuk/Projects/scala3/playground/regression'
shutil.rmtree(playground, ignore_errors=True)
os.makedirs(playground)

In [73]:
for i in filtered:
  issue_id = i['number']
  print('Processing issue #{0}'.format(issue_id))
  issue_path = os.path.join(playground, 'i{0}.scala'.format(issue_id))

  src = textwrap.dedent(issue_code(i))
  code = textwrap.dedent('''
    // Issue #{issue_num}: https://github.com/lampepfl/dotty/issues/{issue_num}
    {code}
    /*
    Issue text:
    {issue_text}
    */
  '''.format(issue_num=issue_id, issue_text=i['body'], code=src)).lstrip()
  with open(issue_path, 'w') as f:
    f.write(code)

Processing issue #12805
Processing issue #12803
Processing issue #12802
Processing issue #12800
Processing issue #12796
Processing issue #12794
Processing issue #12788
Processing issue #12776
Processing issue #12759
Processing issue #12753
Processing issue #12739
Processing issue #12736
Processing issue #12731
Processing issue #12729
Processing issue #12722
Processing issue #12721
Processing issue #12715
Processing issue #12679
Processing issue #12668
Processing issue #12663
Processing issue #12662
Processing issue #12656
Processing issue #12655
Processing issue #12646
Processing issue #12645
Processing issue #12643
Processing issue #12640
Processing issue #12637
Processing issue #12627
Processing issue #12621
Processing issue #12600
Processing issue #12577
Processing issue #12573
Processing issue #12572
Processing issue #12571
Processing issue #12569
Processing issue #12556
Processing issue #12537
Processing issue #12536
Processing issue #12521
Processing issue #12514
Processing issue

# Execute issues

In [79]:
import subprocess

In [80]:
scala3_path = '/Users/kmetiuk/Projects/scala3/dotty/bin/scalac'
def scalac(issue_id):
  return subprocess.run('{scalac} i{issue_id}.scala'.format(scalac=scala3_path, issue_id=issue_id),
    cwd=playground, shell=True, capture_output=True)

In [81]:
compiling_issues = []
for current, issue in enumerate(filtered):
  issue_id = issue['number']
  res = scalac(issue_id)
  print('[{current} / {total}] Issue #{iid} compiled with status {status}'.format(
    iid=issue_id, status=res.returncode, current=current, total=len(filtered)
  ))
  if res.returncode == 0:
    compiling_issues.append(issue_id)

[0 / 245] Issue #12805 compiled with status 0
[1 / 245] Issue #12803 compiled with status 1
[2 / 245] Issue #12802 compiled with status 1
[3 / 245] Issue #12800 compiled with status 1
[4 / 245] Issue #12796 compiled with status 0
[5 / 245] Issue #12794 compiled with status 1
[6 / 245] Issue #12788 compiled with status 1
[7 / 245] Issue #12776 compiled with status 0
[8 / 245] Issue #12759 compiled with status 0
[9 / 245] Issue #12753 compiled with status 1
[10 / 245] Issue #12739 compiled with status 1
[11 / 245] Issue #12736 compiled with status 1
[12 / 245] Issue #12731 compiled with status 1
[13 / 245] Issue #12729 compiled with status 0
[14 / 245] Issue #12722 compiled with status 1
[15 / 245] Issue #12721 compiled with status 1
[16 / 245] Issue #12715 compiled with status 1
[17 / 245] Issue #12679 compiled with status 1
[18 / 245] Issue #12668 compiled with status 0
[19 / 245] Issue #12663 compiled with status 1
[20 / 245] Issue #12662 compiled with status 1
[21 / 245] Issue #12656

# Inspect individual issues

In [42]:
res = scalac(10640)

In [51]:
def escape_ansi(line):
    ansi_escape = re.compile(r'(?:\x1B[@-_]|[\x80-\x9F])[0-?]*[ -/]*[@-~]')
    return ansi_escape.sub('', line)

print(escape_ansi(res.stderr.decode('utf8')))

-- [E006] Not Found Error: i10640.scala:5:3 ------------------------------------
5 |  @targetName("func3_none")
  |   ^^^^^^^^^^
  |   Not found: type targetName

longer explanation available when compiling with `-explain`
-- [E006] Not Found Error: i10640.scala:6:45 -----------------------------------
6 |  def func3[A,B](a : A, b : B)(implicit ev1: Not[A<:<Tuple], ev2: Not[B<:<Tuple]): (A,B) = (a,b)
  |                                             ^^^
  |                                             Not found: type Not

longer explanation available when compiling with `-explain`
-- [E006] Not Found Error: i10640.scala:6:66 -----------------------------------
6 |  def func3[A,B](a : A, b : B)(implicit ev1: Not[A<:<Tuple], ev2: Not[B<:<Tuple]): (A,B) = (a,b)
  |                                                                  ^^^
  |                                                       Not found: type Not

longer explanation available when compiling with `-explain`
-- [E006] Not Found Er