In [1]:
import subprocess
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

## リポジトリのクローン

In [2]:
! [ -d poi.git ] || git clone --mirror https://github.com/apache/poi.git poi.git
! ls

poi.git  sample_data


## 1. コミットの列挙

In [3]:
! git -C poi.git log --all --oneline --no-merges --pretty=%H | head

09f368aab8474ac175fd8bd6f15f1fdc653ddc48
2c74a961a4638efebc26254e84428f018c8f5d05
76f113564fe70417fac1ff8dbb350a0b32f373c8
4ca89e156f849de0b991d98243c52e19db3c11bb
b0515e7493ce2900ff48be5e706524ecb2d3d59c
116c32137d504839b1f333d492f2221e4a6e1eb3
8d73b7ac2b4dd1555d5859fcf91275f53cad8c59
86891c7f93baff70db0004437cada00dfb2cf926
67ab5d5a2e6c5f606c6eefc2f4c40ac2880c8838
91569bd826007dd1a128df152de78d955f38e0cf


In [4]:
! git -C poi.git log --all --oneline --no-merges --pretty=%H | wc -l

25257


In [5]:
def enumerate_all_commits():
  """List the full hash of every non-merge commit reachable from any ref in poi.git.

  Returns:
    list[str]: one full commit hash per element, in the order `git log`
    prints them. An empty list when git prints nothing.

  Raises:
    subprocess.CalledProcessError: if git exits with a non-zero status.
  """
  # An argv list (no shell) avoids quoting pitfalls; `--pretty=%H` decides the
  # output format, so each line is a full hash even though `--oneline` is given.
  cmd = ["git", "-C", "poi.git", "log", "--all", "--oneline", "--no-merges", "--pretty=%H"]
  result = subprocess.run(cmd, check=True, stdout=subprocess.PIPE, text=True)
  # Drop empty entries so that empty output yields [] rather than [''].
  return [line for line in result.stdout.strip().split('\n') if line]

all_commits = enumerate_all_commits()

## 2. コミット範囲の指定

In [6]:
! git -C poi.git log --pretty=%H --since='one month ago'

2c74a961a4638efebc26254e84428f018c8f5d05
4ca89e156f849de0b991d98243c52e19db3c11bb
b0515e7493ce2900ff48be5e706524ecb2d3d59c
116c32137d504839b1f333d492f2221e4a6e1eb3
8d73b7ac2b4dd1555d5859fcf91275f53cad8c59
91569bd826007dd1a128df152de78d955f38e0cf
018961ea775cff57b74aad49864799e53756ab13
75729edcea0219c885e90cf74cf1ff6151f95b77
9951e8dfc1c34cb92611f1058751217c4f3ff542
3f4e7189b3190e796d1748fb308849cae0797ec8
39f4085d6ce9b6ceca3181eeb5e06f95e83d6634
2ebb2938b1ac29347a815e63ec490869258c18eb
0c3ebbb1bc9c62c4830e8b2a705d6972934f7529
83e38afcdbaaeb00ef9a9d198e63b6868705dbf8
1d5497f2c3fe774fd43b96049a8b0e96d1dce84f
a40ab5a551d27ad61898425cc0093c072a76f14b
d7ca791acea90773adb3cfbdf5099c076e51ab9f
b92a912e7df3a6bef54541e57e285f6d92686327
704e9a5439c150b681def39f758303d619259f24
be83ccf2ef8d6805ed2a5f3b95ae7ce2e68e12ef
ba6b7551074858575dce15129006ec435df60605
3755101b72140ca352f3fb7e97b7a0175c7f6127
3ee979c767775da053cbb7184001dba175f53d3c
6754be55a2dbdf9cd1e81fb5bb3c40043f6c6789
0e18766e1a5c90ac

## 3. コミット・トランザクションの抽出

In [7]:
! git -C poi.git show --pretty=format: --name-only c3fd1d5731af09aeb62a48026b6437bed4f2c28f

build.gradle
build.xml
poi-excelant/build.gradle
poi-integration/build.gradle
poi-ooxml/build.gradle


In [8]:
def transaction_of(cid):
  """Return the paths that `git show --name-only` lists for one commit.

  Args:
    cid: commit identifier passed to `git show` in the poi.git repository.

  Returns:
    list[str]: one path per element; an empty list when the commit lists
    no files.

  Raises:
    subprocess.CalledProcessError: if git exits with a non-zero status.
  """
  # Pass arguments as a list (no shell) so `cid` is never interpreted by a shell.
  cmd = ["git", "-C", "poi.git", "show", "--pretty=format:", "--name-only", cid]
  result = subprocess.run(cmd, check=True, stdout=subprocess.PIPE, text=True)
  # Filter empty entries: a commit without files must give [], not [''],
  # otherwise '' would be treated as an item by the mining steps.
  return [path for path in result.stdout.strip().split('\n') if path]

transaction_of("c3fd1d5731af09aeb62a48026b6437bed4f2c28f")

['build.gradle',
 'build.xml',
 'poi-excelant/build.gradle',
 'poi-integration/build.gradle',
 'poi-ooxml/build.gradle']

## 4. ターゲット履歴の抽出

In [9]:
! git -C poi.git log --pretty=%H -- src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFCell.java

37791e4bdfc706aa5684745594260f243b4be7ee
9c1eb6b18ee21a2e87a236d99b91f257d945f33a
326ff18e51cccabeef0c4d15d666354399fcd962
eaf9f147d3cc4d7a49131c8ec6946e9246b64806
20540f3c69eb1c48a0ebbd8c49bcfe6ea6f3e165
19c97f3285ddfc3f076b655707ef9f51281b6202
b10f94cc75d00d2b215f3a148dd4e79bbaf2b1b3
dc6a4a99e2d077ff9c9a343e218070d4df14d48d
a08b69df801bac0fdb40071fb2007b6bfac37bb1
53eee01b5dcd28ab894f85403a0e640c054f089f
5da229797f116457f2fdefad95306399b9757535
70b4b88a3ab2b35f078d91972ffa2641282b8bdd
bc30eceb020a1fa332cfcf24cb350652fe110306
19bf8b23d5dacee226c70cece3a315e5f1e47616
d60cb3a18c18b958998f066dc3591bc81dd85b3d
1253a295710ed82680dd5153e868edd87a5a981d
db14c353fc3b0089c597f92bcbf8315ecfa5c5c7
f64aef74177979942ec3fd1502c148f4927aad54
e501d4015df167e69688635df8092f993d5a9094
3aec436a34da31b247a1449225c75f3157a1bf63
6b433ae8b34b8c32a0457d74fe3b023222fe7945
08cfbcc90bffc4f348af495d013a650e96cf1dea
445ca472e1599768570854e07b35a6e19b46a851
3d4acce3a7e82e1e04fd228b5fa5d3b845472145
a96d86bfe8c8b43a

In [10]:
def commits_of(file):
  """Return the hashes of the commits that `git log` reports for a path.

  Args:
    file: path (relative to the repository root) given to `git log` after `--`.

  Returns:
    list[str]: one full commit hash per element, in the order `git log`
    prints them; an empty list when no commit is reported.

  Raises:
    subprocess.CalledProcessError: if git exits with a non-zero status.
  """
  # An argv list keeps paths containing spaces or shell metacharacters intact.
  cmd = ["git", "-C", "poi.git", "log", "--pretty=%H", "--", file]
  result = subprocess.run(cmd, check=True, stdout=subprocess.PIPE, text=True)
  # Empty output must become [], not [''], so callers never look up an
  # empty commit id.
  return [commit for commit in result.stdout.strip().split('\n') if commit]

commits_of("src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFCell.java")

['37791e4bdfc706aa5684745594260f243b4be7ee',
 '9c1eb6b18ee21a2e87a236d99b91f257d945f33a',
 '326ff18e51cccabeef0c4d15d666354399fcd962',
 'eaf9f147d3cc4d7a49131c8ec6946e9246b64806',
 '20540f3c69eb1c48a0ebbd8c49bcfe6ea6f3e165',
 '19c97f3285ddfc3f076b655707ef9f51281b6202',
 'b10f94cc75d00d2b215f3a148dd4e79bbaf2b1b3',
 'dc6a4a99e2d077ff9c9a343e218070d4df14d48d',
 'a08b69df801bac0fdb40071fb2007b6bfac37bb1',
 '53eee01b5dcd28ab894f85403a0e640c054f089f',
 '5da229797f116457f2fdefad95306399b9757535',
 '70b4b88a3ab2b35f078d91972ffa2641282b8bdd',
 'bc30eceb020a1fa332cfcf24cb350652fe110306',
 '19bf8b23d5dacee226c70cece3a315e5f1e47616',
 'd60cb3a18c18b958998f066dc3591bc81dd85b3d',
 '1253a295710ed82680dd5153e868edd87a5a981d',
 'db14c353fc3b0089c597f92bcbf8315ecfa5c5c7',
 'f64aef74177979942ec3fd1502c148f4927aad54',
 'e501d4015df167e69688635df8092f993d5a9094',
 '3aec436a34da31b247a1449225c75f3157a1bf63',
 '6b433ae8b34b8c32a0457d74fe3b023222fe7945',
 '08cfbcc90bffc4f348af495d013a650e96cf1dea',
 '445ca472

## 5. 頻出アイテムセットの抽出

In [11]:
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
import pandas as pd

def transactions_of(file):
  """Build the transaction list for `file`.

  For every commit returned by `commits_of(file)`, the result holds the
  value of `transaction_of(commit)`, in the same order.
  """
  history = []
  for commit_id in commits_of(file):
    history.append(transaction_of(commit_id))
  return history

def mine_frequent_itemsets(transactions, min_sup=0.3):
  """Run apriori over `transactions` and return the frequent itemsets.

  Args:
    transactions: list of transactions, each a list of items (file paths here).
    min_sup: minimum support passed to `apriori` (default 0.3).

  Returns:
    The value returned by `apriori(..., use_colnames=True)`.
  """
  encoder = TransactionEncoder()
  encoder.fit(transactions)
  encoded_rows = encoder.transform(transactions)
  # One column per item learned by the encoder, one row per transaction.
  encoded_frame = pd.DataFrame(encoded_rows, columns=encoder.columns_)
  return apriori(encoded_frame, min_support=min_sup, use_colnames=True)

mine_frequent_itemsets(transactions_of("src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFCell.java"))

Unnamed: 0,support,itemsets
0,0.304636,(src/java/org/apache/poi/hssf/usermodel/HSSFCe...
1,0.993377,(src/ooxml/java/org/apache/poi/xssf/usermodel/...
2,0.344371,(src/ooxml/java/org/apache/poi/xssf/usermodel/...
3,0.304636,(src/ooxml/java/org/apache/poi/xssf/usermodel/...
4,0.344371,(src/ooxml/java/org/apache/poi/xssf/usermodel/...


## 6. 相関ルールマイニングの実施

In [12]:
from mlxtend.frequent_patterns import association_rules

def mine_association_rules(transactions, min_sup=0.3, min_conf=0.3):
  """Derive association rules from `transactions`.

  Frequent itemsets are mined with `mine_frequent_itemsets(transactions, min_sup)`
  and then handed to `association_rules`, filtered on the "confidence"
  metric with `min_conf` as the threshold.

  Args:
    transactions: list of transactions (its length is passed as `num_itemsets`).
    min_sup: minimum support for the frequent-itemset step (default 0.3).
    min_conf: minimum confidence for a rule to be kept (default 0.3).

  Returns:
    The value returned by `association_rules`.
  """
  itemsets = mine_frequent_itemsets(transactions, min_sup)
  transaction_count = len(transactions)
  return association_rules(
      itemsets,
      num_itemsets=transaction_count,
      metric="confidence",
      min_threshold=min_conf,
  )

mine_association_rules(transactions_of("src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFCell.java"))

  and should_run_async(code)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(src/ooxml/java/org/apache/poi/xssf/usermodel/...,(src/java/org/apache/poi/hssf/usermodel/HSSFCe...,0.993377,0.304636,0.304636,0.306667,1.006667,1.0,0.002017,1.002929,1.0,0.306667,0.002921,0.653333
1,(src/java/org/apache/poi/hssf/usermodel/HSSFCe...,(src/ooxml/java/org/apache/poi/xssf/usermodel/...,0.304636,0.993377,0.304636,1.0,1.006667,1.0,0.002017,inf,0.009524,0.306667,1.0,0.653333
2,(src/ooxml/java/org/apache/poi/xssf/usermodel/...,(src/ooxml/java/org/apache/poi/xssf/usermodel/...,0.344371,0.993377,0.344371,1.0,1.006667,1.0,0.002281,inf,0.010101,0.346667,1.0,0.673333
3,(src/ooxml/java/org/apache/poi/xssf/usermodel/...,(src/ooxml/java/org/apache/poi/xssf/usermodel/...,0.993377,0.344371,0.344371,0.346667,1.006667,1.0,0.002281,1.003514,1.0,0.346667,0.003502,0.673333


## 7. 変更漏れの特定

In [13]:
def recommend_overlooked(files):
  """Suggest files that may have been overlooked in a change set.

  For each file in `files`, association rules are mined from that file's
  transactions. Whenever a rule's antecedents are all contained in `files`,
  the consequents that are not in `files` are recommended.

  Args:
    files: iterable of changed file paths (a set, list, tuple, ...).

  Returns:
    list[str]: recommended paths, each listed once, in first-seen order.
  """
  # Normalise to a set so subset / difference operations work for any iterable.
  changed = set(files)
  recommended = []
  already_recommended = set()
  # Sorted iteration keeps the result order reproducible between runs.
  for f in sorted(changed):
    transactions = transactions_of(f)
    if not transactions:
      # No history for this file: nothing to mine.
      continue
    for rule in mine_association_rules(transactions).itertuples():
      if not rule.antecedents <= changed:
        continue
      for ov in sorted(rule.consequents - changed):
        # Several rules can point at the same file; report it only once.
        if ov not in already_recommended:
          already_recommended.add(ov)
          recommended.append(ov)
  return recommended

recommend_overlooked(set(["src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFCell.java"]))

  and should_run_async(code)


['src/java/org/apache/poi/hssf/usermodel/HSSFCell.java',
 'src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFSheet.java']