Skip to content

Commit

Permalink
add script for locating clones on github
Browse files Browse the repository at this point in the history
  • Loading branch information
jakubzitny committed Jul 20, 2016
1 parent f1a41b2 commit 0111c6a
Showing 1 changed file with 29 additions and 0 deletions.
29 changes: 29 additions & 0 deletions tokenizers/file-level/githubpair.sh
@@ -0,0 +1,29 @@
#!/bin/bash
#
# Prints the GitHub URLs of the files involved in each reported clone pair.
#
# Inputs (all locations are the variables defined below):
#   * clone pair files  - "$clonePairLoc"/tokens*; the first two
#                         comma-separated fields of every line are the two
#                         file IDs of a clone pair.
#   * bookkeeping files - "$bookkeeping"/*; comma-separated lines whose third
#                         field is the file path belonging to the ID found in
#                         one of the middle fields.
#   * repository info   - "$datasetLocation/<repoId>/$repoInfoPath"; a
#                         pretty-printed JSON file with "full_name" and
#                         "default_branch" keys (presumably the GitHub API
#                         repository metadata - confirm against the dataset).
#
# Output: one URL per resolved file, then a "===========" separator line after
# every clone pair. Diagnostics go to stderr.
#
# TODO: make this general for the dataset

bookkeeping=bookkeeping_files/
clonePairLoc=../../clone-detector/output7.0/

githubUrlBase=https://github.com/
datasetLocation=/mnt/data/
repoInfoPath=/github/info.json

#######################################
# Prints the value of the first line of the form  "<key>": "<value>"  in a
# JSON file. Only the first match is used, so the same key repeated later in
# the file (e.g. inside a nested object) cannot add extra lines to the result.
# Arguments: $1 - key name, $2 - path to the JSON file
# Outputs:   the value on stdout (nothing if the key is absent)
#######################################
json_string_value() {
  awk -F'"' -v key="$1" '$2 == key { print $4; exit }' "$2"
}

#######################################
# Prints the bookkeeping path (third field) of every line that carries one of
# the two IDs in a middle field, i.e. a field that is neither first nor last.
# IDs are compared as exact strings, never as patterns or numbers.
# Globals:   bookkeeping (read)
# Arguments: $1 - first file ID, $2 - second file ID
# Outputs:   matching paths on stdout, one per line
#######################################
lookup_paths() {
  awk -F, -v a="$1" -v b="$2" '
    {
      for (i = 2; i < NF; i++) {
        # Appending "" forces a string comparison even for numeric-looking IDs.
        if (($i "") == (a "") || ($i "") == (b "")) {
          print $3
          break
        }
      }
    }
  ' "$bookkeeping"/*
}

#######################################
# Prints the GitHub URL for one dataset file path.
# The repository ID is taken from the 4th "/"-separated field of the path and
# the path inside the repository from the 6th field onwards, which matches a
# layout like /mnt/data/<repoId>/<dir>/<file path> (layout assumed from these
# field positions - confirm against the dataset).
# Globals:   datasetLocation, repoInfoPath, githubUrlBase (read)
# Arguments: $1 - file path as stored in the bookkeeping files
# Outputs:   the URL on stdout
# Returns:   0 on success, 1 if the repository info is missing or incomplete
#######################################
github_url_for_path() {
  local path=$1
  local repo_id file_path info_file repo branch

  repo_id=$(cut -d/ -f4 <<<"$path")
  file_path=$(cut -d/ -f6- <<<"$path")
  info_file=$datasetLocation/$repo_id/$repoInfoPath

  if [[ ! -r "$info_file" ]]; then
    printf 'warning: cannot read %s (needed for %s)\n' "$info_file" "$path" >&2
    return 1
  fi

  repo=$(json_string_value full_name "$info_file")
  branch=$(json_string_value default_branch "$info_file")
  if [[ -z "$repo" || -z "$branch" ]]; then
    printf 'warning: full_name/default_branch missing in %s\n' "$info_file" >&2
    return 1
  fi

  printf '%s\n' "$githubUrlBase$repo/blob/$branch/$file_path"
}

#######################################
# Walks over all clone pairs and prints the URLs of both files of each pair.
# Globals:   clonePairLoc, bookkeeping (read)
# Outputs:   URLs and "===========" separators on stdout
# Returns:   0 (missing inputs only produce a warning on stderr)
#######################################
main() {
  local -a pair_files
  local line c1 c2 path

  # Without nullglob an unmatched glob stays literal, hence the -e check.
  pair_files=("$clonePairLoc"/tokens*)
  if [[ ! -e "${pair_files[0]}" ]]; then
    printf 'warning: no clone pair files match %s\n' "$clonePairLoc/tokens*" >&2
    return 0
  fi
  if [[ ! -d "$bookkeeping" ]]; then
    printf 'warning: bookkeeping directory %s not found\n' "$bookkeeping" >&2
    return 0
  fi

  # Stream the pairs line by line instead of loading them into a variable.
  while IFS= read -r line || [[ -n "$line" ]]; do
    [[ -n "$line" ]] || continue
    IFS=, read -r c1 c2 _ <<<"$line"
    if [[ -z "$c1" || -z "$c2" ]]; then
      printf 'warning: skipping malformed clone pair line: %s\n' "$line" >&2
      continue
    fi

    while IFS= read -r path; do
      [[ -n "$path" ]] || continue
      # A path without usable repository info is reported and skipped.
      github_url_for_path "$path"
    done < <(lookup_paths "$c1" "$c2")

    echo "==========="
  done < <(cat -- "${pair_files[@]}")

  return 0
}

main

0 comments on commit 0111c6a

Please sign in to comment.