In [2]:
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
%load_ext sql

In [3]:
%sql sqlite:///results.db

# Subcommands

Some commands can take *subcommands*, for example `git` has a subcommand `git pull`.
We want to be able to constrain some queries to only those commands that take subcommands. For this purpose, we look at the top X commands, collect those that can take subcommands, and create a new table containg those.

Note: the dataset from Zenodo already contains the `commands_with_subcommands` table.

In [None]:
select name, count(*)
from command
where command.num_arguments > 0
group by name
order by count(*) desc
limit 200;

In [None]:
subcmds.csv(filename='subcommands.csv')

In [None]:
%%sql
insert into commands_with_subcommands (name) values
('git'),('docker'),('bundle'),('brew'),('pacman'),('npm'),('apt-get'),('kubectl'),('systemctl'),('docker-compose'),('vagrant'),('apt'),('yarn'),('svn'),('rails'),('/usr/bin/git'),('adb'),('yaourt'),('gem'),('netstat'),('hg'),('apt-cache'),('yay'),('heroku'),('launchctl'),('pg_ctl'),('aptitude'),('pip'),('composer'),('drush'),('openssl'),('hub'),('task'),('pacaur'),('be'),('yum'),('jupyter'),('aws'),('zeus'),('softwareupdate'),('dnf'),('nmcli'),('apachectl'),('wp'),('dpkg'),('conda'),('gpg'),('gcloud'),('stack'),('mpc'),('cargo'),('react-native'),('emerge'),('terraform'),('rvm'),('tig'),('hadoop'),('/usr/bin/pacman'),('busybox'),('docker-machine'),('pipenv'),('pmset'),('zypper'),('./gradlew'),('bower'),('yadm'),('amixer');

In [15]:
%sql select count(*) from commands_with_subcommands;

 * sqlite:///results.db
Done.


count(*)
67


# Subcommand Shortcuts

In [40]:
%%sql
with total as (select count(*) as total from alias)
select
    count(*) as '#',
    round(count(*)*100.0/total,2) as '%'
from alias
join command using (alias_id)
join total
where alias.num_commands = 1
and command.num_arguments = 1
and command.name in commands_with_subcommands

 * sqlite:///results.db
Done.


#,%
194850,8.84


In [19]:
%%sql
select alias.name, alias.value
from alias join command using (alias_id)
where alias.num_commands = 1
and command.num_arguments = 1
and command.name in commands_with_subcommands
group by alias.name, alias.value
order by count(*) desc
limit 20;

 * sqlite:///results.db
Done.


name,value
gs,git status
gd,git diff
ga,git add
gb,git branch
gco,git checkout
gp,git push
be,bundle exec
gc,git commit
gst,git status
gl,git log


In [39]:
%%sql
with 
    total_cmd as (select name, count(distinct alias_id) as total_cmd from command group by name),
    total_abb as (
        select count(distinct alias_id) as total_abb 
        from alias join command using (alias_id) 
        where alias.num_commands = 1 
        and command.num_arguments = 1 
        and command.name in commands_with_subcommands 
    )
select 
    command.name, 
    count(distinct alias_id) as '#',
    round(count(distinct alias_id)*100.0/total_cmd,2) as '% of command',
    round(count(distinct alias_id)*100.0/total_abb,2) as '% of all subcommand abbreviations'
from alias join command using (alias_id)
join total_cmd on command.name = total_cmd.name
join total_abb
where alias.num_commands = 1
and command.num_arguments = 1
and command.name in commands_with_subcommands
group by command.name
order by 2 desc
limit 10;

 * sqlite:///results.db
Done.


name,#,% of command,% of all subcommand abbreviations
git,113980,36.77,58.5
pacman,9918,68.67,5.09
bundle,7436,46.74,3.82
docker,5697,21.34,2.92
apt-get,5084,56.46,2.61
vagrant,4332,64.23,2.22
systemctl,3865,45.7,1.98
docker-compose,3291,30.64,1.69
npm,2936,28.88,1.51
brew,2758,33.55,1.42


### By Command

In [43]:
%%sql
select 
    cmd_name,
    count(distinct alias_id) as num,
    round(count(distinct alias_id)*100.0/total,2) as per
from (
    select command.name as cmd_name, alias_id
    from alias join command using (alias_id)
    where alias.num_commands = 1
    and command.num_arguments = 1
    and command.name in commands_with_subcommands
)
join command_alias_totals using (cmd_name)
group by cmd_name
having num > 1000 and per > 25
order by per desc, num desc

 * sqlite:///results.db
Done.


cmd_name,num,per
apt-cache,1787,90.94
pacman,9918,67.02
vagrant,4332,63.27
yaourt,1966,60.36
aptitude,1049,59.64
apt-get,5084,56.46
rails,2256,54.17
yay,1045,53.51
apt,2369,51.41
yarn,2269,49.84


# Subcommand Chains

In [55]:
%%sql
with total as (select count(*) as total from alias)
select 
    count(distinct alias_id) as '#', 
    round(count(distinct alias_id)*100.0/total,2) as '%'
from alias 
join command c1 using (alias_id)
join command c2 using (alias_id)
join total
where c1.name in commands_with_subcommands
and c1.name = c2.name
and c2.position = c1.position + 1
and (c2.operator = '&&' or c2.operator = ';');

 * sqlite:///results.db
Done.


#,%
22062,1.0


In [56]:
%%sql
select
    c1.name as cmd,
    c1.arguments,
    c2.arguments,
    count(*) as '#'
from alias 
join command c1 using (alias_id)
--join argument s1 on c1.command_id = s1.command_id
join command c2 using (alias_id)
--join argument s2 on c2.command_id = s2.command_id
where c1.name in commands_with_subcommands
and c1.name = c2.name
and c2.position = c1.position + 1
and (c2.operator = '&&' or c2.operator = ';')
--and s1.position = 0 and s2.position = 0
group by cmd, c1.arguments, c2.arguments
order by count(*) desc
limit 20;

 * sqlite:///results.db
Done.


cmd,arguments,arguments_1,#
brew,update,upgrade,1886
brew,upgrade,cleanup,1282
apt-get,update,upgrade,798
gem,update --system,update,615
npm,install npm -g,update -g,561
git,reset --hard,clean -dfx,445
gem,update,cleanup,412
git,pull,push,386
git,svn dcommit,push github master:svntrunk,381
git,push origin --all,push origin --tags,371


In [57]:
%%sql
select 
    alias.value,
    count(*) as '#'
from alias 
join command c1 using (alias_id)
join command c2 using (alias_id)
where c1.name in commands_with_subcommands
and c1.name = c2.name
and c2.position = c1.position + 1
and (c2.operator = '&&' or c2.operator = ';')
group by alias.value
order by count(*) desc
limit 20;

 * sqlite:///results.db
Done.


value,#
sudo softwareupdate -i -a ; brew update ; brew upgrade ; brew cleanup ; npm install npm -g ; npm update -g ; sudo gem update --system ; sudo gem update ; sudo gem cleanup,1475
brew update ; brew upgrade ; brew prune ; brew cleanup ; brew doctor,528
sudo apt-get update && sudo apt-get upgrade,501
git reset --hard && git clean -dfx,432
brew update && brew upgrade && brew cleanup,388
git svn dcommit && git push github master:svntrunk,381
git push origin --all && git push origin --tags,369
"git add -A ; git rm $(git ls-files --deleted) 2> /dev/null ; git commit --no-verify -m ""--wip-- [skip ci]""",312
git fetch origin -v ; git fetch upstream -v ; git merge upstream/master,296
git pull && git push,284


In [66]:
%%sql
select alias.name, alias.value
from alias
where alias.value like '%brew update%brew upgrade%'
group by alias.value, alias.name
order by count(*) desc
limit 20;

 * sqlite:///results.db
Done.


name,value
update,sudo softwareupdate -i -a ; brew update ; brew upgrade ; brew cleanup ; npm install npm -g ; npm update -g ; sudo gem update --system ; sudo gem update ; sudo gem cleanup
brewup,brew update ; brew upgrade ; brew prune ; brew cleanup ; brew doctor
update,sudo softwareupdate -i -a ; brew update ; brew upgrade --all ; brew cleanup ; npm install npm -g ; npm update -g ; sudo gem update --system ; sudo gem update
brewu,brew update && brew upgrade && brew cleanup && brew prune && brew doctor
bup,brew update && brew upgrade
brewup,brew update ; brew upgrade ; brew cleanup ; brew doctor
brewski,brew update && brew upgrade && brew cleanup ; brew doctor
update,sudo softwareupdate -i -a ; brew update ; brew upgrade ; brew cleanup ; npm update npm -g ; npm update -g ; sudo gem update --system ; sudo gem update
brewup,brew update && brew upgrade && brew cleanup
brewu,brew update && brew upgrade --all && brew cleanup && brew prune && brew doctor


In [58]:
%%sql
with total as (select count(distinct alias_id) as total from command where name = 'brew')
select 
    count(*) as '#', 
    round(count(*)*100.0/total,2) as '% of brew'
from alias join total
where alias.value like '%brew update%brew upgrade%';

 * sqlite:///results.db
Done.


#,% of brew
2308,28.08


In [79]:
%%sql
with total as (select name, count(distinct alias_id) as total from command group by name)
select
    c1.name,
    count(distinct alias_id) as '#',
    round(count(distinct alias_id)*100.0/total,2) as '% of command'
from alias 
join command c1 using (alias_id)
join command c2 using (alias_id)
left join total on c1.name = total.name
where c1.name in commands_with_subcommands
and c1.name = c2.name
and c2.position = c1.position + 1
and (c2.operator = '&&' or c2.operator = ';')
group by c1.name
order by count(*) desc

 * sqlite:///results.db
Done.


name,#,% of command
git,12063,3.89
brew,2889,35.15
apt-get,1834,20.37
apt,1026,23.24
npm,1111,10.93
gem,692,40.92
docker,806,3.02
adb,262,10.86
vagrant,322,4.77
docker-compose,257,2.39


In [82]:
%%sql
select 
    alias.value,
    count(*) as '#'
from alias 
join command c1 using (alias_id)
join command c2 using (alias_id)
where c1.name in commands_with_subcommands
and c1.name = c2.name
and c2.position = c1.position + 1
and (c2.operator = '&&' or c2.operator = ';')
and c1.name in ('npm')
group by alias.value
order by count(*) desc
limit 20;

 * sqlite:///results.db
Done.


value,#
sudo softwareupdate -i -a ; brew update ; brew upgrade ; brew cleanup ; npm install npm -g ; npm update -g ; sudo gem update --system ; sudo gem update ; sudo gem cleanup,295
sudo softwareupdate -i -a ; brew update ; brew upgrade --all ; brew cleanup ; npm install npm -g ; npm update -g ; sudo gem update --system ; sudo gem update,67
sudo softwareupdate -i -a ; brew update ; brew upgrade ; brew cleanup ; npm update npm -g ; npm update -g ; sudo gem update --system ; sudo gem update,43
sudo softwareupdate -i -a ; brew update ; brew upgrade ; brew cleanup ; npm install npm -g ; npm update -g ; sudo gem update --system ; sudo gem update,29
sudo softwareupdate -i -a ; brew update ; brew upgrade ; brew cleanup ; npm update npm -g ; npm update -g ; sudo gem update,28
npm install && npm test,28
npm install && npm start,27
sudo softwareupdate -i -a ; brew update ; brew upgrade ; brew cleanup ; mas upgrade ; npm install npm -g ; npm update -g ; sudo gem update --system ; sudo gem update ; sudo gem cleanup,17
brew_update ; npm install npm -g ; npm update -g ; sudo gem update --system ; sudo gem update --no-document,17
sudo softwareupdate -i -a ; brew update ; brew upgrade ; brew cleanup ; npm install npm -g ; npm update -g,16


In [84]:
%%sql
select 
    alias.name, alias.value, count(*) as '#'
from alias 
join command c1 using (alias_id)
join command c2 using (alias_id)
where c1.name in commands_with_subcommands
and c1.name = c2.name
and c2.position = c1.position + 1
and (c2.operator = '&&' or c2.operator = ';')
and c1.name = 'git'
group by alias.value
order by count(*) desc
limit 20;

 * sqlite:///results.db
Done.


name,value,#
gpristine,git reset --hard && git clean -dfx,432
git-svn-dcommit-push,git svn dcommit && git push github master:svntrunk,381
gpoat,git push origin --all && git push origin --tags,369
gwip,"git add -A ; git rm $(git ls-files --deleted) 2> /dev/null ; git commit --no-verify -m ""--wip-- [skip ci]""",312
gitpu,git fetch origin -v ; git fetch upstream -v ; git merge upstream/master,296
yolo,git pull && git push,284
gfr,git fetch && git rebase,274
gu,git add . && git commit && git push,248
git-aac,git add -A && git commit -m,214
nah,git reset --hard && git clean -df,203


In [87]:
%%sql
select
    c1.name,
    s1.name,
    s2.name,
    count(*) as '#'
from alias 
join command c1 using (alias_id)
join argument s1 on c1.command_id = s1.command_id
join command c2 using (alias_id)
join argument s2 on c2.command_id = s2.command_id
where c1.name in commands_with_subcommands
and c1.name = c2.name
and c2.position = c1.position + 1
and (c2.operator = '&&' or c2.operator = ';')
and s1.position = 0 and s2.position = 0
and c1.name = 'git'
group by 1, 2, 3
order by count(*) desc
limit 20;

 * sqlite:///results.db
Done.


name,name_1,name_2,#
git,add,commit,2650
git,commit,push,1161
git,pull,push,1015
git,reset,clean,897
git,fetch,rebase,759
git,push,push,758
git,checkout,pull,738
git,config,config,382
git,fetch,merge,382
git,svn,push,382


---
### By Command

In [45]:
%%sql
select 
    cmd_name,
    count(distinct alias_id) as num,
    round(count(distinct alias_id)*100.0/total,2) as per
from (
    select c1.name as cmd_name, alias_id
    from alias 
    join command c1 using (alias_id)
    join command c2 using (alias_id)
    where c1.name in commands_with_subcommands
    and c1.name = c2.name
    and c2.position = c1.position + 1
    and (c2.operator = '&&' or c2.operator = ';')
)
join command_alias_totals using (cmd_name)
group by cmd_name
having num > 1000 and per > 25
order by per desc, num desc

 * sqlite:///results.db
Done.


cmd_name,num,per
brew,2889,33.77
