In [1]:
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
%load_ext sql

In [2]:
%sql sqlite:///results.db

# Scripts

Any alias that is a composition of two or more commands.

In [4]:
%%sql
with total as (select count(*) as total from alias)
select 
    count(distinct alias_id) as '#', 
    round(count(distinct alias_id)*100.0/total,2) as '%'
from alias join command using (alias_id)
join total
where alias.num_commands != 1;


 * sqlite:///results.db
Done.


#,%
204142,9.26


In [6]:
%%sql
with total as (select count(*) as total from alias)
select num_commands, count(*) as '#', round(count(*)*100.0/total,2) as '%'
from alias 
join total
group by num_commands
order by count(*) desc
limit 10;

 * sqlite:///results.db
Done.


num_commands,#,%
1,2000057,90.74
2,137740,6.25
3,38178,1.73
4,15095,0.68
5,5799,0.26
6,3471,0.16
7,1481,0.07
8,998,0.05
9,642,0.03
10,263,0.01


### Most used operators

In [11]:
%%sql
with total as (select count(*) as total from command where operator is not null)
select operator, count(*), round(count(*)*100.0/total, 2) 
from command 
join total
where operator is not null
group by operator 
order by 2 desc

 * sqlite:///results.db
Done.


operator,count(*),"round(count(*)*100.0/total, 2)"
|,137000,39.66
;,102277,29.61
&&,92849,26.88
&,9653,2.79
||,2910,0.84
|&,746,0.22


### Operators by position

In [19]:
%%sql ops_pos << 
select operator, position as pos, count(*) as num 
from command
where operator is not null
group by position, operator 
order by operator, position asc, num desc

 * sqlite:///results.db
Done.
Returning data to local variable ops_pos


In [20]:
ops_pos = ops_pos.DataFrame().pivot(index='pos', columns='operator', values='num')[:11]
ops_pos = ops_pos.sort_values(ops_pos.first_valid_index(), axis=1, ascending=False)
ops_pos

operator,|,|&,&,;,&&,||
pos,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,14619.0,671.0,62.0,61.0,51.0,3.0
1,78082.0,62.0,8109.0,53208.0,62901.0,1780.0
2,25622.0,10.0,897.0,22083.0,17124.0,666.0
3,9923.0,3.0,249.0,11100.0,6744.0,205.0
4,4307.0,,102.0,5781.0,2824.0,115.0
5,2617.0,,54.0,3370.0,1247.0,42.0
6,903.0,,33.0,2215.0,686.0,22.0
7,464.0,,29.0,1502.0,368.0,15.0
8,149.0,,22.0,985.0,215.0,9.0
9,76.0,,15.0,467.0,170.0,10.0


### Pipelines

In [3]:
%%sql
with total as (select count(*) as total from alias)
select count(distinct alias_id) as '#', round(count(distinct alias_id)*100.0/total,2) as '%'
from alias join command using (alias_id)
join total
where alias.num_commands > 1
and alias_id not in
(select alias_id from command where command.operator != "" and command.operator != '|');

 * sqlite:///results.db
Done.


#,%
74719,3.39


In [11]:
%%sql
select command.name, count(*)
from alias join command using (alias_id)
where alias.num_commands > 1
and alias_id not in
(select alias_id from command where command.operator != "" and command.operator != '|')
group by command.name
order by count(*) desc
limit 20;

 * sqlite:///results.db
Done.


name,count(*)
grep,34487
xargs,10505
sort,10037
ps,8716
git,8017
ls,7303
head,6490
sed,5090
awk,4957
find,4445


In [13]:
%%sql
select command.name, command.position, count(*)
from alias join command using (alias_id)
where alias.num_commands > 1
and alias_id not in
(select alias_id from command where command.operator != "" and command.operator != '|')
and command.position = 0
group by command.name
order by count(*) desc
limit 20;

 * sqlite:///results.db
Done.


name,position,count(*)
ps,0,8714
git,0,7900
ls,0,7294
find,0,4444
history,0,3763
ifconfig,0,3141
du,0,3091
echo,0,2075
cat,0,1724
lsof,0,1357


In [14]:
%%sql
select command.name, command.position, count(*)
from alias join command using (alias_id)
where alias.num_commands > 1
and alias_id not in
(select alias_id from command where command.operator != "" and command.operator != '|')
and command.position = 1
group by command.name
order by count(*) desc
limit 20;

 * sqlite:///results.db
Done.


name,position,count(*)
grep,1,28833
sort,1,6492
xargs,1,4688
head,1,3479
less,1,2893
sed,1,2646
pbcopy,1,2389
egrep,1,1810
awk,1,1730
tr,1,1458


In [15]:
%%sql
select command.name, command.position, count(*)
from alias join command using (alias_id)
where alias.num_commands > 1
and alias_id not in
(select alias_id from command where command.operator != "" and command.operator != '|')
and command.position = 2
group by command.name
order by count(*) desc
limit 20;

 * sqlite:///results.db
Done.


name,position,count(*)
grep,2,4127
awk,2,2662
xargs,2,2349
head,2,1999
sort,2,1701
cut,2,1460
sed,2,1427
GET,2,843
pbcopy,2,589
tail,2,549


In [16]:
%%sql
select command.name, command.position, count(*)
from alias join command using (alias_id)
where alias.num_commands > 1
and alias_id not in
(select alias_id from command where command.operator != "" and command.operator != '|')
and command.position = 3
group by command.name
order by count(*) desc
limit 20;

 * sqlite:///results.db
Done.


name,position,count(*)
xargs,3,1934
tr,3,845
grep,3,744
cut,3,627
sort,3,581
head,3,477
uniq,3,462
awk,3,378
sed,3,371
tail,3,111


---
## By Command

In [18]:
%%sql
select 
    cmd_name,
    count(distinct alias_id) as num,
    round(count(distinct alias_id)*100.0/total,2) as per
from (
    select command.name as cmd_name, alias_id
    from alias join command using (alias_id)
    where alias.num_commands > 1
    and alias_id not in
    (select alias_id from command where command.operator != "" and command.operator != '|')
)
join command_alias_totals using (cmd_name)
group by cmd_name
having num > 1000 and per > 25
order by per desc, num desc

 * sqlite:///results.db
Done.


cmd_name,num,per
uniq,1742,89.84
sort,8965,82.99
cut,3956,81.58
xargs,10348,76.01
ifconfig,3141,75.63
awk,4605,73.59
tr,3477,71.76
head,6482,71.35
ps,8716,66.45
/usr/sbin/lsof,1124,65.85


This next one counts all commands that are preceded by a pipe, not just those in purely pipeline aliases.

In [20]:
%%sql
select 
    cmd_name,
    count(distinct alias_id) as num,
    round(count(distinct alias_id)*100.0/total,2) as per
from (
    select command.name as cmd_name, alias_id
    from command
    where command.operator = '|'
)
join command_alias_totals using (cmd_name)
group by cmd_name
having num > 1000 and per > 25
order by per desc, num desc

 * sqlite:///results.db
Done.


cmd_name,num,per
uniq,1882,97.06
sort,10367,95.97
head,8392,92.37
cut,4345,89.61
xargs,12185,89.5
column,1061,87.25
awk,5178,82.74
wc,1818,77.43
sed,5282,70.01
tr,2900,59.86


---
Alias names for pipelines

In [12]:
%%sql
select alias.name, count(distinct alias_id) as '#'
from alias join command using (alias_id)
where alias.num_commands > 1
and alias_id not in
(select alias_id from command where command.operator != "" and command.operator != '|')
group by alias.name
order by 2 desc
limit 100;

 * sqlite:///results.db
Done.


name,#
lsd,1827
ips,1407
lm,1230
psg,1106
httpdump,847
chromekill,715
h,638
c,633
gd,591
openPorts,583


In [38]:
%%sql
select count(*), alias.name, alias.value
from alias join command using (alias_id)
where alias.num_commands > 1
and alias_id not in
(select alias_id from command where command.operator != "" and command.operator != '|')
and alias.name = 'lsd'
group by alias.value, alias.name
order by 1 desc
limit 50;

 * sqlite:///results.db
Done.


count(*),name,value
1562,lsd,ls -lF ${colorflag} | grep --color=never '^d'
404,lsd,"ls -l | grep ""^d"""
206,lsd,"ls -lF ${colorflag} | grep ""^d"""
110,lsd,"ls -l ${colorflag} | grep ""^d"""
96,lsd,"CLICOLOR_FORCE=1 ll | grep --color=never ""^d"""
94,lsd,ls -lhF ${colorflag} | grep --color=never '^d'
74,lsd,ls -alF | grep /$
62,lsd,ls -Gal | grep ^d
56,lsd,ls -l | grep ^d
46,lsd,ls -lF --color | grep --color=never '^d'


In [44]:
%%sql
select count(*), alias.name, alias.value
from alias join command using (alias_id)
where alias.num_commands > 1
and alias_id not in
(select alias_id from command where command.operator != "" and command.operator != '|')
and alias.name = 'pg'
group by alias.value, alias.name
order by 1 desc
limit 10;

 * sqlite:///results.db
Done.


count(*),name,value
206,pg,ps aux | grep
102,pg,ps -Af | grep $1
70,pg,ps -ef | grep
26,pg,ps aux | grep -i
24,pg,ps ax | grep
20,pg,ping -c 1 google.com | tail -3
15,pg,ps ax | grep -v grep | grep -i
10,pg,ps -e | grep
10,pg,ps auwwx | grep -i -e ^USER -e
10,pg,ps aux | grep $1
