In [1]:
import sqlite3
import pandas as pd
import stringdist
import matplotlib.pyplot as plt
%matplotlib inline
%load_ext sql

In [2]:
# Point the sql magic at the SQLite file results.db; the queries in the cells below run against it.
%sql sqlite:///results.db

## Autocorrect

Candidates for autocorrect are all aliases consisting of a single command, where the alias name is the same length as the command name. We first exclude two-character aliases, because of the common occurrence of aliases like `ll=ls -l`, and then add back those two-character aliases whose names are their commands reversed, like `sl=ls` or `dc=cd`.

In [5]:
%%sql candidates <<
-- Branch 1 keeps single-command aliases longer than two characters
-- whose name is exactly as long as the command name.
SELECT alias.name, command.name
FROM alias
JOIN command USING (alias_id)
WHERE length(alias.name) > 2
  AND length(alias.name) = length(command.name)
  AND alias.num_commands = 1

UNION

-- Branch 2 keeps two-character aliases only when the first and last
-- characters are swapped relative to the command name.
SELECT alias.name, command.name
FROM alias
JOIN command USING (alias_id)
WHERE length(alias.name) = 2
  AND length(alias.name) = length(command.name)
  AND alias.num_commands = 1
  AND substr(alias.name, 1, 1) = substr(command.name, -1, 1)
  AND substr(alias.name, -1, 1) = substr(command.name, 1, 1);

 * sqlite:///results.db
Done.
Returning data to local variable candidates


Now we need to empirically determine a good cutoff for the distance measure. This means playing around with the following code:

In [77]:
# Largest restricted Damerau-Levenshtein distance at which an alias is still
# treated as a typo of its command. This is the value to tune when exploring.
MAX_DISTANCE = 2

# Collect (alias, command, distance) for every candidate pair that differs
# from its command (distance > 0) by at most MAX_DISTANCE edits.
# `stringdist` is imported in the first cell of the notebook.
autocorrects = []
for alias, cmd in candidates:
    dist = stringdist.rdlevenshtein(alias, cmd)
    if 0 < dist <= MAX_DISTANCE:
        autocorrects.append((alias, cmd, dist))

# Largest distances first, so the borderline matches are at the head of the list.
autocorrects.sort(key=lambda tup: tup[2], reverse=True)

print('candidates: %d' % len(candidates))
print('autocorrects: %d' % len(autocorrects))
# Uncomment to eyeball the borderline matches while tuning MAX_DISTANCE:
#print(*autocorrects[:200], sep='\n')

candidates: 33261
autocorrects: 9195


Seems like 2 is a good cutoff.

In [56]:
total_r = %sql select count(*) from alias;
total = total_r[0][0]
autocorr = len(autocorrects);
round(autocorr*100.0/total,2)

 * sqlite:///results.db
Done.


0.42

## Disabling built-in zsh autocorrect

In [72]:
%%sql
-- Count the alias/command rows whose command name is 'nocorrect' and
-- express that count as a percentage of all rows in alias.
WITH alias_count AS (SELECT count(*) AS n FROM alias)
SELECT count(*) AS '#',
       round(count(*) * 100.0 / alias_count.n, 2) AS '%'
FROM alias
JOIN command USING (alias_id)
JOIN alias_count
WHERE command.name = 'nocorrect'

 * sqlite:///results.db
Done.


#,%
7326,0.33


In [75]:
%%sql
-- The ten alias names that most often appear with a 'nocorrect' command.
SELECT alias.name, count(*)
FROM alias
JOIN command USING (alias_id)
WHERE command.name = 'nocorrect'
GROUP BY alias.name
ORDER BY count(*) DESC
LIMIT 10;

 * sqlite:///results.db
Done.


name,count(*)
mv,884
mkdir,837
cp,695
rm,405
man,360
sudo,276
mysql,217
gist,191
heroku,187
ln,173
