# Count my lines of code

In [28]:
import json
from pathlib import Path
import collections

In [7]:
# set up paths
my_projs = Path('../')
etcbc_projs = [
    Path('../../etcbc/heads'), 
    Path('../../etcbc/lingo/heads'),
    Path('../../etcbc/genre_synvar')
]
camb_projs = [
    p for p in Path('../../CambridgeSemiticsLab/').glob('*')
        if p.name not in {'nena', '.DS_Store', '.gitignore', '.git'}
]

In [9]:
proj_dirs = [] # append them here

proj_dirs += etcbc_projs
proj_dirs += camb_projs

for p in my_projs.glob('*'):
    if not p.name.startswith('.'):
        proj_dirs.append(p)
    
print(len(proj_dirs), 'projects logged')
proj_dirs

22 projects logged


[PosixPath('../../etcbc/heads'),
 PosixPath('../../etcbc/lingo/heads'),
 PosixPath('../../etcbc/genre_synvar'),
 PosixPath('../../CambridgeSemiticsLab/nena_tf'),
 PosixPath('../../CambridgeSemiticsLab/nena_corpus'),
 PosixPath('../../CambridgeSemiticsLab/nena_research'),
 PosixPath('../../CambridgeSemiticsLab/time_collocations'),
 PosixPath('../../CambridgeSemiticsLab/Gesenius_data'),
 PosixPath('../tischendorf_tf'),
 PosixPath('../pyling'),
 PosixPath('../noun_semantics'),
 PosixPath('../verb_in_biblical_hebrew'),
 PosixPath('../opencv_play'),
 PosixPath('../CATSS_parsers'),
 PosixPath('../my_loc'),
 PosixPath('../verb_semantics'),
 PosixPath('../bhsa_vectors'),
 PosixPath('../where_do_you_mean'),
 PosixPath('../spacex_smartweets'),
 PosixPath('../phd-thesis-template'),
 PosixPath('../CCAT'),
 PosixPath('../tfNotebooks')]

In [37]:
def get_notebook_lines(nb_path):
    """Retrieve lines of code from a notebook."""
    load_nb = json.loads(nb_path.read_text())
    try:
        cells = load_nb['cells']
    except:
        print('skipping', nb_path)
        return ''
    code = ''
    for cell in cells:
        if cell['cell_type'] == 'code':
            code += ''.join(cell['source'])
    return code

In [40]:
def count_codelines(code_str):
    """Count codelines in a file."""
    counter = 0
    for line in code_str.split('\n'):
        if line:
            counter += 1
    return counter
    
def count_all_codelines(directory, counter):
    """Recursively count Python codelines in a directory."""
    
    for path in directory.glob('*'):
        
        if path.name.endswith('.py'):
            code_str = path.read_text()
            counter[directory.name] += count_codelines(code_str)
            
        elif path.name.endswith('.ipynb'):
            code_str = get_notebook_lines(path)
            counter[directory.name] += count_codelines(code_str)
        
        elif path.is_dir() and not path.name.startswith('.'):
            count_all_codelines(path, counter) # recursive call

# Execute Counts

In [44]:
counter = collections.Counter()

for proj in proj_dirs:
    count_all_codelines(proj, counter)

In [45]:
print('counts by project: ')
counter.most_common(25) 

counts by project: 


[('heads', 6388),
 ('project_code', 3521),
 ('pyscripts', 2427),
 ('cxs', 2277),
 ('cx_dev', 2223),
 ('tools', 2213),
 ('exploratory', 2006),
 ('chapters', 1944),
 ('previous_phases', 1803),
 ('advb_article', 1758),
 ('qtl', 1599),
 ('dev', 1518),
 ('analysis', 1506),
 ('trees', 1449),
 ('parse', 1286),
 ('CATSS_parsers', 1228),
 ('gbi', 1202),
 ('wordsets', 1175),
 ('nena2tf', 1144),
 ('hebrew', 1141),
 ('cx_development', 1019),
 ('cx_analysis', 1018),
 ('initial_exploration', 998),
 ('text_parser', 978),
 ('Barwar', 970)]

In [46]:
# get total count
sum(counter.values())

68293