In [None]:
import datetime as dt
import json
import os
import os.path
import pathlib as pth
import tempfile
import textwrap

import joblib
import pandas as pd
import numpy as np
import altair as alt
from altair.vega.v4 import Vega

import codemetrics as cm
import codemetrics.vega

%matplotlib inline
def retina():
    """Build the Altair theme configuration used by this notebook.

    Returns:
        dict: a theme whose only setting is the default view size:
        400 px high and 400 * 1.61 px wide.
    """
    view_height = 400
    view_size = {'height': view_height, 'width': view_height * 1.61}
    return {'config': {'view': view_size}}
# Register the theme under the name 'retina', then make it the active theme.
# The registration has to come first so that the name exists when it is enabled.
alt.themes.register('retina', retina)
alt.themes.enable('retina')

In [None]:
# Re-import selected codemetrics submodules in place so that the objects used
# below come from the freshly loaded module code (presumably handy while the
# library is being edited alongside this notebook).
import importlib
# Reload targets that are currently disabled are kept commented out; only
# cm.git and cm.core are reloaded, in that order.
#importlib.reload(cm)
importlib.reload(cm.git)
importlib.reload(cm.core)
#importlib.reload(cm.vega)
#importlib.reload(cm.cloc)
#importlib.reload(cm.internals)

In [None]:
# Cache the expensive codemetrics calls on disk. TEMP is typically only set on
# Windows; when it is missing os.getenv returns None, and joblib.Memory treats
# location=None as "do not cache". Fall back to the platform temp directory.
disk = joblib.Memory(location=os.getenv('TEMP') or tempfile.gettempdir(), verbose=0)
get_cloc = disk.cache(cm.get_cloc)
get_git_log = disk.cache(cm.get_git_log)

# Start of the analysis window: 365 days back, truncated to midnight UTC.
# The value is passed to the cached get_git_log; an un-truncated now() differs
# on every run, so the cached call would never see the same argument twice.
today_utc = dt.datetime.now(tz=dt.timezone.utc).replace(
    hour=0, minute=0, second=0, microsecond=0)
year_ago = today_utc - dt.timedelta(365)

# Local checkout of the repository to analyse. The notebook switches into it
# because the calls below use relative paths (e.g. path='.').
target_location = pth.Path.home() / 'Documents' / 'Github' / 'pandas'
cwd = os.path.abspath(os.path.curdir)  # working directory before the chdir below
os.chdir(target_location)

# Drop any previously cached cloc results so the next get_cloc call recomputes.
get_cloc.clear()

In [None]:
# Path of the cloc executable, looked up under <home>/scripts. It is printed so
# that the location actually used is visible in the notebook output.
cloc_program = str(pth.Path.home().joinpath('scripts', 'cloc-1.64.exe'))
print(cloc_program)

# Line counts as reported by cloc (the call goes through the joblib cache).
loc = get_cloc(cloc_program=cloc_program)

In [None]:
# Total line counts per language, reshaped to long form (one row per language
# and line type) for plotting.
# numeric_only=True restricts the sum to the numeric count columns. Without it,
# recent pandas releases no longer drop non-numeric columns (presumably 'path'
# here), and those would end up as extra rows in the melted frame.
loc_sum = (
    loc.groupby('language')
    .sum(numeric_only=True)
    .reset_index()
    .melt(id_vars=['language'])
    .rename(columns={'variable': 'type', 'value': 'lines'})
)

# Horizontal stacked bars: one bar per language, coloured by line type and
# ordered by the total number of lines.
alt.Chart(loc_sum).mark_bar().encode(
    x=alt.X('lines:Q'),
    y=alt.Y(
        'language:N',
        sort=alt.EncodingSortField(field='lines', op='sum', order='descending'),
    ),
    color=alt.Color('type:N', scale=alt.Scale(scheme='accent')),
    tooltip=['lines:Q', 'type:O'],
).properties(title='Lines of code')

In [None]:
# Git history of the current directory since `year_ago` (cached call).
log = get_git_log(
    path='.',
    after=year_ago,
    git_client='git.exe',
)

# Issue number taken from a "(#1234)" reference in the commit message.
log['issue'] = log['message'].str.extract(r'\(#(\d+)\)')

# Inner join on path: keep only the log rows whose path also appears in `loc`.
log = log.merge(loc[['path']], on='path')

In [None]:
# Age in days of each file, joined with the cloc data.
# FIXME BUG in get_ages.
# The per-path minimum is presumably the workaround for that bug - confirm
# before removing it. 'age' is selected before aggregating so that min() is
# only evaluated on the column that is actually kept.
ages = (
    cm.get_ages(log)
    .groupby('path')['age']
    .min()
    .reset_index()
    .merge(loc)
)

# Evaluate "today" once so every row is measured against the same instant,
# then derive both columns with vectorised operations instead of row-wise apply.
today = pd.to_datetime('today')
ages['last_change'] = today - pd.to_timedelta(ages['age'], unit='D')
# Division followed by astype(int) truncates toward zero, like int(d / 7).
ages['age_in_week'] = (ages['age'] / 7).astype(int)
ages.head()

In [None]:
# Two stacked views of file ages sharing the same width and colour encoding:
# a histogram of files per week of age, and a tick strip of individual files.
width = 1000
weeks = list(range(400 // 7))  # explicit ordinal domain for the weekly axis
chart = alt.Chart(ages).encode(color='language')

# Histogram: number of files per age bucket, age_agg = floor(age / 7).
top = (
    chart.mark_bar()
    .encode(
        x=alt.X(
            'age_agg:O',
            sort='ascending',
            title='age in weeks',
            scale=alt.Scale(domain=weeks),
        ),
        y=alt.Y('count(path):Q', title='Number of files'),
        color=alt.Color('language', scale=alt.Scale(scheme='tableau10')),
        tooltip=['count(path)', 'language'],
    )
    .transform_calculate(age_agg='floor(datum.age / 7)')
    .properties(width=width)
)

# Tick strip: one semi-transparent tick per file, positioned by age in days.
bottom = (
    chart.mark_tick(size=60, thickness=2, opacity=.3)
    .encode(
        x=alt.X('age:Q', title='age in days'),
        tooltip='path',
    )
    .properties(width=width)
)

alt.vconcat(top, bottom)

In [None]:
# Build the description of the ages visualisation with codemetrics and render
# it through the Vega display wrapper.
desc = cm.vega.vis_ages(
    ages,
    width=500,
    height=500,
)
Vega(desc)

In [None]:
# For every .py file, find the revision of its most recent change:
# take the latest date per path, keep the Python files, then join back to the
# log on (path, date) to recover the matching revision.
path_rev_df = (
    log[['path', 'date']]
    .groupby('path')
    .max()
    .reset_index()
    .loc[lambda latest: latest['path'].str.endswith('.py')]
    .merge(log[['path', 'date', 'revision']], on=['path', 'date'])
    [['path', 'revision']]
)

# Complexity of those file revisions, computed through the joblib cache.
get_complexity = disk.cache(cm.get_complexity)
complexity = get_complexity(path_rev_df, cm.git.download_files)

In [None]:
# Per-file 0.8 quantile of cyclomatic complexity and token count, most complex
# files first. Despite the 'avg' in the names, the statistic is a quantile;
# the complexity column is exposed as 'avg_cc80'.
path_avg_complexity = (
    complexity[['path', 'cyclomatic_complexity', 'token_count']]
    .groupby('path')
    .quantile(.8)
    .sort_values(by='cyclomatic_complexity', ascending=False)
    .reset_index()
    .rename(columns={'cyclomatic_complexity': 'avg_cc80'})
)

In [None]:
# Attach the per-file complexity figures to the cloc data (inner join on the
# columns the two frames have in common).
loc_cc = loc.merge(path_avg_complexity)

In [None]:
# Hot spots: change history from the log combined with size and complexity.
hspots = cm.get_hot_spots(log, loc_cc)

# NOTE(review): only the last expression of a cell is rendered, so this sorted
# Python-only view is evaluated but never shown. Move it to its own cell if it
# is meant to be displayed.
(
    hspots
    .query("language == 'Python'")
    .sort_values(by=['changes', 'avg_cc80'], ascending=False)
    .head()
)

hspots.head(10)

In [None]:
# Hot spot visualisation with marks sized by the 'avg_cc80' column, rendered
# through the Vega display wrapper.
desc = cm.vega.vis_hot_spots(
    hspots,
    size_column='avg_cc80',
    width=500,
    height=500,
)
Vega(desc)

In [None]:
# Files that change together within the same issue. Keep only pairs with a
# coupling above 0.6, ordered by number of changes and then co-changes.
co_changes = (
    cm.get_co_changes(log, by='path', on='issue')
    .query('coupling > .6')
    .sort_values(by=['changes', 'cochanges'], ascending=False)
)
co_changes.head()

In [None]:
# Every logged revision of the single file inspected at function level.
path = 'pandas/core/generic.py'
func_df = log.loc[log['path'] == path, ['date', 'revision', 'path']]

In [None]:
# Complexity of each revision of the file, joined back to the revision dates.
func_cc_df = func_df.merge(
    get_complexity(func_df, cm.git.download_files),
    on=['revision', 'path'],
)

# The 8 names with the highest mean cyclomatic complexity across revisions.
top_func_df = (
    func_cc_df
    .groupby('name')[['cyclomatic_complexity']]
    .mean()
    .nlargest(8, 'cyclomatic_complexity')
)

# Restrict the per-revision data to those names.
filt_func_cc_df = func_cc_df.loc[func_cc_df['name'].isin(top_func_df.index)]

In [None]:
# Cyclomatic complexity over time, one line per selected name.
(
    alt.Chart(filt_func_cc_df)
    .mark_line()
    .encode(
        x='date:T',
        y='cyclomatic_complexity:Q',
        color='name:N',
        tooltip=['name'],
    )
)