In [1]:
import math
import datetime
import numpy as np
import pandas as pd

from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource, Grid, LinearAxis, Plot, Range1d
from bokeh.models import DatetimeTickFormatter
from bokeh.io import output_notebook

from sklearn import datasets, linear_model

# Configure Bokeh to render plots inline in the notebook.
output_notebook()

# Also configure Bokeh to write its output to a standalone HTML file.
# NOTE(review): `output_file` is already imported from bokeh.plotting in the
# import block above, so this second import is redundant.
from bokeh.io import output_file
output_file('index.html')

In [2]:
# Prepare data
def _read_stats(path):
    """Load one statistics CSV indexed by its 'tidspunkt' column.

    The 'tidspunkt' column is parsed as dates and used as the index; the
    index is then normalized so every timestamp falls on midnight.
    """
    frame = pd.read_csv(
        path,
        sep=',',
        parse_dates=['tidspunkt'],
        index_col='tidspunkt',
    )
    frame.index = pd.DatetimeIndex(frame.index).normalize()
    return frame


# One pair of input files per vocabulary key; 'mu' holds the "mymapper"
# export and 'cc' the "ccmapper" export.
_stats_files = {
    'hume': {
        'mu': 'stats_mymapper_hume_20170129.csv',
        'cc': 'stats_ccmapper_hume_20170129.csv',
    },
    'real': {
        'mu': 'stats_mymapper_real_20170129.csv',
        'cc': 'stats_ccmapper_real_20170129.csv',
    },
}

sets = {
    vocab: {source: _read_stats(path) for source, path in files.items()}
    for vocab, files in _stats_files.items()
}

# Daily calendar that every vocabulary is aligned to (160 days from 2016-08-24).
dfi = pd.date_range('2016-08-24', periods=160, freq='D')

df = {}
for vocab, sources in sets.items():
    # Stack both exports row-wise and left-join them onto the daily calendar.
    stacked = pd.concat([sources['mu'], sources['cc']], axis=0)
    daily = pd.DataFrame(index=dfi).join(stacked)
    # 'idx' is the row position in the joined frame, assigned before rows
    # with missing values are dropped (used later as the regression x-axis).
    daily['idx'] = range(0, daily.shape[0])
    df[vocab] = daily.dropna()


In [3]:
# Build a 'Smoothed' copy of the 'begreper' column for each vocabulary.
#
# Rows dated 2016-12-04 .. 2017-01-18 get a linear ramp added to them. The
# ramp starts at diff1 (value on 2016-12-03 minus value on 2016-12-04) and
# climbs towards diff2 (value on 2017-01-19 minus value on 2017-01-18).
# Rows outside that window are copied unchanged.
for k in df:
    diff1 = df[k].loc['2016-12-03']['begreper'] - df[k].loc['2016-12-04']['begreper']
    diff2 = df[k].loc['2017-01-19']['begreper'] - df[k].loc['2017-01-18']['begreper']
    print(diff1, diff2)

    window = df[k].loc['2016-12-04':'2017-01-18']['begreper']
    # Size the ramp from the rows actually present in the window instead of
    # a hard-coded 42, so the element-wise addition below cannot fail with a
    # length mismatch when the data changes. max(..., 1) avoids dividing by
    # zero for an empty window (the ramp is then empty as well).
    n_rows = len(window)
    sm = diff1 + np.arange(n_rows) * (diff2 - diff1) / float(max(n_rows, 1))

    df[k]['Smoothed'] = pd.concat([
        df[k].loc[:'2016-12-03']['begreper'],
        window + sm,
        df[k].loc['2017-01-19':]['begreper'],
        ])


-1.0 344.0
418.0 754.0


In [8]:
# Fit a straight line to the 'Smoothed' series of each vocabulary.
#
# joined_df[k] : df[k] with an extra 'Fit' column holding the fitted values.
# speed[k]     : slope of the fitted line, i.e. change in 'Smoothed' per unit
#                of 'idx'.
joined_df = {}
speed = {}
for k, frame in df.items():
    # scikit-learn expects a 2-D feature matrix, hence the reshape to (n, 1).
    x = frame['idx'].values.reshape(-1, 1)
    y = frame['Smoothed'].values

    model = linear_model.LinearRegression()
    model.fit(x, y)
    speed[k] = model.coef_[0]
    print(k,': y=',speed[k],'x +', model.intercept_ )

    # Wrap the fitted values in a frame that shares the source index so they
    # can be joined back onto the original rows.
    fit = pd.DataFrame(data=model.predict(x), index=frame.index.values, columns=['Fit'])

    joined_df[k] = frame.join(fit, how='inner')

real : y= 14.7953500039 x + 1438.18746015
hume : y= 35.5768830921 x + 3388.1994536


In [9]:
# Plot the smoothed series and the fitted line for both vocabularies.
# NOTE(review): newer Bokeh releases use `legend_label=` instead of the
# `legend=` keyword on glyph methods — confirm against the installed version.
p = figure(title='Ferdigstilte begreper')

# Realfagstermer: smoothed data (brown) and linear fit (green).
#p.line(joined_df['real'].index, joined_df['real']['begreper'], color='blue')
p.line(joined_df['real'].index, joined_df['real']['Smoothed'], color='brown')
p.line(joined_df['real'].index, joined_df['real']['Fit'],
       line_width=2, color="green",
       legend='Realfagstermer: {:.0f} begreper / dag'.format(speed['real'])
      )

# Humord: smoothed data (brown) and linear fit (red).
# The x-values must come from the 'hume' frame itself; pairing the 'hume'
# series with the 'real' index mis-plots the data whenever the two frames
# do not cover exactly the same dates.
#p.line(joined_df['hume'].index, joined_df['hume']['begreper'], color='orange')
p.line(joined_df['hume'].index, joined_df['hume']['Smoothed'], color='brown')
p.line(joined_df['hume'].index, joined_df['hume']['Fit'], line_width=2, color="red",
      legend='Humord: {:.0f} begreper / dag'.format(speed['hume'])
      )

p.legend.location = 'top_left'

# Use the same full-date label at every tick resolution.
p.xaxis.formatter=DatetimeTickFormatter(
        hours=["%d %B %Y"],
        days=["%d %B %Y"],
        months=["%d %B %Y"],
        years=["%d %B %Y"],
    )

# Rotate the x-axis labels by 45 degrees.
p.xaxis.major_label_orientation = math.pi/4

show(p)

In [39]:
# Concepts left per vocabulary, computed as count / share - count.
# NOTE(review): presumably `count` is the number of concepts finished so far
# and `share` the fraction of the vocabulary they represent — confirm.
_progress = {
    'hume': (9236., .51),
    'real': (3822., .27),
}
cl = {name: count / share - count for name, (count, share) in _progress.items()}

# NOTE: The summer holiday is not accounted for here, so a few weeks should
# probably be added to the estimate.

_report = '{:s}: {:.0f} concepts left / {:.0f} concepts/day = {:.0f} days needed => complete at {:%Y-%m-%d}'

for name, remaining in cl.items():
    # Days needed at the fitted rate, projected forward from the current time.
    days_needed = remaining / speed[name]
    finish = datetime.datetime.now() + datetime.timedelta(days=days_needed)
    print(_report.format(name, remaining, speed[name], days_needed, finish))

hume: 8874 concepts left / 36 concepts/day = 249 days needed => complete at 2017-10-06
real: 10334 concepts left / 15 concepts/day = 698 days needed => complete at 2018-12-29
