This file calculates the monthly and cumulative churn-weighted DMM scores for each url.

In [1]:
import pandas as pd

In [4]:
# Path of the per-commit CSV that is read with pd.read_csv.
commit_data = "commit_data_average.csv"
# Path the monthly aggregate table is written to with DataFrame.to_csv.
export_csv_name = "test.csv"

In [5]:
# Load the per-commit records. 'commit_date' is parsed as a datetime so the
# .dt accessor can be used below to bucket commits by month.
df = pd.read_csv(commit_data, parse_dates=["commit_date"])

# Per-commit DMM score: the row-wise mean of the three DMM component columns.
# NOTE: mean() skips missing values by default, so a row only ends up with a
# NaN score when all three components are missing.
dmm_cols = ["dmm_unit_size", "dmm_unit_complexity", "dmm_unit_interfacing"]
df["dmm"] = df.loc[:, dmm_cols].mean(axis="columns")

# Bucket each commit into its calendar month, represented by the timestamp
# at the start of that month.
commit_period = df["commit_date"].dt.to_period("M")
df["month"] = commit_period.dt.to_timestamp()

# Aggregate the per-commit data into one row per (url, month), derive the
# monthly and cumulative churn-weighted DMM scores, and export the result.
#
# Reads the 'url', 'month', 'churn' and 'dmm' columns of `df`.
group_keys = ["url", "month"]

# Commits with a missing DMM score add nothing to the weighted sum (sum()
# skips NaN), so their churn must not count towards the weight total either.
# Otherwise the weighted average is biased towards zero, and a month without
# any scored commit is reported as 0 instead of "no data" (NaN).
per_commit = df.assign(
    weighted_dmm=df["dmm"] * df["churn"],
    scored_churn=df["churn"].where(df["dmm"].notna()),
)
grouped = per_commit.groupby(group_keys)

# Total churn of all commits per (url, month).
churn_df = grouped["churn"].sum().reset_index(name="churn_sum")
# Sum of dmm * churn per (url, month).
weighted_df = grouped["weighted_dmm"].sum().reset_index(name="weighted_dmm_sum")
# Churn of only those commits that have a DMM score: the actual weight total.
scored_df = grouped["scored_churn"].sum().reset_index(name="scored_churn_sum")

monthly = (
    churn_df
    .merge(weighted_df, on=group_keys)
    .merge(scored_df, on=group_keys)
    .sort_values(group_keys)
)

# The weight totals are helpers only: take them out of the frame so the
# exported columns stay exactly as before. 'churn_sum' and 'cum_churn' keep
# reporting the total churn, including commits without a DMM score.
scored_churn = monthly.pop("scored_churn_sum")
cum_scored_churn = scored_churn.groupby(monthly["url"]).cumsum()

# Monthly score: churn-weighted mean DMM of the month's scored commits.
monthly["dmm_monthly_w"] = monthly["weighted_dmm_sum"] / scored_churn
# Running totals per url; rows are sorted by month within each url, so the
# cumulative sums run in chronological order.
monthly["cum_churn"] = monthly.groupby("url")["churn_sum"].cumsum()
monthly["cum_weighted_dmm"] = monthly.groupby("url")["weighted_dmm_sum"].cumsum()
# Cumulative score: churn-weighted mean DMM over all months up to this one.
monthly["dmm_cumulative_w"] = monthly["cum_weighted_dmm"] / cum_scored_churn

monthly.to_csv(export_csv_name, index=False)