# Developing Formula

## Data

In [1]:
# Repository metrics keyed by project name; every inner mapping carries the
# same five counters in the same order.
data = dict(
    react=dict(stars=115000, forks=20000, watches=6000, commits=10500, releases=103),
    angular=dict(stars=42000, forks=10000, watches=3000, commits=11000, releases=270),
    vue=dict(stars=118000, forks=16000, watches=5000, commits=2600, releases=200),
    lodash=dict(stars=35000, forks=3000, watches=800, commits=7900, releases=400),
    dojo=dict(stars=1000, forks=500, watches=100, commits=3900, releases=217),
    stackedit=dict(stars=13000, forks=2000, watches=500, commits=264, releases=115),
)

In [3]:
import pandas as pd

# Outer keys (project names) become the columns; inner keys (metric names)
# become the row index.
df = pd.DataFrame.from_dict(data, orient='columns')
df.head(10)

In [4]:
# Show the dtype of each column; at this point each project is one column.
df.dtypes

react        int64
angular      int64
vue          int64
lodash       int64
dojo         int64
stackedit    int64
dtype: object

In [5]:
# Transpose so each project is a row and each metric a column.
# Rebuilt from `data` instead of `df = df.T` so that re-running this cell
# cannot flip the frame back to its original orientation.
df = pd.DataFrame(data).T

In [6]:
# Preview the transposed frame (first five rows).
df.head()

Unnamed: 0,commits,forks,releases,stars,watches
react,10500,20000,103,115000,6000
angular,11000,10000,270,42000,3000
vue,2600,16000,200,118000,5000
lodash,7900,3000,400,35000,800
dojo,3900,500,217,1000,100


## Read sample data for formula

In [7]:
# Load the sample repository frame used by the formulas below. This rebinds
# `df`, replacing the hand-built frame from the "Data" section.
# NOTE(review): unpickling can execute arbitrary code -- only load pickle
# files from a trusted source. The path is relative to the working directory.
df = pd.read_pickle('./formula/sample_pickle.pkl')

In [8]:
# Dump the loaded frame as plain text.
# NOTE(review): ending the cell with a bare `df.head()` would use the
# notebook's rich table display and avoid printing the whole frame.
print(df)


                                   nameWithOwner           createdAt  \
facebook_react                    facebook/react 2013-05-24 16:15:54   
vuejs_vue                              vuejs/vue 2013-07-29 03:24:51   
torvalds_linux                    torvalds/linux 2011-09-04 22:48:12   
Valormmm_RannerBackend  Valor-mmm/Ranner-Backend 2018-03-25 15:12:51   
benweet_stackedit              benweet/stackedit 2013-03-24 13:46:06   

                                 updatedAt   stars  releases  isFork  \
facebook_react         2018-11-10 11:16:21  115156        71   False   
vuejs_vue              2018-11-10 11:33:44  118611       187   False   
torvalds_linux         2018-11-10 11:34:15   65672         0   False   
Valormmm_RannerBackend 2018-09-25 19:55:56       0         0   False   
benweet_stackedit      2018-11-10 09:21:49   13017        12   False   

                        forkCount  commitCount  \
facebook_react              20770        10496   
vuejs_vue                   16859 

## Formula Checks

### Apply Formula

In [9]:
def apply_formula(formula, name, dataframe):
    """Evaluate ``formula`` on every row and store the results as a column.

    Args:
        formula: Callable that receives one row (a ``pandas.Series``, as
            yielded by ``DataFrame.iterrows``) and returns that row's value.
        name: Label of the column to create or overwrite.
        dataframe: Frame to score. It is modified in place.

    Returns:
        None. The scores are written to ``dataframe[name]``.
    """
    # Collect the scores in row order instead of in a dict keyed by index
    # label: a dict collapses rows that share a label, so every duplicate
    # would end up with the score of the last such row.
    scores = [formula(row) for _, row in dataframe.iterrows()]

    # Re-attach the frame's own index so the assignment is purely positional.
    dataframe[name] = pd.Series(scores, index=dataframe.index)

### Attempt 1

$stars +  forkCount + watchers$

In [10]:
def formula1(series):
    """Score a repository as the plain sum of its popularity counters.

    Args:
        series: Row-like object exposing ``stars``, ``forkCount`` and
            ``watchers`` attributes.

    Returns:
        ``stars + forkCount + watchers``.
    """
    popularity = series.stars
    popularity = popularity + series.forkCount
    popularity = popularity + series.watchers
    return popularity

### Attempt 2

$\frac{stars + forkCount + watchers}{(inactive\_days + 1) * 1.2}$

In [11]:
from datetime import datetime


def formula2(series, now=None):
    """Popularity divided by a penalty that grows with days of inactivity.

    Computes ``(stars + forkCount + watchers) / ((inactive_days + 1) * 1.2)``
    where ``inactive_days`` is the number of whole days between
    ``updatedAt`` and ``now``, never less than zero.

    Args:
        series: Row exposing ``stars``, ``forkCount``, ``watchers`` and a
            datetime-like ``updatedAt``.
        now: Reference instant for measuring inactivity. Defaults to the
            current time: naive local time when ``updatedAt`` is naive,
            otherwise the current time in ``updatedAt``'s time zone. Pass a
            fixed value to get reproducible scores.

    Returns:
        The score as a float.
    """
    if now is None:
        # datetime.now(None) is plain naive local time; following an aware
        # ``updatedAt`` avoids the naive/aware subtraction TypeError.
        now = datetime.now(getattr(series.updatedAt, 'tzinfo', None))

    positive = series.stars + series.forkCount + series.watchers

    # An ``updatedAt`` slightly ahead of ``now`` yields ``days == -1``, which
    # would make the denominator zero; clamp to "updated today".
    # NOTE(review): if updatedAt is stored as naive UTC, comparing it with
    # local time can produce exactly that case -- confirm the time zone.
    inactive_days = max((now - series.updatedAt).days, 0)
    return positive / ((inactive_days + 1) * 1.2)


### Attempt 3

$\frac{stars + forkCount + watchers + project\_days}{(inactive\_days + 1) * 1.2}$

In [12]:
from datetime import datetime


def formula3(series, now=None):
    """Popularity plus project lifetime, penalised by days of inactivity.

    Computes ``(stars + forkCount + watchers + project_days) /
    ((inactive_days + 1) * 1.2)``. ``project_days`` is the number of whole
    days between ``createdAt`` and ``updatedAt``; ``inactive_days`` is the
    number of whole days between ``updatedAt`` and ``now``, never less than
    zero.

    Args:
        series: Row exposing ``stars``, ``forkCount``, ``watchers`` and
            datetime-like ``createdAt`` and ``updatedAt``.
        now: Reference instant for measuring inactivity. Defaults to the
            current time: naive local time when ``updatedAt`` is naive,
            otherwise the current time in ``updatedAt``'s time zone. Pass a
            fixed value to get reproducible scores.

    Returns:
        The score as a float.
    """
    if now is None:
        # datetime.now(None) is plain naive local time; following an aware
        # ``updatedAt`` avoids the naive/aware subtraction TypeError.
        now = datetime.now(getattr(series.updatedAt, 'tzinfo', None))

    project_days = (series.updatedAt - series.createdAt).days
    positive = series.stars + series.forkCount + series.watchers + project_days

    # An ``updatedAt`` slightly ahead of ``now`` yields ``days == -1``, which
    # would make the denominator zero; clamp to "updated today".
    inactive_days = max((now - series.updatedAt).days, 0)
    return positive / ((inactive_days + 1) * 1.2)

### Attempt 4

$\frac{stars + forkCount + watchers + project\_days + (commits / 8)}{inactive\_days + 1}$

In [13]:
from datetime import datetime


def formula4(series, now=None):
    """Popularity, lifetime and commit volume over days of inactivity.

    Computes ``(stars + forkCount + watchers + project_days +
    commitCount / 8) / (inactive_days + 1)``. ``project_days`` is the number
    of whole days between ``createdAt`` and ``updatedAt``; ``inactive_days``
    is the number of whole days between ``updatedAt`` and ``now``, never
    less than zero.

    Args:
        series: Row exposing ``stars``, ``forkCount``, ``watchers``,
            ``commitCount`` and datetime-like ``createdAt`` and
            ``updatedAt``.
        now: Reference instant for measuring inactivity. Defaults to the
            current time: naive local time when ``updatedAt`` is naive,
            otherwise the current time in ``updatedAt``'s time zone. Pass a
            fixed value to get reproducible scores.

    Returns:
        The score as a float.
    """
    if now is None:
        # datetime.now(None) is plain naive local time; following an aware
        # ``updatedAt`` avoids the naive/aware subtraction TypeError.
        now = datetime.now(getattr(series.updatedAt, 'tzinfo', None))

    project_days = (series.updatedAt - series.createdAt).days
    # Commits are scaled down by a factor of 8 before being added.
    commit_value = series.commitCount / 8
    positive = (
        series.stars + series.forkCount + series.watchers
        + project_days + commit_value
    )

    # An ``updatedAt`` slightly ahead of ``now`` yields ``days == -1``, which
    # would make the denominator zero; clamp to "updated today".
    inactive_days = max((now - series.updatedAt).days, 0)
    return positive / (inactive_days + 1)


## Comparison

In [14]:
# Score `df` with every attempt, in order; each call adds one column to `df`.
# The fourth attempt is stored under the name 'finalFormula'.
for _column, _formula in (
    ('formula1', formula1),
    ('formula2', formula2),
    ('formula3', formula3),
    ('finalFormula', formula4),
):
    apply_formula(_formula, _column, df)

In [15]:
# Keep the raw star count next to each formula's score for comparison.
analysis_df = df.loc[
    :,
    ['stars', 'formula1', 'formula2', 'formula3', 'finalFormula'],
]

In [15]:
# Persist the comparison frame as a pickle; the path is relative to the
# working directory.
analysis_df.to_pickle('./formula/formula_analysis_df.pkl')

In [17]:
# Summary statistics for the numeric columns, including the formula columns
# added above.
# NOTE(review): a bare `df.describe()` as the last expression would render
# as a rich table instead of wrapped plain text.
print(df.describe())

               stars    releases     forkCount    commitCount     watchers  \
count       5.000000    5.000000      5.000000       5.000000     5.000000   
mean    62491.200000   54.000000  12704.600000  162055.400000  3855.000000   
std     55421.063556   79.959365  10998.885912  354833.675324  3304.159197   
min         0.000000    0.000000      0.000000      88.000000     0.000000   
25%     13017.000000    0.000000   1991.000000     264.000000   525.000000   
50%     65672.000000   12.000000  16859.000000    2673.000000  5666.000000   
75%    115156.000000   71.000000  20770.000000   10496.000000  6530.000000   
max    118611.000000  187.000000  23903.000000  796756.000000  6554.000000   

          diskUsage      formula1      formula2      formula3  finalFormula  
count  5.000000e+00       5.00000      5.000000      5.000000      5.000000  
mean   5.072048e+05   79050.80000  32937.833333  33655.485816  50513.992287  
std    1.006712e+06   67915.38165  28298.075688  28545.301281  