# SelfBuildingModel
[www.vexpower.com](https://www.vexpower.com)

In [1]:
# Set the right folder

import sys
import os

# If there is no ./mmm directory next to the current working directory, add
# the parent directory to the import path so that `import mmm` can resolve
# (presumably the notebook lives in a sub-folder of the repository).
if not os.path.isdir("mmm"):
    module_path = os.path.abspath(os.pardir)
    # Avoid stacking duplicate entries when the cell is re-run.
    if module_path not in sys.path:
        sys.path.append(module_path)

import mmm

In [2]:
import pandas as pd
# Render floats with three fixed decimals rather than scientific notation.
pd.options.display.float_format = lambda x: '%.3f' % x

# Load the dataset from the sibling data folder and preview its first rows.
file_name = "GoolyBib-ABT - Sheet1.csv"
data = pd.read_csv('../data/' + file_name)
data.head()

Unnamed: 0,date,revenue,facebook_cost,affiliate_cost,tv_cost
0,2020-04-26,30706.27,5831.18,3196.74,0.0
1,2020-04-27,32945.17,5771.89,2871.31,0.0
2,2020-04-28,33317.07,5763.95,2806.83,0.0
3,2020-04-29,37614.49,6371.92,2916.7,0.0
4,2020-04-30,37704.89,5734.35,2349.41,0.0


In [None]:
from IPython.display import display

from mmm.clean import make_column_index
from mmm.engineer import add_constant, add_adstocks, add_diminishing_returns
from mmm.select import (
    get_all_X_labels,
    guess_date_column,
    guess_y_column,
    guess_media_columns,
    backwards_feature_elimination,
    find_best_feature,
)
from mmm.build import run_regression, create_results_df, create_pred_df
from mmm.validate import calculate_r2
from mmm.display import display_accuracy_chart, save_model, display_contrib_chart, display_decomp_chart


class SelfBuildingModel:
    """Marketing mix model that selects features, fits and reports on its own.

    Constructing an instance loads a CSV from ``../data/``, guesses the date,
    target and media columns with the ``mmm.select`` helpers and immediately
    runs the whole find -> fit -> save -> show pipeline.
    """

    def __init__(self, file_name):
        """Load the dataset, guess the column roles and build the model.

        Args:
            file_name: Name of the CSV file located in ``../data/``.
        """
        # load data
        self.df = pd.read_csv('../data/' + file_name)

        # Guess labels
        self.date_label = guess_date_column(self.df)
        # NOTE(review): the two helpers below are called for their side effect
        # only, so they presumably modify ``self.df`` in place -- confirm.
        make_column_index(self.df, self.date_label)
        add_constant(self.df)
        self.y_label = guess_y_column(self.df)
        self.X_labels = get_all_X_labels(self.df, self.y_label)
        self.media_labels = guess_media_columns(self.df)
        # Every candidate feature that is not a media column is a base variable.
        self.base_labels = [l for l in self.X_labels if l not in self.media_labels]

        # Set placeholders
        self.best_X_labels = None
        self.coefficients = None
        self.p_values = None
        self.error_label = "R2"
        self.error_func = calculate_r2
        self.error_value = None
        self.y_actual = None
        self.y_pred = None
        self.results_df = None
        self.pred_df = None

        # Self-build model
        self._ffs()

    def _ffs(self):
        """Run the full pipeline: find features, fit, save and show."""
        self.find()
        y_label, error_value, X_labels, coefficients = self.fit()
        save_model(y_label, error_value, X_labels, coefficients)
        self.show()

    def find(self):
        """Choose the features to model and store them in ``best_X_labels``.

        Base variables are filtered with backwards feature elimination. For
        each media column the best adstock variant is picked first, and then
        the best diminishing-returns variant of that adstocked column.
        """
        # bfe on base variables
        base_keep = backwards_feature_elimination(self.df, self.y_label, self.base_labels)

        # find best adstock and diminishing return rate of each media variable
        best_media_labels = []
        # The media columns are stored as ``media_labels`` in __init__; the
        # previous ``media_variables`` attribute never existed.
        for m in self.media_labels:
            adstock_columns = add_adstocks(self.df, m)
            best_adstock_column = find_best_feature(self.df, self.y_label, adstock_columns, base_keep)

            # Diminishing returns are applied on top of the winning adstock.
            diminishing_columns = add_diminishing_returns(self.df, best_adstock_column)
            best_diminishing_column = find_best_feature(self.df, self.y_label, diminishing_columns, base_keep)
            best_media_labels.append(best_diminishing_column)

        self.best_X_labels = base_keep + best_media_labels

    def fit(self):
        """Fit the regression on the selected features and store the results.

        Runs ``find()`` first if no feature set has been selected yet.

        Returns:
            A tuple ``(y_label, error_value, best_X_labels, coefficients)``.
        """
        if self.best_X_labels is None:
            self.find()

        self.y_actual, self.y_pred, self.coefficients, self.p_values = run_regression(
            self.df, self.y_label, self.best_X_labels)

        self.error_value = self.error_func(self.y_actual, self.y_pred)

        # The coefficients and p-values come from the regression on
        # ``best_X_labels``, so those are the labels they must be paired with
        # (not the initial ``X_labels`` candidate list).
        self.results_df = create_results_df(self.best_X_labels, self.coefficients, self.p_values)

        self.pred_df = create_pred_df(self.df, self.results_df)

        return self.y_label, self.error_value, self.best_X_labels, self.coefficients

    def show(self):
        """Display the results table and the accuracy, contribution and decomposition charts."""
        display(self.results_df)

        display_accuracy_chart(self.y_actual, self.y_pred, self.y_label,
                               accuracy=(self.error_label, self.error_value))

        display_contrib_chart(self.pred_df)
        display_decomp_chart(self.pred_df)