# Solution planning

**Business problem**

- Check whether the new sign-up button on the website has a better, equal, or worse conversion rate than the current one

Observations

* The conversion rate is measured through clicks on the sign-up button: the number of sign-up clicks (successes) divided by the total number of visits
* The current page has a blue color button and the alternative has a red color.

## Input

- Data acquired through website


## Tasks

- Design what type of test will be used (Multi-Armed Bandit with Thompson agent)
- Choose the metric of interest
- Define the posterior distribution (beta-binomial model)
- Define the prior distribution (beta distribution)
- Define maximum accepted loss ( 5% )
- Design test infrastructure (define endpoint, API, two websites, agent)
- Design two 'fake' websites with different buttons colors
- Create an API in flask to run websites
- Create a web scraper
- Collect data
- Calculate the probability of page B being better than page A

# Imports and helper functions

In [None]:
import numpy as np
import pandas as pd
from scipy.stats import beta
from scipy.stats import norm
from flask import Flask, render_template, redirect, url_for, request
from matplotlib.animation import FuncAnimation
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np 
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import numpy as np
import time

# Function that calculates the probability of B being better than A
def bayesian_inference(data, n_mc=10000):
    """Estimate P(B > A) and the expected loss of each choice, row by row.

    Each row of ``data`` holds cumulative counts. For every row the
    conversion rate of each page gets a Beta(1 + clicks, 1 + non-clicks)
    posterior. The integrals over the two posteriors are estimated by
    importance sampling: points are drawn from normal distributions centred
    on each posterior mean (standard deviation widened by a factor of 1.25)
    and re-weighted by the ratio beta pdf / normal pdf.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``acc_click_A``, ``acc_visit_A``,
        ``acc_click_B`` and ``acc_visit_B``. Rows are processed in
        positional order, so any index is accepted.
    n_mc : int, optional
        Number of Monte Carlo samples drawn per row (default 10000).

    Returns
    -------
    tuple of three lists, each with one entry per row of ``data``:
        ``proba_b_better_a`` - estimated probability that B converts at
        least as well as A;
        ``expected_loss_a`` - estimated conversion-rate loss from choosing A
        when B is better;
        ``expected_loss_b`` - estimated conversion-rate loss from choosing B
        when A is better.
    """
    # Widening factor applied to the proposal's standard deviation.
    proposal_widening = 1.25

    # Positional access: works regardless of the frame's index labels.
    clicks_a = data['acc_click_A'].to_numpy()
    visits_a = data['acc_visit_A'].to_numpy()
    clicks_b = data['acc_click_B'].to_numpy()
    visits_b = data['acc_visit_B'].to_numpy()

    proba_b_better_a = []
    expected_loss_a = []
    expected_loss_b = []

    for click_a, visit_a, click_b, visit_b in zip(clicks_a, visits_a, clicks_b, visits_b):
        # Posterior Beta parameters (uniform Beta(1, 1) prior).
        alpha_a = 1 + click_a
        beta_a = 1 + (visit_a - click_a)
        alpha_b = 1 + click_b
        beta_b = 1 + (visit_b - click_b)

        u_a, var_a = beta.stats(a=alpha_a, b=beta_a, moments='mv')
        u_b, var_b = beta.stats(a=alpha_b, b=beta_b, moments='mv')

        scale_a = proposal_widening * np.sqrt(var_a)
        scale_b = proposal_widening * np.sqrt(var_b)

        # Samples from the normal proposals (A first, then B).
        x_a = np.random.normal(loc=u_a, scale=scale_a, size=n_mc)
        x_b = np.random.normal(loc=u_b, scale=scale_b, size=n_mc)

        # Target (beta) and proposal (normal) densities at the samples.
        pdf_beta_a = beta.pdf(x_a, a=alpha_a, b=beta_a)
        pdf_beta_b = beta.pdf(x_b, a=alpha_b, b=beta_b)
        pdf_normal_a = norm.pdf(x_a, loc=u_a, scale=scale_a)
        pdf_normal_b = norm.pdf(x_b, loc=u_b, scale=scale_b)

        # Importance weights: Beta / Normal
        y = (pdf_beta_a * pdf_beta_b) / (pdf_normal_a * pdf_normal_b)

        b_at_least_a = x_b >= x_a
        a_at_least_b = x_a >= x_b

        # Probability of B being better than A
        p = (1 / n_mc) * np.sum(y[b_at_least_a])

        # Expected loss of each choice
        loss_a = (1 / n_mc) * np.sum(((x_b - x_a) * y)[b_at_least_a])
        loss_b = (1 / n_mc) * np.sum(((x_a - x_b) * y)[a_at_least_b])

        proba_b_better_a.append(p)
        expected_loss_a.append(loss_a)
        expected_loss_b.append(loss_b)

    return proba_b_better_a, expected_loss_a, expected_loss_b


def animate(i):
    """Redraw the decision plot from the current contents of the experiment log.

    Reads 'data_experiment.csv' (columns 'click', 'visit', 'group'), treats
    each row as one step, accumulates visits and clicks per group, runs
    ``bayesian_inference`` on the cumulative counts and plots the three
    resulting curves on the current axes.

    Parameters
    ----------
    i : int
        Frame number passed by ``FuncAnimation``; not used.
    """
    data = pd.read_csv('data_experiment.csv')

    # dtypes
    data['click'] = data['click'].astype(int)
    data['visit'] = data['visit'].astype(int)

    # One row per step, one (metric, group) column per combination.
    data = data.reset_index().rename(columns={'index': 'day'})
    data = data.pivot(index='day', columns='group', values=['click', 'visit'])

    # Force the four expected columns in a fixed order. Without this, a log
    # containing a single group makes the positional rename below fail.
    expected_columns = pd.MultiIndex.from_product(
        [['click', 'visit'], ['control', 'treatment']]
    )
    data = data.reindex(columns=expected_columns).fillna(0)
    data.columns = ['click_control', 'click_treatment', 'visit_control', 'visit_treatment']
    data = data.reset_index(drop=True)

    # Cumulative counts: A is the control group, B the treatment group.
    data['acc_visit_A'] = data['visit_control'].cumsum()
    data['acc_click_A'] = data['click_control'].cumsum()
    data['acc_visit_B'] = data['visit_treatment'].cumsum()
    data['acc_click_B'] = data['click_treatment'].cumsum()

    # inference bayesian
    p, expected_loss_a, expected_loss_b = bayesian_inference(data)

    x1 = np.arange(len(p))

    # Clear the axes so each frame replaces the previous curves.
    plt.cla()
    plt.plot(x1, p, label='Probability B better A')
    plt.plot(x1, expected_loss_a, label='Risk Choosing A')
    plt.plot(x1, expected_loss_b, label='Risk Choosing B')

    plt.legend(loc='upper left')
    plt.tight_layout()

# Re-run `animate` on the current figure every 1000 ms; the animation object
# is bound to a name so it stays referenced while the plot is shown.
ani = FuncAnimation(fig=plt.gcf(), func=animate, interval=1000)

plt.tight_layout()
plt.show()

# Design API 

In [None]:
# Flask application object; the route handlers below are registered on it.
app = Flask( __name__)

@app.route('/')
def index():
    """Serve one of the two page variants, chosen by a Thompson-sampling agent.

    Totals the clicks and non-clicks per group from 'data_experiment.csv',
    draws one sample per group from Beta(1 + clicks, 1 + non-clicks) and
    renders the page of the group with the larger sample.

    The ``1 +`` terms are a uniform Beta(1, 1) prior. They keep both Beta
    parameters strictly positive, which ``np.random.beta`` requires, so a
    group with no clicks (or no non-clicks) no longer raises.

    Returns
    -------
    The rendered 'page01.html' when the control arm wins the draw, otherwise
    'page02.html'.
    NOTE(review): presumably page01.html is the control (blue) page and
    page02.html the treatment (red) page - confirm against the templates.
    """
    # get data
    df = pd.read_csv('data_experiment.csv')
    df['no_click'] = df['visit'] - df['click']

    # Fixed arm order (0 = control, 1 = treatment); a group missing from the
    # log counts as zero observations instead of shifting the positions.
    totals = (
        df.groupby('group')[['click', 'no_click']]
        .sum()
        .reindex(['control', 'treatment'], fill_value=0)
    )

    # Thompson Agent
    successes = 1 + totals['click'].to_numpy()
    failures = 1 + totals['no_click'].to_numpy()
    prob_reward = np.random.beta(successes, failures)

    if np.argmax(prob_reward) == 0:
        return render_template('page01.html')
    return render_template('page02.html')


@app.route('/yes', methods=['POST'])
def yes_event():
    """Record a visit that ended in a sign-up click, then go back to the index.

    The posted 'yescheckbox' value identifies the page variant: 'red' is
    logged as the 'treatment' group, anything else as 'control'. The new
    row (click=1, visit=1) is appended to 'data_experiment.csv'.
    """
    history = pd.read_csv('data_experiment.csv')

    group = 'treatment' if request.form['yescheckbox'] == 'red' else 'control'
    new_row = pd.DataFrame({'click': 1, 'visit': 1, 'group': group}, index=[0])

    # Rewrite the whole log with the new observation at the end.
    pd.concat([history, new_row]).to_csv('data_experiment.csv', index=False)

    return redirect(url_for('index'))


@app.route('/no', methods=['POST'])
def no_event():
    """Record a visit without a sign-up click, then go back to the index.

    The posted 'nocheckbox' value identifies the page variant: 'red' is
    logged as the 'treatment' group, anything else as 'control'. The new
    row (click=0, visit=1) is appended to 'data_experiment.csv'.
    """
    history = pd.read_csv('data_experiment.csv')

    group = 'treatment' if request.form['nocheckbox'] == 'red' else 'control'
    new_row = pd.DataFrame({'click': 0, 'visit': 1, 'group': group}, index=[0])

    # Rewrite the whole log with the new observation at the end.
    pd.concat([history, new_row]).to_csv('data_experiment.csv', index=False)

    return redirect(url_for('index'))


# Start the Flask server on port 5000 when this code runs as the main module.
if __name__ == '__main__':
    app.run(port=5000)

# Design web scraper

In [None]:
# Simulated visitors: drive a Chrome browser against the local site and answer
# each page with a fixed click probability that depends on the button colour.
path_webdriver = '/home/dbcordeiro/repos/isketch_website_mab_testing/chromedriver_linux64/chromedriver'
options = webdriver.ChromeOptions()
# "detach" keeps the browser window open after the script finishes.
options.add_experimental_option("detach", True)
driver = webdriver.Chrome(service=Service(path_webdriver), options=options)

driver.get('http://127.0.0.1:5000/')

clicks = 10000
for click in range(clicks):
    # The checkbox value tells which variant the server rendered.
    button_color = driver.find_element('name', 'yescheckbox').get_attribute('value')

    # Simulated conversion rate: 30% on the blue page, 35% on any other page.
    conversion_rate = 0.3 if button_color == 'blue' else 0.35

    if np.random.random() < conversion_rate:
        checkbox_name, button_id = 'yescheckbox', 'yesbtn'
    else:
        checkbox_name, button_id = 'nocheckbox', 'nobtn'

    driver.find_element('name', checkbox_name).click()
    driver.find_element('id', button_id).click()
    # Pause between simulated visits.
    time.sleep(1)

# Decision plot of probability of B better than A

In [None]:
# Live decision plot: re-run `animate` on the current figure every 1000 ms.
ani = FuncAnimation(fig=plt.gcf(), func=animate, interval=1000)
plt.tight_layout()
plt.show()