In [1]:
from mlrepricer import match, setup, helper, schemas
import altair as alt
import numpy as np
import pandas as pd
import warnings
import datetime as dt
import matplotlib.pyplot as plt


from mlrepricer.database import SQLite

alt.data_transformers.enable('default', max_rows=100000000)

DataTransformerRegistry.enable('default')

In [2]:
t = schemas.pricemonitor(SQLite)()  # tableobject
df = pd.read_sql_query(f'SELECT * FROM {t.table}', t.conn, parse_dates=[t.eventdate], index_col='ID')

In [3]:
filter1 = helper.cleanup(df)
main = helper.normalize(filter1)

In [4]:
main = main.reset_index().set_index(['asin', 'sellerid'])
main = pd.get_dummies(main)

In [21]:
main.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,ID,feedback,feedbackpercent,isbuyboxwinner,price,time_changed,shipping_time_0,shipping_time_46,shipping_time_48,shipping_time_72
asin,sellerid,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
B074XTQL3D,ADBIN7JOKZABU,1,0.878661,0.9604,1,14.17344,2018-05-13 10:31:00.953,0,0,0,1
B074XTQL3D,A6SCPSJ316ST,2,0.313549,0.8281,0,14.17344,2018-05-13 10:31:00.953,0,0,1,0
B074XTQL3D,A2EHWGAW6J9W8Q,3,0.738088,0.9604,0,14.45691,2018-05-13 10:31:00.953,0,0,0,1
B074XTQL3D,A1TY2HQMSLJUR2,4,0.363759,0.7921,0,17.52354,2018-05-13 10:31:00.953,0,0,0,1
B074XTQL3D,A392IUIG1QL0VK,5,0.523644,0.7744,0,21.41481,2018-05-13 10:31:00.953,0,0,1,0


In [7]:
main['time_asin'] = main.time_changed.dt.strftime(date_format="%Y-%m-%d %H:%M:%S:%f") +'_'+ main.asin
main = main[['time_asin','price', 'isprime', 'isbuyboxwinner']]

AttributeError: 'DataFrame' object has no attribute 'asin'

In [None]:
idx = main.reset_index().groupby(['time_asin'])['isbuyboxwinner'].idxmax()
# we need those index values twice
# m = lookup['index'].values
lookup = main.iloc[idx].set_index('time_asin')
assert (lookup['isbuyboxwinner']==1).all
lookup.drop('isbuyboxwinner', axis=1, inplace=True)

In [None]:
removedmax = main.iloc[~idx].set_index('time_asin')

In [None]:
result = pd.merge(removedmax, lookup, how='left', left_index=True, right_index=True, suffixes=('','_buybox'))

In [None]:
# we can't and dont want to compare prime and prime. Same forn nonprime and nonprime.
filter1 = result[result.isprime!=result.isprime_buybox]

In [None]:
conditions = [filter1.isprime==1, filter1.isprime_buybox==1]
choices = [filter1.price, filter1.price_buybox]

filter1['prime_price'] = np.select(conditions, choices, None)

conditions = [filter1.isprime==0, filter1.isprime_buybox==0]
choices = [filter1.price, filter1.price_buybox]

filter1['nonprime_price'] = np.select(conditions, choices, None)

In [None]:
price = alt.Chart(filter1).mark_point(opacity=0.4).encode(
    y=alt.Y('nonprime_price'),
    x=alt.X('prime_price', scale=alt.Scale(zero=False)),
    color=('isprime_buybox:N')).interactive()
x = np.arange(25)
# here you should but in your function from the statsmodel
data = pd.DataFrame({'nonprime_price': x*0.859-0.5670,
                     'prime_price': x})

regr = alt.Chart(data).mark_line().encode(
    x='prime_price',
    y='nonprime_price'
)

In [None]:
price + regr

In [None]:
# we have three states for a point, prime wins buybox, nonprime wins buybox, or its a tie
# then we have labeled data we can make something like a linear regression with 3 classes
# that is called logistic regression and can be solved by Softmax regression
conditions = [(filter1.isbuyboxwinner==0) & (filter1.isprime_buybox==1), (filter1.isbuyboxwinner==0) & (filter1.isprime_buybox==0), filter1.isbuyboxwinner==1]
choices = [0, 1, 2]
filter1['state'] = np.select(conditions, choices, None)

In [None]:
X = filter1[['nonprime_price', 'prime_price']].values
y = filter1['state'].values.astype('int')

In [None]:
from sklearn.linear_model import LogisticRegression
softmax_reg = LogisticRegression(multi_class='multinomial', solver='lbfgs', C=1)
softmax_reg.fit(X, y)

In [None]:
softmax_reg.predict([[5, 8.3]])

In [None]:
8.3*0.859-0.5670

In [None]:
from sklearn.svm import SVC

In [None]:
svm = SVC(kernel='poly', C=10.0, random_state=0)

In [None]:
svm.fit(X, y)

In [None]:
m = plot_decision_regions(X, y, classifier=svm)

In [None]:
resultdf = pd.DataFrame(Z)
%matplotlib inline
plt.figure(1, figsize=(4, 3))
plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)

In [None]:
xx.ravel()

In [None]:
Z.ravel()

In [None]:
from matplotlib.colors import ListedColormap


def versiontuple(v):
    return tuple(map(int, (v.split("."))))

def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):

    # setup marker generator and color map
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])

    # plot the decision surface
    x2_min, x2_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x1_min, x1_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.figure(1, figsize=(10, 10))
    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], 
                    y=X[y == cl, 1],
                    alpha=0.6, 
                    c=cmap(idx),
                    edgecolor='black',
                    marker=markers[idx], 
                    label=cl)

    # highlight test samples
    if test_idx:
        # plot all samples
        if not versiontuple(np.__version__) >= versiontuple('1.9.0'):
            X_test, y_test = X[list(test_idx), :], y[list(test_idx)]
            warnings.warn('Please update to NumPy 1.9.0 or newer')
        else:
            X_test, y_test = X[test_idx, :], y[test_idx]

        plt.scatter(X_test[:, 0],
                    X_test[:, 1],
                    c='',
                    alpha=1.0,
                    edgecolor='black',
                    linewidths=1,
                    marker='o',
                    s=55, label='test set')