# importing necessary libraries

In [208]:
import pandas as pd
import json
import numpy as np
from sklearn.model_selection import train_test_split
    

# converting json to pandas for further processing

In [209]:
df = pd.read_json("./bids.json")


In [210]:
df.head(20)

Unnamed: 0,app,bid_price,win,events
0,A,0.01,0,100000
1,A,0.01,1,0
2,A,0.1,0,7000
3,A,0.1,1,3000
4,A,0.2,0,8000000
5,A,0.2,1,2000000
6,A,0.4,0,700000
7,A,0.4,1,300000
8,A,0.5,0,80000
9,A,0.5,1,20000


# create dataset by duplicating rows based on number of events, otherwise events number will bring noise into model training. 

In [211]:
df = df.loc[df.index.repeat(df.events)].reset_index(drop=True)

# Seperating into input and target variables

In [214]:
X = df['bid_price']
y = df['win']



# Reshaping X dataset to feed into training

In [215]:
X = X.values.reshape(-1, 1)

# Training using LogisticRegression as it's simplest for this dataset. 

In [254]:
#splitting into training and testing

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1, stratify=y)


In [255]:
from sklearn import linear_model
logr = linear_model.LogisticRegression()
logr.fit(X_train,y_train)


LogisticRegression()

In [258]:
logr.score(X_train, y_train)

0.7926715806190296

In [257]:
predictions=logr.predict(X_test)
logr.score(X_test, y_test)

0.7926819215381002

# Reading again initial dataset for predictions

In [248]:
X_initial = pd.read_json("./bids.json")


# Predicting and assigning probability of winning to new columns as the answer to 1st problem

In [259]:
X_predict = X_initial[['bid_price']]
X_predict = X_predict.values.reshape(-1, 1)

y_pred = logr.predict(X_predict) 
probs = logr.predict_proba(X_predict).T
np.set_printoptions(suppress=True)

In [260]:
X_initial['predictions']= y_pred
X_initial['probability_win'] = probs[1]

In [261]:
X_initial.head(20)

Unnamed: 0,app,bid_price,win,events,predictions,probability_win
0,A,0.01,0,100000,0,0.135667
1,A,0.01,1,0,0,0.135667
2,A,0.1,0,7000,0,0.16309
3,A,0.1,1,3000,0,0.16309
4,A,0.2,0,8000000,0,0.198606
5,A,0.2,1,2000000,0,0.198606
6,A,0.4,0,700000,0,0.286128
7,A,0.4,1,300000,0,0.286128
8,A,0.5,0,80000,0,0.337628
9,A,0.5,1,20000,0,0.337628


# Below i will write a function that will find the optimal value of bidding_price through iteration. Since the logistic regression works as y = ax + b and the a is logt.coef_ below and the b is logr.intercept_
# all i have to do is increase by 0.1 bid value when it crosses the threshold of 0.5 of activation function (which determines 0 or 1, lose or win classification)

In [262]:
logr.coef_
int_coef = logr.coef_[0][0]
print(int_coef)

2.4038225698288613


In [263]:
logr.intercept_
int_intercept = logr.intercept_[0]
print(int_intercept)

-1.8757938051801806


In [264]:
def optimal_price(int_coef, int_intercept):
   price = 0.1
   while True:
    if (int_coef*price)+int_intercept >= 0:
        return price
    else:
        price += 0.1



# the threshold value (the optimal bidding price) has been found - 0.79 

In [265]:
result = optimal_price(int_coef, int_intercept)
print(result)

0.7999999999999999


# And that's the answer to 2nd problem.  as a test case, i will feed 0.78 and 0.79 to prediction service and see the difference below

In [269]:
predicted = logr.predict(np.array([0.78]).reshape(-1,1))
predicted1 = logr.predict(np.array([0.79]).reshape(-1,1))
print(predicted, predicted1)

[0] [1]
