In [1]:
# imports

import numpy as np
import xgboost as xgb
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
# constants

# Relative location of the CSV file that the notebook reads its data from.
path = "./dataset.csv"

In [3]:
# Read the dataset. header=None means no row of the file is used as a header,
# so the seven column names are supplied explicitly.
data = pd.read_csv(
    path,
    header=None,
    names=['aoa', 'Re', 'mc', 'mcp', 'th', 'up', 'lp'],
)

In [4]:
# Features: every column except the two target columns 'up' and 'lp'.
X = data.drop(columns=['up', 'lp'])

# Targets: both columns together, 'up' on its own, and a 0/1 flag that is 1
# on rows where 'up' is below 1.
y = data[['up', 'lp']]
y_u = data['up']
y_u_sep = (data['up'] < 1).astype(int)
#y_l = data['lp']


In [5]:
# Split the features and both 'up' targets in a single call so that all three
# are guaranteed to share one train/test partition (33% test, seed 42).
# Previously the 0/1 labels were split in a second call that stayed aligned
# only because it repeated the same seed and test size.
X_u_train, X_u_test, y_u_train, y_u_test, y_u_sep_train, y_u_sep_test = train_test_split(
    X, y_u, y_u_sep, test_size=0.33, random_state=42
)
#X_l_train, X_l_test, y_l_train, y_l_test = train_test_split(X, y_l, test_size=0.33, random_state=42)

In [6]:
# Peek at the first training row as a plain NumPy array (five feature values).
X_u_train.values[0]

array([3.32472449e+00, 5.44494129e+07, 8.41022648e+00, 3.37200237e+01,
       2.82241616e+00])

In [7]:
#X_l_train.values[0]

In [8]:
# Label-based lookup: the 0/1 flag of the training row whose index label is 4
# (not the fifth row by position).
y_u_sep_train.loc[4]

1

In [9]:
# Quick baseline: two identically configured regressors plus a deeper classifier.

model_u = xgb.XGBRegressor(
    n_estimators=1000,
    max_depth=12,
    eta=0.1,
    subsample=1,
    colsample_bytree=1,
)
model_l = xgb.XGBRegressor(
    n_estimators=1000,
    max_depth=12,
    eta=0.1,
    subsample=1,
    colsample_bytree=1,
)

classify_model = xgb.XGBClassifier(
    n_estimators=1000,
    max_depth=18,
    eta=0.1,
    subsample=1,
    colsample_bytree=1,
)


In [10]:
# Fit the regressor on the continuous 'up' target.
model_u.fit(X_u_train, y_u_train)

#model_l.fit(X_l_train, y_l_train)

# Fit the classifier on the 0/1 labels using the same training features.
# As the last expression of the cell, its return value is what gets displayed.
classify_model.fit(X_u_train, y_u_sep_train)



XGBClassifier(eta=0.1, max_depth=18, n_estimators=1000)

In [11]:
def loss(y, yhat):
    """Return the mean absolute error between ``y`` and ``yhat``.

    Parameters
    ----------
    y, yhat : numpy.ndarray or pandas.Series
        Values to compare. They are subtracted element-wise, so they must be
        compatible for ``y - yhat``. The result does not depend on which of
        the two is passed first.

    Returns
    -------
    float
        Mean of ``|y - yhat|`` over all elements.
    """
    # Take |d| directly instead of sqrt(d**2): the value is the same, but the
    # intermediate square overflows to inf for very large residuals and
    # underflows to 0 for very small ones.
    return np.mean(np.abs(y - yhat))

In [12]:
# Class labels predicted by the classifier for the test features.
y_u_sep_hat = classify_model.predict(X_u_test)

In [13]:
# scikit-learn's signature is accuracy_score(y_true, y_pred): pass the true
# labels first. Accuracy is symmetric, so the reported value is unchanged.
accuracy_score(y_u_sep_test, y_u_sep_hat)

0.9706908583391486

In [14]:
# Regression predictions of 'up' for the test features.
y_u_hat = model_u.predict(X_u_test)
#y_l_hat = model_l.predict(X_l_test)

In [15]:
# Mean absolute error of the plain regressor on the test split. Arguments
# follow loss(y, yhat): true values first, predictions second (the metric is
# symmetric, so the value is the same either way).
loss(y_u_test, y_u_hat)

0.043963851892597224

In [16]:
# Let's give it an extra boost
# Start by inspecting the true 'up' values of the test split.

y_u_test

3978    1.000000
1448    0.933791
2664    1.000000
17      1.000000
1634    0.002040
          ...   
572     1.000000
4278    0.961556
288     1.000000
1615    1.000000
2330    1.000000
Name: up, Length: 1433, dtype: float64

In [17]:
# True 0/1 labels of the test split (1 where 'up' is below 1).
y_u_sep_test

3978    0
1448    1
2664    0
17      0
1634    1
       ..
572     0
4278    1
288     0
1615    0
2330    0
Name: up, Length: 1433, dtype: int64

In [18]:
# Labels the classifier predicted for the same test split.
y_u_sep_hat

array([0, 1, 0, ..., 0, 0, 0])

In [19]:
# Use the classifier's 0/1 decision to override the regressor: wherever the
# classifier predicts 0, the regression value is replaced by exactly 1.

def boost_predictions(regression_preds, classification_preds):
    """Gate regression predictions with 0/1 classifier predictions.

    Computes ``(regression_preds - 1) * classification_preds + 1``. Where the
    classifier value is 1 the regression prediction is kept (up to
    floating-point rounding); where it is 0 the result is 1.

    Works element-wise on arrays as well as on scalars.
    """
    shifted = regression_preds - 1
    gated = shifted * classification_preds
    return gated + 1

In [20]:
# Test-split regression predictions, forced to 1 wherever the classifier predicted 0.
y_u_hat_boosted = boost_predictions(y_u_hat, y_u_sep_hat)

In [21]:
# Mean absolute error of the classifier-gated predictions on the test split.
# Arguments follow loss(y, yhat): true values first, predictions second (the
# metric is symmetric, so the value is the same either way).
loss(y_u_test, y_u_hat_boosted)

0.038836548499662366

In [43]:
def predict_sep_points(aoa, re, mc, mcp, thick):
    """Predict the gated 'up' value for a single set of feature values.

    Builds a one-row DataFrame with the feature columns
    ``['aoa', 'Re', 'mc', 'mcp', 'th']``, runs the fitted regressor
    (``model_u``) and classifier (``classify_model``) on it, and combines the
    two results with ``boost_predictions``.

    Parameters
    ----------
    aoa, re, mc, mcp, thick : scalar
        Values for the 'aoa', 'Re', 'mc', 'mcp' and 'th' columns, in that order.

    Returns
    -------
    scalar
        The regression prediction when the classifier predicts 1, otherwise 1.
    """
    x_in = pd.DataFrame(data=[[aoa, re, mc, mcp, thick]], columns=['aoa', 'Re', 'mc', 'mcp', 'th'])
    y_hat = model_u.predict(x_in)[0]
    separates = classify_model.predict(x_in)[0]
    # Reuse the shared gating rule instead of repeating its formula here, so
    # single-sample and batch predictions cannot drift apart.
    return boost_predictions(y_hat, separates)

In [44]:
# Example single-sample prediction, with each feature value named explicitly.
predict_sep_points(aoa=4, re=60000, mc=4.5, mcp=60, thick=26)

0.22020438313484192

In [29]:
# Scratch: one sample written as a flat 1-D array of five values.
np.array([4, 3000, 10, 15, 30])

array([   4, 3000,   10,   15,   30])

In [31]:
# The same sample as a 2-D array with a single row (shape (1, 5)).
a = np.array([[4, 3000, 10, 15, 30]])

In [37]:
# One-row frame carrying the five feature columns by name.
a = pd.DataFrame({
    'aoa': [4],
    'Re': [3000],
    'mc': [10],
    'mcp': [15],
    'th': [30],
})

In [38]:
# Regressor-only prediction for the one-row frame `a` (no classifier gating applied).
model_u.predict(a)

array([0.22209579], dtype=float32)