In [None]:
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.multioutput import MultiOutputRegressor
import pandas as pd
from model import TopSecretHack, Models

file = "Daten.xlsx"

# Main Variables
assembly_data = pd.read_excel(file, sheet_name = "dataset1")
initial_data = pd.read_excel(file, sheet_name = "initialinspection")
final_data = pd.read_excel(file, sheet_name = "finalinspection")
result_data = pd.read_excel(file, sheet_name = "Final")

# Extracting specific columns
aux = initial_data.iloc[:,-3:]
features = pd.concat([assembly_data.iloc[:,2:], aux], axis = 1)
targets = initial_data.iloc[:,[0,2,8,9,10,11,12]]

# Loading features and targets
dataset = TopSecretHack()
features, targets = dataset.load_datasets(file)

# One Hot Encoding on categorical variables of targets
targets = dataset.one_hotencoding(targets, column = -1)
targets.drop(columns = 3, inplace = True)

# Handling Missing Data
features = dataset.missing_values(features)
targets = dataset.missing_values(targets)

# Splitting dataset and building the model
x_train, x_test, y_train, y_test = train_test_split(features, targets, test_size = 0.25, 
                                                            random_state = 42)
        
# Building model
model = XGBRegressor(learning_rate = 0.01, n_estimators = 700, max_depth = 20, subsample = 0.8,
                                 colsample_bytree = 1, gamma = 1)
model = MultiOutputRegressor(model)
model.fit(x_train, y_train)



In [29]:
from sklearn.metrics import r2_score

y_pred = model.predict(x_test)
r2 = r2_score(y_test, y_pred)
r2

0.07151360675729991

In [18]:
import numpy as np
y_pred = pd.DataFrame(model.predict(x_test))
for i in range(4):
    y_pred.iloc[:,i] = np.around(y_pred.iloc[:,i])

In [20]:
y_test

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
15894,0.0,0.0,0.0,1.0,2.33,22.07,0.78,0.74,0.81,0.92
1944,1.0,0.0,0.0,0.0,12.80,19.95,0.26,0.50,0.81,1.16
14611,0.0,0.0,0.0,1.0,8.64,39.47,1.33,1.22,1.09,1.28
2680,0.0,1.0,0.0,0.0,11.35,31.23,1.04,1.01,0.95,0.99
9957,1.0,0.0,0.0,0.0,7.81,43.04,1.46,1.39,1.61,1.92
5118,1.0,0.0,0.0,0.0,3.62,22.61,0.43,0.63,0.82,0.95
8741,1.0,0.0,0.0,0.0,1.28,10.72,0.27,0.30,0.34,0.42
2034,0.0,1.0,0.0,0.0,5.52,16.93,0.84,0.80,0.58,0.52
4708,1.0,0.0,0.0,0.0,4.62,24.90,0.64,0.66,0.76,0.94
14131,1.0,0.0,0.0,0.0,13.13,34.08,0.75,0.85,1.13,1.75


In [27]:
y_pred

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.0,0.0,0.0,0.0,4.260708,17.434036,0.811490,0.778095,0.773662,0.888392
1,0.0,0.0,0.0,0.0,5.377056,15.440707,0.741958,0.712319,0.687814,0.758194
2,0.0,0.0,0.0,0.0,4.902144,18.429346,0.816362,0.787823,0.796465,0.938332
3,0.0,0.0,0.0,0.0,5.429176,15.378551,0.729988,0.703919,0.683888,0.757505
4,0.0,0.0,0.0,1.0,4.489393,21.019924,0.916627,0.878334,0.911337,1.050393
5,0.0,0.0,0.0,0.0,5.204697,15.079143,0.731752,0.705623,0.685243,0.762227
6,0.0,0.0,0.0,0.0,4.759191,17.608225,0.777007,0.761437,0.772585,0.864043
7,0.0,0.0,0.0,0.0,4.656559,15.359739,0.738627,0.705753,0.683173,0.758609
8,0.0,0.0,0.0,0.0,5.437634,15.745795,0.729988,0.705431,0.691263,0.772033
9,0.0,0.0,0.0,0.0,4.891603,18.429346,0.818356,0.787823,0.795164,0.941123
