In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import xgboost as xgb

In [2]:
class LinearRegression:
    """Linear regression fitted with full-batch gradient descent.

    The model is ``y ≈ X @ weights + bias``. Both parameters start at zero
    and are updated ``n_iters`` times using the gradient of the halved mean
    squared error.

    Parameters
    ----------
    learning_rate : float
        Step size applied to every gradient update.
    n_iters : int
        Number of gradient-descent iterations run by ``fit``.

    Attributes
    ----------
    weights : numpy.ndarray or None
        Coefficient vector of shape ``(n_features,)``; ``None`` until ``fit``.
    bias : float or None
        Intercept term; ``None`` until ``fit``.
    """

    def __init__(self, learning_rate=0.001, n_iters=1000):
        self.lr = learning_rate
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """Estimate ``weights`` and ``bias`` from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training inputs (ndarray, DataFrame, or nested lists).
        y : array-like with n_samples elements
            Training targets. Column vectors such as ``(n_samples, 1)`` are
            flattened so that the residual stays one-dimensional.

        Raises
        ------
        ValueError
            If ``y`` does not contain exactly one value per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        # Flatten y: a (n, 1) target would otherwise broadcast the residual
        # to an (n, n) matrix and break the weight update.
        y = np.asarray(y, dtype=float).reshape(-1)

        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} values"
            )

        # init parameters
        self.weights = np.zeros(n_features)
        self.bias = 0.0

        # gradient descent
        for _ in range(self.n_iters):
            residual = np.dot(X, self.weights) + self.bias - y

            # compute gradients
            dw = (1 / n_samples) * np.dot(X.T, residual)
            db = (1 / n_samples) * np.sum(residual)

            # update parameters
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        """Return ``X @ weights + bias`` for each row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
        """
        y_approximated = np.dot(np.asarray(X, dtype=float), self.weights) + self.bias
        return y_approximated

In [3]:
# Training problems; the first spreadsheet column becomes the row index.
df = pd.read_excel(io='training_set.xlsx', index_col=0)

In [4]:
# Problems to predict; the first spreadsheet column becomes the row index.
targetDF = pd.read_excel(io='target_problems.xlsx', index_col=0)

In [5]:
# Four consecutive 25-row slices of the training frame (rows 0-24, 25-49,
# 50-74, 75-99); df1..df4 are the training sets for Arate1..Arate4 in turn.
df1, df2, df3, df4 = (
    df.iloc[start:start + 25, :] for start in range(0, 100, 25)
)

In [6]:
# One regressor instance shared by all targets; each later cell calls
# fit() on it again before predicting.
xgb_params = {"objective": "reg:squarederror", "random_state": 42}
xgb_model = xgb.XGBRegressor(**xgb_params)

In [7]:
# Gradient-descent baseline defined above.
# NOTE(review): no visible cell fits or predicts with `model`; every
# prediction below comes from xgb_model — confirm whether it is still needed.
model = LinearRegression(n_iters=1000, learning_rate=0.01)

In [8]:
# Arate1: fit on the df1 slice, then score every target problem.
arate1_features = ['a1', 'pa1', 'a2', 'Est_A0']

x1, y1 = df1[arate1_features], df1['Arate1']
xgb_model.fit(x1, y1)

# Same feature columns, taken from the target problems.
x1_target = targetDF[arate1_features]
y1_pred = xgb_model.predict(x1_target)
targetDF['Arate1'] = y1_pred

In [9]:
# Arate2: fit on the df2 slice, then score every target problem.
arate2_features = ['a1', 'pa1', 'a2', 'Est_A0']

x2, y2 = df2[arate2_features], df2['Arate2']
xgb_model.fit(x2, y2)

# Same feature columns, taken from the target problems.
x2_target = targetDF[arate2_features]
y2_pred = xgb_model.predict(x2_target)
targetDF['Arate2'] = y2_pred

In [10]:
# Arate3: fit on the df3 slice, then score every target problem.
arate3_features = ['a1', 'pa1', 'a2', 'Est_A0']

x3, y3 = df3[arate3_features], df3['Arate3']
xgb_model.fit(x3, y3)

# Same feature columns, taken from the target problems.
x3_target = targetDF[arate3_features]
y3_pred = xgb_model.predict(x3_target)
targetDF['Arate3'] = y3_pred

In [11]:
# Arate4: fit on the df4 slice, then score every target problem.
arate4_features = ['a1', 'pa1', 'a2', 'Est_A0']

x4, y4 = df4[arate4_features], df4['Arate4']
xgb_model.fit(x4, y4)

# Same feature columns, taken from the target problems.
x4_target = targetDF[arate4_features]
y4_pred = xgb_model.predict(x4_target)
targetDF['Arate4'] = y4_pred

In [12]:
# AAAbest: fit on the whole training frame, then score every target problem.
aaabest_features = ['a1', 'pa1', 'a2', 'Est_A0']

x5, y5 = df[aaabest_features], df['AAAbest']
xgb_model.fit(x5, y5)

# Same feature columns, taken from the target problems.
x5_target = targetDF[aaabest_features]
y5_pred = xgb_model.predict(x5_target)
targetDF['AAAbest'] = y5_pred

In [13]:
# AAAnotb: fit on the whole training frame (base features plus 'corr'),
# then score every target problem.
aaanotb_features = ['a1', 'pa1', 'a2', 'corr', 'Est_A0']

x6, y6 = df[aaanotb_features], df['AAAnotb']
xgb_model.fit(x6, y6)

# Same feature columns, taken from the target problems.
x6_target = targetDF[aaanotb_features]
y6_pred = xgb_model.predict(x6_target)
targetDF['AAAnotb'] = y6_pred

In [14]:
# ABAbest: fit on the whole training frame (base features plus 'pb1'),
# then score every target problem.
ababest_features = ['a1', 'pa1', 'a2', 'pb1', 'Est_A0']

x7, y7 = df[ababest_features], df['ABAbest']
xgb_model.fit(x7, y7)

# Same feature columns, taken from the target problems.
x7_target = targetDF[ababest_features]
y7_pred = xgb_model.predict(x7_target)
targetDF['ABAbest'] = y7_pred

In [15]:
# ABAnotb: fit on the whole training frame (base features plus 'pb1' and
# 'corr'), then score every target problem.
abanotb_features = ['a1', 'pa1', 'a2', 'pb1', 'corr', 'Est_A0']

x8, y8 = df[abanotb_features], df['ABAnotb']
xgb_model.fit(x8, y8)

# Same feature columns, taken from the target problems.
x8_target = targetDF[abanotb_features]
y8_pred = xgb_model.predict(x8_target)
targetDF['ABAnotb'] = y8_pred

In [16]:
# Show the target problems from positional row 20 onward, now including
# the eight predicted columns.
targetDF.iloc[20:]

Unnamed: 0_level_0,a1,pa1,a2,b1,pb1,b2,corr,n,Est_A0,Arate1,Arate2,Arate3,Arate4,AAAbest,AAAnotb,ABAbest,ABAnotb
prob,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
421,1,0.13,-1,-2,0.25,0,0,21,0.62,0.386185,0.886684,0.335558,0.626889,0.660536,0.631402,0.381337,0.246749
422,0,0.30,1,3,0.91,-67,0,22,0.86,0.720218,0.875025,0.290931,0.685966,0.969403,0.737702,0.337490,0.298914
423,3,1.00,0,1,0.30,4,-1,24,1.00,0.711235,0.877631,0.729343,0.503179,0.768462,0.200852,0.596122,0.181424
424,1,1.00,0,-1,0.70,6,0,20,0.80,0.653803,0.583838,0.397033,0.516870,0.751498,0.721929,0.405764,0.263130
425,2,1.00,0,4,0.18,2,0,20,0.35,0.590529,0.421040,0.209445,0.427317,0.827609,0.804998,0.218244,0.401737
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
496,2,1.00,0,12,0.08,2,-1,23,0.13,0.559763,0.286128,0.230589,0.427000,0.621570,0.522680,0.070423,0.139485
497,1,0.89,-2,-3,0.41,3,0,25,0.92,0.386284,0.639894,0.857725,0.504527,0.792270,0.629319,0.514812,0.290016
498,2,1.00,0,4,0.91,-50,0,29,0.69,0.549127,0.434049,0.757397,0.467050,0.626297,0.863671,0.352536,0.205940
499,2,1.00,0,1,0.93,45,-1,23,0.48,0.568485,0.420849,0.231450,0.411226,0.747614,0.702179,0.203151,0.238999


In [17]:
# Persist the target problems together with their predicted columns.
targetDF.to_excel(excel_writer="target_predictions.xlsx")