## Step 1: Obtain data by generating it randomly

In [None]:
import random

def generate_sample_data_3d(coeff1, coeff2, num_samples = 40, x3_spread = 30.0):
    """Build noisy samples of the plane x3 = coeff1 * x1 + coeff2 * x2.

    Every integer pair (x1, x2) with 0 <= x1, x2 < num_samples yields one
    point. Its x3 value is the plane height plus uniform noise drawn from
    [-x3_spread / 2, x3_spread / 2), rounded to two decimals.

    Returns a list of (x1, x2, x3) tuples ordered by x1, then by x2.
    """
    def noisy_height(x1, x2):
        # random.random() is in [0, 1); shifting by 0.5 centres the noise on 0.
        jitter = x3_spread * (random.random() - 0.5)
        return round(coeff1 * x1 + coeff2 * x2 + jitter, 2)

    grid = range(num_samples)
    return [(x1, x2, noisy_height(x1, x2)) for x1 in grid for x2 in grid]

In [None]:
# Draw both plane coefficients uniformly from [1.0, 2.0] and report them so
# they can be compared with whatever a fitted model estimates.
coeff1, coeff2 = (random.uniform(1.0, 2.0) for _ in range(2))
print("Coefficients during generation: ", (coeff1, coeff2))
data = generate_sample_data_3d(coeff1, coeff2)

## Step 2: Divide the data into two sets: a training set and a testing set

In [None]:
def split_training_test(data, proportion_train = 0.5):
    """Randomly partition data into a training list and a testing list.

    int(proportion_train * len(data)) distinct positions are drawn with
    random.sample; the items at those positions form the training set and
    all remaining items form the testing set. Both lists keep the items in
    their original relative order.

    Returns a (train, test) tuple of lists.
    """
    train_size = int(proportion_train * len(data))
    # A set makes each membership test O(1); checking against the list that
    # random.sample returns made the whole split quadratic in len(data).
    train_indices = set(random.sample(range(len(data)), train_size))
    train = []
    test = []
    for i, item in enumerate(data):
        (train if i in train_indices else test).append(item)
    return (train, test)

In [None]:
# Split the generated points with the default 50/50 proportion and show
# both subsets, one per line.
data_train, data_test = split_training_test(data)
print(data_train, data_test, sep="\n")

In [None]:
%matplotlib nbagg
import matplotlib.pyplot as pyplot
from mpl_toolkits.mplot3d import Axes3D

# Separate the inputs (x1, x2) from the target x3 for the training points.
x_train = [p[:-1] for p in data_train]
x3_train = [p[-1] for p in data_train]

fig = pyplot.figure(1)
# Create the 3D axes through the figure: in recent matplotlib releases a
# bare Axes3D(fig) is no longer attached to the figure automatically, which
# leaves the plot blank. add_subplot creates and registers the axes at once.
ax = fig.add_subplot(111, projection='3d')

# Scatter the training points in (x1, x2, x3) space.
x1_train = [p[0] for p in data_train]
x2_train = [p[1] for p in data_train]
ax.scatter(x1_train, x2_train, x3_train)
fig.show()

## Step 3: Run linear regression on training data

In [None]:
from sklearn import linear_model

# Fit a linear model with (x1, x2) as inputs and x3 as the target.
# fit() returns the estimator itself, so construction and fitting can chain.
regr = linear_model.LinearRegression().fit(x_train, x3_train)

# These should land close to the coefficients used to generate the data.
print('Estimated coefficient: ', regr.coef_)
print('Estimated intercept:   ', regr.intercept_)

## Step 4: Predict the x3 values of the testing data from its x1 and x2 values

In [None]:
# Separate the inputs (x1, x2) from the true x3 values of the testing
# points, then predict x3 from the inputs with the fitted model.
x_test = [p[:-1] for p in data_test]
x3_test = [p[-1] for p in data_test]
x3_pred = regr.predict(x_test)

fig = pyplot.figure(2)
# Create the 3D axes through the figure: in recent matplotlib releases a
# bare Axes3D(fig) is no longer attached to the figure automatically, which
# leaves the plot blank. add_subplot creates and registers the axes at once.
ax = fig.add_subplot(111, projection='3d')

# Overlay the true x3 values with the (more transparent) predicted ones.
x1_test = [p[0] for p in data_test]
x2_test = [p[1] for p in data_test]
ax.scatter(x1_test, x2_test, x3_test)
ax.scatter(x1_test, x2_test, x3_pred, alpha=0.2)
fig.show()

## Step 5: Measure how far the predicted x3 values are from the real x3 values in the testing data set

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Score the predictions against the true x3 values of the testing set.
mse = mean_squared_error(y_true=x3_test, y_pred=x3_pred)
r2score = r2_score(y_true=x3_test, y_pred=x3_pred)

# Report both metrics rounded to two decimals, one per line.
print(
    "Mean squared error: {:.2f}".format(mse),
    "R2 score: {:.2f}".format(r2score),
    sep="\n",
)