In [7]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from linearRegression import *
import plotly
import plotly.graph_objs as go

# Fixed seed so the random 80/20 split below selects the same rows on every
# run (Restart Kernel -> Run All gives the same train/test sets).
RANDOM_SEED = 42

# Load the x/y samples for the regression experiments.
data = pd.read_csv("data/non-linear-regression-x-y.csv", sep=',')

# 80% of the rows are sampled for training; the remaining rows (everything
# whose index label was not sampled) form the test set.
train_data = data.sample(frac=0.8, random_state=RANDOM_SEED)
test_data = data.drop(train_data.index)

# Column names of the single input feature and of the target.
input_param_name = 'x'
output_param_name = 'y'

# Selecting with a list of column names keeps the result two-dimensional,
# so each array below has one column and one row per sample.
x_train = train_data[[input_param_name]].values
y_train = train_data[[output_param_name]].values

x_test = test_data[[input_param_name]].values
y_test = test_data[[output_param_name]].values

In [9]:
# Plot the training samples and the test samples as two scatter traces.

# Marker style shared by both scatter traces: large opaque dots with a thin
# white outline so overlapping points stay distinguishable.
mode = "markers"
marker = {
    'size': 10,
    'opacity': 1,
    'line': {
        'color': 'rgb(255,255,255)',
        'width': 1
    }
}

# Column 0 is the only column of the (n, 1) arrays; plotly wants 1-D sequences.
x_train_ = x_train[:, 0]
y_train_ = y_train[:, 0]
name_train_ = "training set"
plot_training_trace = go.Scatter(
    x=x_train_,
    y=y_train_,
    name=name_train_,
    mode=mode,
    marker=marker
)

x_test_ = x_test[:, 0]
y_test_ = y_test[:, 0]
name_test_ = "test set"
plot_test_trace = go.Scatter(
    x=x_test_,
    y=y_test_,
    name=name_test_,
    mode=mode,
    marker=marker
)

# Axis titles are set on layout.xaxis / layout.yaxis: `scene` only applies to
# 3-D traces, so titles placed there never show up on a 2-D scatter plot.
# The margins leave room for the figure title and the axis titles.
plot_layout = go.Layout(
    title='Data Sets',
    xaxis={'title': input_param_name},
    yaxis={'title': output_param_name},
    margin={'l': 60, 'r': 20, 'b': 60, 't': 60}
)

plot_data = [plot_training_trace, plot_test_trace]
plot_figure = go.Figure(data=plot_data, layout=plot_layout)
plotly.offline.iplot(plot_figure)

In [25]:
# Training
# Baseline for comparison: plain linear model (polynomial_degree=0,
# sinusoid_degree=0, i.e. no extra polynomial or sinusoid features requested).
lr = LinearRegression(polynomial_degree=0, sinusoid_degree=0)
# NOTE(review): alpha is presumably the learning rate and num_iters the number
# of gradient-descent iterations -- confirm against the linearRegression module.
lr.fit(x_train, y_train, alpha=0.01, num_iters=10000)
# The printed label means "loss"; the value is the last entry of lr.cost_hist.
print("损失：",lr.cost_hist[-1])

# Predictions for the test set (kept for inspection; not used further in this cell).
y_pre = lr.predict(x_test)

# Fitted curve: evaluate the model on 1000 evenly spaced inputs in [0, 250].
# The grid is reshaped to a single column so predict() receives the same
# (n_samples, 1) layout as x_train and x_test.
x_deci = np.linspace(0, 250, 1000).reshape(-1, 1)
y_deci = lr.predict(x_deci)

# Plotly expects 1-D sequences for x and y.
x_deci_ = x_deci.flatten()
y_deci_ = y_deci.flatten()
# This is a regression fit, so the line is labelled as a fitted curve rather
# than a (classification) decision boundary.
name_deci_ = "fitted curve"
plot_deci_trace = go.Scatter(
    x=x_deci_,
    y=y_deci_,
    name=name_deci_,
    mode='lines'
)

# Draw the fitted curve together with the training and test scatter traces.
plot_data = [plot_deci_trace, plot_training_trace, plot_test_trace]
plot_figure = go.Figure(data=plot_data, layout=plot_layout)
plotly.offline.iplot(plot_figure)

损失： 394.90777433067956


In [50]:
# Training
# Comparison: non-linear model with polynomial_degree=2 (no sinusoid features).
lr = LinearRegression(polynomial_degree=2, sinusoid_degree=0)
# NOTE(review): alpha is presumably the learning rate and num_iters the number
# of gradient-descent iterations -- confirm against the linearRegression module.
lr.fit(x_train, y_train, alpha=0.01, num_iters=100000)
# The printed label means "loss"; the value is the last entry of lr.cost_hist.
print("损失：",lr.cost_hist[-1])

# Predictions for the test set (kept for inspection; not used further in this cell).
y_pre = lr.predict(x_test)

# Fitted curve: evaluate the model on 1000 evenly spaced inputs in [0, 250].
# The grid is reshaped to a single column so predict() receives the same
# (n_samples, 1) layout as x_train and x_test.
x_deci = np.linspace(0, 250, 1000).reshape(-1, 1)
y_deci = lr.predict(x_deci)

# Plotly expects 1-D sequences for x and y.
x_deci_ = x_deci.flatten()
y_deci_ = y_deci.flatten()
# This is a regression fit, so the line is labelled as a fitted curve rather
# than a (classification) decision boundary.
name_deci_ = "fitted curve"
plot_deci_trace = go.Scatter(
    x=x_deci_,
    y=y_deci_,
    name=name_deci_,
    mode='lines'
)

# Draw the fitted curve together with the training and test scatter traces.
plot_data = [plot_deci_trace, plot_training_trace, plot_test_trace]
plot_figure = go.Figure(data=plot_data, layout=plot_layout)
plotly.offline.iplot(plot_figure)

损失： 247.991226880044


In [74]:
# Training
# Comparison: non-linear model with polynomial_degree=6 and sinusoid_degree=10.
lr = LinearRegression(polynomial_degree=6, sinusoid_degree=10)
# NOTE(review): alpha is presumably the learning rate and num_iters the number
# of gradient-descent iterations -- confirm against the linearRegression module.
lr.fit(x_train, y_train, alpha=0.008, num_iters=300000)
# The printed label means "loss"; the value is the last entry of lr.cost_hist.
print("损失：",lr.cost_hist[-1])

# Predictions for the test set (kept for inspection; not used further in this cell).
y_pre = lr.predict(x_test)

# Fitted curve: evaluate the model on 1000 evenly spaced inputs in [0, 250].
# The grid is reshaped to a single column so predict() receives the same
# (n_samples, 1) layout as x_train and x_test.
x_deci = np.linspace(0, 250, 1000).reshape(-1, 1)
y_deci = lr.predict(x_deci)

# Plotly expects 1-D sequences for x and y.
x_deci_ = x_deci.flatten()
y_deci_ = y_deci.flatten()
# This is a regression fit, so the line is labelled as a fitted curve rather
# than a (classification) decision boundary.
name_deci_ = "fitted curve"
plot_deci_trace = go.Scatter(
    x=x_deci_,
    y=y_deci_,
    name=name_deci_,
    mode='lines'
)

# Draw the fitted curve together with the training and test scatter traces.
plot_data = [plot_deci_trace, plot_training_trace, plot_test_trace]
plot_figure = go.Figure(data=plot_data, layout=plot_layout)
plotly.offline.iplot(plot_figure)

损失： 50.10514925401085
