In [1]:
import numpy as np
from cmath import sin
from prepare import *

In [9]:
class LinearRegression(object):
    """Linear regression fitted with batch gradient descent.

    Attributes:
        data: Design matrix returned by ``prepare_for_training``
            (2-D, one row per example).
        label: 1-D array of targets, one per example.
        feature_mean, feature_std: Second and third values returned by
            ``prepare_for_training``; presumably the normalization
            statistics -- TODO confirm against the ``prepare`` module.
        num_examples, num_features: Shape of ``data`` after preparation.
        theta: 1-D parameter vector of length ``num_features``,
            initialised to zeros.
    """

    def __init__(self, data, label, polynomial_degree=0, sinusoid_degree=0, normalize=True):
        """Store a copy of the training set and initialise the parameters.

        Args:
            data: 1-D array (a single feature) or 2-D array
                (examples x features).
            label: 1-D array such as ``[0, 1, 1]`` or 2-D column such as
                ``[[0], [1], [1]]``.
            polynomial_degree: Forwarded to ``prepare_for_training``.
            sinusoid_degree: Forwarded to ``prepare_for_training``.
            normalize: Forwarded to ``prepare_for_training``.
        """
        data = np.asarray(data)
        label = np.asarray(label)

        # A single feature given as a 1-D array becomes an (n, 1) column.
        if data.ndim == 1:
            data = data.reshape((-1, 1))

        # A 2-D label column is flattened to 1-D.
        if label.ndim == 2:
            label = label.reshape((-1,))

        # Copy so that later processing never mutates the caller's arrays.
        self.data = np.copy(data)
        self.label = np.copy(label)
        self.polynomial_degree = polynomial_degree
        self.sinusoid_degree = sinusoid_degree
        self.normalize = normalize

        # Feature preparation is delegated to the ``prepare`` module. According
        # to the original author's note it optionally normalizes, applies the
        # polynomial / sinusoid expansion and appends a column -- the helper is
        # not visible here, so verify against its implementation.
        self.data, self.feature_mean, self.feature_std = prepare_for_training(
            self.data, self.polynomial_degree, self.sinusoid_degree, self.normalize
        )

        # Number of training examples and number of (prepared) features.
        self.num_examples, self.num_features = self.data.shape

        # One weight per prepared feature, e.g. 5 features -> [0, 0, 0, 0, 0].
        self.theta = np.zeros((self.num_features,))

    def train(self, alpha=0.01, num_iters=500):
        """Run batch gradient descent and update ``self.theta`` in place.

        Args:
            alpha: Learning rate (step size applied to the gradient).
            num_iters: Number of gradient-descent iterations.

        Returns:
            List with the training loss recorded after every iteration.
        """
        loss_hist = []
        for _ in range(num_iters):
            # Step against the gradient of the loss, scaled by the learning rate.
            self.theta -= alpha * self.gradient()
            loss_hist.append(self.loss(self.data, self.label))

        return loss_hist

    def loss(self, data, label):
        """Return the halved mean squared error of the current ``theta``.

        ``data`` must already be in the prepared feature space (same
        processing as the training data); no preparation is applied here.

        Args:
            data: 2-D array (examples x features) or a 1-D array holding a
                single example.
            label: Targets, 1-D or a 2-D column.

        Returns:
            ``sum((data . theta - label) ** 2) / (2 * num_examples)``.
        """
        data = np.asarray(data)
        if data.ndim == 1:
            # A single example is treated as a one-row matrix.
            data = data[None, :]
        label = np.asarray(label).reshape(-1)

        num_examples = data.shape[0]
        residual = self.hypothesis(data) - label
        return np.sum(residual ** 2) / (2 * num_examples)

    def gradient(self):
        """Return the gradient of ``loss`` on the training set w.r.t. ``theta``.

        Computed as ``X^T (X . theta - y) / m`` where ``m`` is the number of
        training examples, so ``theta -= alpha * gradient()`` is a descent step.
        """
        residual = self.hypothesis(self.data) - self.label
        return np.dot(self.data.T, residual) / self.data.shape[0]

    def hypothesis(self, data):
        """Return the linear prediction ``data . theta``.

        Args:
            data: 1-D (single example) or 2-D (examples x features) array in
                the prepared feature space.

        Returns:
            ``np.dot(data, self.theta)``; with the 1-D ``theta`` created in
            ``__init__`` this is a scalar for 1-D ``data`` and a 1-D array
            for 2-D ``data``.
        """
        return np.dot(data, self.theta)
        
        



In [14]:
# Demo inputs: a 3x3 matrix holding 0..8 and a length-3 vector.
arr1 = np.arange(9).reshape(3, 3)
arr2 = np.array([1, 2, 3])

array([[[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]]])

In [12]:
# Matrix-vector product: each row of arr1 dotted with arr2.
arr1.dot(arr2)

array([ 8, 26, 44])