# 回归方程

In [None]:
from sklearn import linear_model
import matplotlib.pyplot as plt

Xi = [[2], [5], [9], [12], [14]]
Yi = [[3], [6], [8], [10], [13]]

In [None]:
model = linear_model.LinearRegression() # setting
model.fit(Xi, Yi) # model practice

In [None]:
print("y=", model.coef_, "x=", model.intercept_) # the function plot
print("R^2=", model.score(Xi, Yi))

In [None]:
y_plot = model.predict(Xi)

plt.scatter(Xi, Yi, color = 'red', label="sample data", linewidth = 2) # draw
plt.plot(Xi, y_plot, color = 'green', label = "regression data", linewidth =  2)

plt.legend()
plt.show()

# 信息熵

In [None]:
import numpy as np
e=-(0.5*np.log2(0.5)+0.25*np.log2(0.25)+0.25*np.log2(0.25))
print(e)

# 决策树

In [None]:
from sklearn import datasets
iris = datasets.load_iris()
X = iris['data']
Y = iris['target']
print(X.shape, Y.shape)
print(X[:3])
print(Y)

In [None]:
from sklearn.tree import DecisionTreeClassifier
dt_clf = DecisionTreeClassifier()
dt_clf.fit(X, Y)

In [None]:
dt_clf.predict([[5,3,5,1.5]])

# 全连接神经网络实现

## 一. 读取数据

In [None]:
import pandas as pd
data = pd.read_csv('Advertising.csv') # read data from file.
data.head() # take the first five rows of data.
# the type of 'data' is Dataframe.

In [None]:
x = data.iloc[:,1:-1] # extract input data.
# the former is to define the column and the latter is to define the row.
y = data.iloc[:,-1] # extract the labels.
print(x)
print("____________")
print(y)

## 二. 模型构建

In [None]:
from tensorflow import keras
from keras import layers

model = keras.models.Sequential() # build a sequential model.

# add a fully connected neural network layer.
model.add(
    layers.Dense(
        units = 64, # the number of neurons in the hidden layer.
        input_dim = 3, # input dimension.
        activation = 'relu' # activation func.
    )
)
model.add(
    layers.Dense(
        units = 1 # cuz the model is used to predict the data, the output layer does not use an activation function.
    )
)

## 三. 模型训练

In [None]:
model.compile(optimizer='adam', loss='mse')  # adam the best, mse is generally used for data prediction.

In [None]:
import matplotlib.pyplot as plt
history = model.fit(x, y, batch_size=200, epochs=2000) # train.
plt.plot(range(2000), history.history['loss'])
plt.show()

## 四. 模型评估

In [None]:
import matplotlib.pyplot as plt
data = data.sample(frac=1).reset_index(drop=True) # shuffle the data
x = data.iloc[:,1:-1]
y = data.iloc[:,-1]
x_train, y_train = x[:160], y[:160] # extract the first 160 groups as the training data set.
x_val, y_val = x[160:180], y[160:180] # extract the 20 groups after that as the validation data set.
x_test, y_test = x[180:], y[180:] # extract the last 20 groups as the test data set.

history = model.fit(x_train, y_train, batch_size=160, epochs=500, validation_data=(x_val, y_val)) # train.

In [None]:
plt.plot(range(500), history.history['loss'])
plt.plot(range(500), history.history['val_loss'])
plt.show()

print('test_loss:', model.evaluate(x_test, y_test))

## 五. 模型应用

In [None]:
x_input = pd.DataFrame({
    'TV':[100,50,0],
    'radio':[50,100,0],
    'newspaper':[0,0,150]
})
model.predict(x_input)

# 全神经网络实现分类案例

## 一. 数据准备

In [3]:
from tensorflow import keras
from keras import layers
import pandas as pd

data = pd.read_csv("iris.csv")
data = data.join(pd.get_dummies(data.Species)) # one-hot encoding
data.head()

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species,Iris-setosa,Iris-versicolor,Iris-virginica
0,5.1,3.5,1.4,0.2,Iris-setosa,1,0,0
1,4.9,3.0,1.4,0.2,Iris-setosa,1,0,0
2,4.7,3.2,1.3,0.2,Iris-setosa,1,0,0
3,4.6,3.1,1.5,0.2,Iris-setosa,1,0,0
4,5.0,3.6,1.4,0.2,Iris-setosa,1,0,0


In [15]:
data = data.sample(frac=1).reset_index(drop=True) # shuffle the data
x_train = data[data.columns[1:5]]
y_train = data[data.columns[-3:]][:120]
x_test = data[data.columns[1:5]][120:]
y_test = data[data.columns[-3:]][120:]
print(x_train)
print("______________________________")
print(y_train)

     Sepal.Width  Petal.Length  Petal.Width          Species
0            4.4           1.5          0.4      Iris-setosa
1            2.8           5.6          2.2   Iris-virginica
2            2.9           4.2          1.3  Iris-versicolor
3            2.6           4.0          1.2  Iris-versicolor
4            3.0           4.6          1.4  Iris-versicolor
..           ...           ...          ...              ...
145          3.0           5.9          2.1   Iris-virginica
146          3.7           1.5          0.4      Iris-setosa
147          2.9           4.3          1.3  Iris-versicolor
148          4.1           1.5          0.1      Iris-setosa
149          3.4           1.6          0.2      Iris-setosa

[150 rows x 4 columns]
______________________________
     Iris-setosa  Iris-versicolor  Iris-virginica
0              1                0               0
1              0                0               1
2              0                1               0
3            

## 二. 模型构建

In [7]:
model = keras.models.Sequential()
model.add(
    layers.Dense(
        units=32,
        input_dim=4,
        activation='relu'
    )
)
model.add(
    layers.Dense(
        units=3,
        activation='softmax'
    )
)

## 三.模型训练

In [8]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['acc'])
history=model.fit(x_train, y_train, batch_size=120, epochs=200, validation_split=0.3)

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type float).