# 回归方程

In [None]:
from sklearn import linear_model
import matplotlib.pyplot as plt

# Toy regression samples: one single-feature row per observation, and one
# single-target row per observation.
Xi = [[v] for v in (2, 5, 9, 12, 14)]
Yi = [[v] for v in (3, 6, 8, 10, 13)]

In [None]:
# Create a linear regression estimator and fit it to the toy samples.
model = linear_model.LinearRegression()
model.fit(X=Xi, y=Yi)

In [None]:
# Yi holds one target column and Xi one feature column, so coef_ has shape
# (1, 1) and intercept_ has shape (1,); unpack both into plain scalars.
slope = model.coef_[0][0]
intercept = model.intercept_[0]

# Fitted line: y = slope * x + intercept. The coefficient multiplies x and
# the intercept is the constant term.
print("y =", slope, "* x +", intercept)
# Coefficient of determination of the fit on the training samples.
print("R^2=", model.score(Xi, Yi))

In [None]:
# Model predictions at the sample inputs trace out the fitted line.
y_plot = model.predict(Xi)

fig, ax = plt.subplots()
ax.scatter(Xi, Yi, color='red', label="sample data", linewidth=2)  # observed points
ax.plot(Xi, y_plot, color='green', label="regression data", linewidth=2)  # fitted line

ax.legend()
plt.show()

# 信息熵

In [None]:
import numpy as np
# Shannon entropy, in bits, of the distribution (0.5, 0.25, 0.25):
# H = -sum(p * log2(p)).
probs = np.array([0.5, 0.25, 0.25])
e = -np.sum(probs * np.log2(probs))
print(e)

# 决策树

In [None]:
from sklearn import datasets
# Load the iris dataset and pull out its feature matrix and label vector.
iris = datasets.load_iris()
X, Y = iris.data, iris.target

# Quick look: array shapes, the first three samples, and the labels.
print(X.shape, Y.shape)
print(X[:3])
print(Y)

In [None]:
from sklearn.tree import DecisionTreeClassifier
# Fit a decision tree with default hyperparameters on the full iris data.
dt_clf = DecisionTreeClassifier()
dt_clf.fit(X=X, y=Y)

In [None]:
# Predict the class of a single sample described by four feature values.
sample = [[5, 3, 5, 1.5]]
dt_clf.predict(sample)

# 全连接神经网络实现

## 一. 读取数据

In [7]:
import pandas as pd
# Read the advertising dataset from disk; read_csv returns a DataFrame.
data = pd.read_csv('Advertising.csv')
# Preview the first five rows.
data.head(5)

Unnamed: 0.1,Unnamed: 0,TV,Radio,Newspaper,Sales
0,1,230.1,37.8,69.2,22.1
1,2,44.5,39.3,45.1,10.4
2,3,17.2,45.9,69.3,9.3
3,4,151.5,41.3,58.5,18.5
4,5,180.8,10.8,58.4,12.9


In [19]:
# iloc indexes as [rows, columns]: the first selector picks rows and the
# second picks columns.
x = data.iloc[:, 1:-1]  # inputs: every row, all columns except the first and last
y = data.iloc[:, -1]    # labels: every row, last column only

# Show the inputs and the labels, separated by a divider line.
print(x, "____________", y, sep="\n")

        TV  Radio  Newspaper
0    230.1   37.8       69.2
1     44.5   39.3       45.1
2     17.2   45.9       69.3
3    151.5   41.3       58.5
4    180.8   10.8       58.4
..     ...    ...        ...
195   38.2    3.7       13.8
196   94.2    4.9        8.1
197  177.0    9.3        6.4
198  283.6   42.0       66.2
199  232.1    8.6        8.7

[200 rows x 3 columns]
____________
0      22.1
1      10.4
2       9.3
3      18.5
4      12.9
       ... 
195     7.6
196     9.7
197    12.8
198    25.5
199    13.4
Name: Sales, Length: 200, dtype: float64


## 二. 模型构建

In [None]:
from tensorflow import keras
# Take `layers` from the same Keras that `keras` above refers to. A bare
# `from keras import layers` resolves to the separately installed `keras`
# package, which may not match the TensorFlow-bundled one.
from tensorflow.keras import layers

model = keras.models.Sequential()  # build a sequential model

# Declare the input explicitly: three features per sample.
model.add(keras.Input(shape=(3,)))

# Hidden fully connected layer.
model.add(
    layers.Dense(
        units=64,  # number of neurons in the hidden layer
        activation='relu',  # activation function
    )
)
# Output layer: a single unit with no activation specified, producing the
# regression value.
model.add(layers.Dense(units=1))

In [None]:
# Minimise mean squared error with the Adam optimizer. With batch_size=200
# and 200 rows, each epoch is a single full-batch update.
model.compile(loss='mse', optimizer='adam')
history = model.fit(x=x, y=y, epochs=2000, batch_size=200)

In [None]:
import matplotlib.pyplot as plt
# Take the epoch count from the recorded history instead of repeating the
# number passed to fit(), so the x-axis always matches the loss curve.
train_loss = history.history['loss']
plt.plot(range(len(train_loss)), train_loss)
plt.xlabel('epoch')
plt.ylabel('loss (mse)')
plt.show()

In [None]:
import matplotlib.pyplot as plt
# Shuffle into a new frame with a fixed seed: the split is reproducible and
# re-running this cell yields the same rows, because `data` is not overwritten.
data_shuffled = data.sample(frac=1, random_state=42).reset_index(drop=True)
x = data_shuffled.iloc[:, 1:-1]
y = data_shuffled.iloc[:, -1]

# Rows 0-159 train, 160-179 validation, 180 onward test.
x_train, y_train = x[:160], y[:160]
x_val, y_val = x[160:180], y[160:180]
x_test, y_test = x[180:], y[180:]

# The existing model has already been fitted on every row, including the ones
# now held out, which would make validation/test losses optimistic. Start from
# a clone, which has the same architecture but freshly initialised weights.
model = keras.models.clone_model(model)
model.compile(optimizer='adam', loss='mse')

history = model.fit(x_train, y_train, batch_size=160, epochs=500, validation_data=(x_val, y_val))

In [None]:

# Derive the x-axis from the recorded history rather than hard-coding the
# epoch count, and label both curves so they can be told apart.
history_dict = history.history
epochs_run = range(len(history_dict['loss']))
plt.plot(epochs_run, history_dict['loss'], label='loss')
plt.plot(epochs_run, history_dict['val_loss'], label='val_loss')
plt.xlabel('epoch')
plt.legend()
plt.show()

# Loss of the model on the test rows.
print('test_loss:', model.evaluate(x_test, y_test))
