## 气温预测（神经网络）

In [3]:
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.optim as optim
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline

In [4]:
features=pd.read_csv("temp.csv",encoding='utf-8',error_bad_lines=False)
#看看数据长啥样
features.head()

Unnamed: 0,year,month,day,week,temp_2,temp_1,average,actual,friend
0,2016,1,1,fri,45,45,45,45,29
1,2016,1,2,sat,44,45,45,44,32
2,2016,1,3,sun,45,44,45,41,24
3,2016,1,4,mon,44,41,42,40,42
4,2016,1,5,tues,41,40,41,44,53


In [5]:
print('数据维度',features.shape)

数据维度 (9, 9)


In [6]:
#处理时间数据
import datetime
years=features['year']
months=features['month']
days=features['day']

#datetime格式
dates=[str(int(year))+'-'+str(int(month))+'-'+str(int(day)) for year,month,day in zip(years,months,days)]
dates=[datetime.datetime.strptime(date,'%Y-%m-%d') for date in dates]

In [7]:
dates[:5]

[datetime.datetime(2016, 1, 1, 0, 0),
 datetime.datetime(2016, 1, 2, 0, 0),
 datetime.datetime(2016, 1, 3, 0, 0),
 datetime.datetime(2016, 1, 4, 0, 0),
 datetime.datetime(2016, 1, 5, 0, 0)]

将字符串转换为独热编码

In [8]:
features=pd.get_dummies(features)
features.head(5)

Unnamed: 0,year,month,day,temp_2,temp_1,average,actual,friend,week_fri,week_mon,week_sat,week_sun,week_tues
0,2016,1,1,45,45,45,45,29,1,0,0,0,0
1,2016,1,2,44,45,45,44,32,0,0,1,0,0
2,2016,1,3,45,44,45,41,24,0,0,0,1,0
3,2016,1,4,44,41,42,40,42,0,1,0,0,0
4,2016,1,5,41,40,41,44,53,0,0,0,0,1


In [9]:
#标签
labels=np.array(features['actual'])
#在特征中去掉标签
features=features.drop('actual',axis=1)
#名字单独保存下
features_list=list(features.columns)
#转换为合适的格式
features=np.array(features)
features.shape

(9, 12)

## 构建网络模型

In [10]:
from sklearn import preprocessing
#归一化
input_features=preprocessing.StandardScaler().fit_transform(features)
input_features

array([[ 0.        ,  0.        , -1.54919334,  1.32287566,  1.39457984,
         1.33394594, -0.42949421,  1.41421356, -0.35355339, -0.53452248,
        -0.53452248, -0.35355339],
       [ 0.        ,  0.        , -1.161895  ,  0.75592895,  1.39457984,
         1.33394594, -0.19283413, -0.70710678, -0.35355339,  1.87082869,
        -0.53452248, -0.35355339],
       [ 0.        ,  0.        , -0.77459667,  1.32287566,  0.84887469,
         1.33394594, -0.82392766, -0.70710678, -0.35355339, -0.53452248,
         1.87082869, -0.35355339],
       [ 0.        ,  0.        , -0.38729833,  0.75592895, -0.78824078,
        -0.30316953,  0.59603278, -0.70710678,  2.82842712, -0.53452248,
        -0.53452248, -0.35355339],
       [ 0.        ,  0.        ,  0.        , -0.94491118, -1.33394594,
        -0.84887469,  1.46378638, -0.70710678, -0.35355339, -0.53452248,
        -0.53452248,  2.82842712],
       [ 0.        ,  0.        ,  0.38729833, -1.51185789, -1.33394594,
        -1.39457984, -

In [11]:
#定义网络类型，以及优化器
input_size=input_features.shape[1]
hidden_size=128
output_size=1
batch_size=16
my_nn=torch.nn.Sequential(
    torch.nn.Linear(input_size,hidden_size),
    torch.nn.Sigmoid(),
    torch.nn.Linear(hidden_size,output_size)
)
cost=torch.nn.MSELoss(reduction='mean')
optimizer=torch.optim.Adam(my_nn.parameters(),lr=0.01)

In [54]:
#训练网络
losses=[]
for i in range(1000):
    batch_loss=[]
    #用minibatch的方法训练
    for start in range(0,len(input_features),batch_size):
        end=start+batch_size if start+batch_size<len(input_features) else len(input_features)
        xx=torch.tensor(input_features[start:end],dtype=torch.float,requires_grad=True)
        yy=torch.tensor(labels[start:end],dtype=torch.float,requires_grad=True)
        prediction=my_nn(xx)
        loss=cost(prediction,yy)
        #梯度清0
        optimizer.zero_grad()
        loss.backward(retain_graph=True)
        optimizer.step()
        batch_loss.append(loss.data.numpy())
    if i%100==0:
        losses.append(np.mean(batch_loss))
        print(i,np.mean(batch_loss))

0 1793.7988
100 2.4984505
200 2.4444468
300 2.444445
400 2.444445
500 2.444445
600 2.4444447
700 2.444445
800 2.444445
900 2.444445
