In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.core import display as ICD
import seaborn as sns
import glob
import scipy
import os
pd.set_option('display.max_columns', 100)
from helpers import *

In [2]:
import torch
from torch.autograd import Variable
import torch.nn.functional as F      # activation function
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split  # split adata
from sklearn.preprocessing import StandardScaler      # Standardzation


In [3]:
DATA_FOLDER = './'

In [4]:
RESULTS_FOLDER='/raid/motus/results/neuralnetwork/'

In [5]:
os.listdir(DATA_FOLDER)

['py_scripts',
 'neural_network.ipynb',
 'regression_mat_year.csv',
 'loss_log',
 '__pycache__',
 'ridge_regression.ipynb',
 '.ipynb_checkpoints',
 'feature_selection_rf.ipynb',
 'helpers.py',
 'feature_selection_stepwise.ipynb']

### Regression matrix manipulation

Importing regression matrix

In [6]:
tot_df=pd.read_csv(DATA_FOLDER+'regression_mat_year.csv',index_col=0)

Transform the wind speed's absolute value and direction into vector components

In [7]:
tot_df=vectorize_wind_speed(tot_df)

In [8]:
tot_df.shape

(535470, 23)

Shorten the matrix for development purposes

In [None]:
# Keep only the first `number_trial` rows in a separate, smaller frame so that
# experiments run quickly while `tot_df` itself stays untouched.
number_trial = 20000
# Positional row slicing is `.iloc`; the former `.ioc` is not a DataFrame
# attribute and raised AttributeError.
tot_df_small = tot_df.iloc[:number_trial, :]
tot_df_small.shape

In [None]:
# NOTE(review): this cell overwrites `tot_df` with its first `number_trial` rows,
# so every later cell sees the shortened frame and the full data can only be
# recovered by re-reading the CSV. Skip this cell for a full-size run.
number_trial=20000
tot_df=tot_df.iloc[:number_trial,:]
# NOTE(review): the bare expression below displays the whole frame; consider
# `.head()` or `.shape` to keep the notebook output small.
tot_df

## Splitting the data

In [9]:
# Features: every column of tot_df except u_x, u_y and u_z.
X = np.array(tot_df.drop(columns=['u_x', 'u_y','u_z']))
# Targets: the two columns u_x and u_y; u_z is excluded from both X and Y.
Y = np.array(tot_df[['u_x', 'u_y']]) # two-column output (u_x, u_y), not a single dimension

In [10]:
X_tr_, X_te_, Y_tr_, Y_te_ = train_test_split(X, Y, test_size = 0.6, random_state = 0)

In [11]:
x_tr_, x_ev_, y_tr_, y_ev_ = train_test_split(X_tr_, Y_tr_, test_size = 0.5, random_state = 0)

In [12]:
x_ev_hs_, y_ev_hs_=split_hs_test(x_ev_,y_ev_,hs=np.arange(1.5,22,4))

In [13]:
split_hs_test(X_te_,Y_te_,hs=np.arange(1.5,22,4))

([array([[28.76798507,  1.5       ,  0.0734543 , ..., 25.666     ,
          -1.79353365,  0.26007794],
         [21.44765458,  1.5       ,  0.06789737, ..., 18.007     ,
           0.22669753, -0.72553478],
         [20.72908651,  1.5       , -0.31244171, ..., 21.81066667,
           1.92588636,  7.62442933],
         ...,
         [24.74195018,  1.5       , -0.0818368 , ..., 20.86233333,
           1.36180138, -0.5788727 ],
         [ 4.77044527,  1.5       , -0.17833659, ...,  1.95466667,
           0.71400238, -1.13771844],
         [14.44717944,  1.5       , -0.19816249, ..., 22.652     ,
          -0.28029994, -0.84167074]]),
  array([[ 1.29227797e+01,  5.50000000e+00, -1.48357488e-01, ...,
           9.51633333e+00, -4.43550425e+00,  1.79546618e+00],
         [ 1.62750196e+01,  5.50000000e+00, -2.33616525e-02, ...,
           1.25233333e+01,  5.51567947e-01, -5.21612536e-01],
         [ 1.95852663e+01,  5.50000000e+00, -1.30144976e-02, ...,
           1.55580000e+01,  1.30770041

In [None]:
df_spring, df_summer, df_autumn, df_winter=season_splitter(tot_df)
df_seasons=[df_spring, df_summer, df_autumn, df_winter]

## Preparing the data for season 

In [None]:
def _as_float_variable(array):
    """Wrap a NumPy array as a float32 torch ``Variable``."""
    return Variable(torch.from_numpy(array).type(torch.FloatTensor))


def _prepare_season(df_season):
    """Split, standardize and tensorize the data of a single season.

    Returns the same 3-tuple that ``seasons_net`` documents.
    """
    X = np.array(df_season.drop(columns=['u_x', 'u_y', 'u_z']))
    Y = np.array(df_season[['u_x', 'u_y']])

    # First split: 40 % train(+evaluation) / 60 % test. The test part is not
    # used by this function.
    X_season_TR_, _, Y_TR_, _ = train_test_split(X, Y, test_size=0.6, random_state=0)

    # Second split: halve the train part into training and evaluation sets.
    x_season_tr_, x_season_ev_, y_season_tr_, y_season_ev_ = train_test_split(
        X_season_TR_, Y_TR_, test_size=0.5, random_state=0)

    # Split the *raw* evaluation data per `hs` value before standardizing.
    # presumably one subset per anemometer -- confirm against helpers.split_hs_test
    x_ev_hs_, y_ev_hs_ = split_hs_test(x_season_ev_, y_season_ev_, hs=np.arange(1.5, 22, 4))

    # The scaler is fitted on the training features only and then re-used for
    # every evaluation array, so no evaluation statistics leak into training.
    scaler = StandardScaler().fit(x_season_tr_)
    x_season_tr_ = scaler.transform(x_season_tr_)
    x_season_ev_ = scaler.transform(x_season_ev_)
    x_ev_standard_hs_ = [scaler.transform(x_ev_h_) for x_ev_h_ in x_ev_hs_]

    elements_variables = [
        _as_float_variable(element)
        for element in (x_season_tr_, x_season_ev_, y_season_tr_, y_season_ev_)
    ]

    x_ev_standard_hs_variable = []
    y_ev_hs_variable = []
    for x_ev_standard_h_, y_ev_h_ in zip(x_ev_standard_hs_, y_ev_hs_):
        x_ev_standard_hs_variable.append(_as_float_variable(x_ev_standard_h_))
        y_ev_hs_variable.append(_as_float_variable(y_ev_h_))

    return elements_variables, x_ev_standard_hs_variable, y_ev_hs_variable


def seasons_net(df_seasons, all_seasons=False):
    """Prepare training/evaluation tensors from per-season DataFrames.

    Parameters
    ----------
    df_seasons : iterable of pandas.DataFrame
        One frame per season; each must contain the columns ``u_x``, ``u_y``
        and ``u_z``.
    all_seasons : bool, default False
        ``False`` keeps the historical behaviour: only the FIRST frame of
        ``df_seasons`` is processed and its 3-tuple is returned (the original
        implementation returned from inside the loop).
        ``True`` processes every frame and returns a list with one 3-tuple per
        season, in input order.

    Returns
    -------
    tuple, list of tuple, or None
        Each tuple is ``(elements_variables, x_ev_standard_hs_variable,
        y_ev_hs_variable)`` where ``elements_variables`` is
        ``[x_tr, x_ev, y_tr, y_ev]`` as float32 Variables and the two other
        items are lists of float32 Variables, one per subset produced by
        ``split_hs_test``. With ``all_seasons=False`` and an empty
        ``df_seasons`` the result is ``None``.
    """
    prepared = []
    for df_season in df_seasons:
        season_result = _prepare_season(df_season)
        if not all_seasons:
            # Backward-compatible path: stop after the first season.
            return season_result
        prepared.append(season_result)

    return prepared if all_seasons else None


In [None]:
a,b,c=seasons_net(df_seasons)

## Preparing the test data for performance evaluation 

In [None]:
def prepare_test_data(x_tr_, X_te_, Y_te_):
    """Standardize the test set with training statistics and tensorize it.

    A ``StandardScaler`` is fitted on ``x_tr_`` and applied to ``X_te_`` as a
    whole and to each subset returned by ``split_hs_test`` (called on the raw,
    unscaled test data with ``hs=np.arange(1.5,22,4)``).

    Returns
    -------
    tuple
        ``([X_te, Y_te], X_te_per_hs, Y_te_per_hs)``: the first item holds the
        scaled test features and the test targets, the two others are lists
        with one entry per ``split_hs_test`` subset. All arrays are converted
        to float32 torch Variables.
    """
    def as_variable(array):
        # NumPy array -> float32 tensor -> Variable
        return Variable(torch.from_numpy(array).type(torch.FloatTensor))

    scaler = StandardScaler().fit(x_tr_)

    # The per-hs split must see the raw values, so it happens before scaling.
    raw_X_by_hs, raw_Y_by_hs = split_hs_test(X_te_, Y_te_, hs=np.arange(1.5,22,4))

    scaled_X_te = scaler.transform(X_te_)
    scaled_X_by_hs = [scaler.transform(raw_block) for raw_block in raw_X_by_hs]

    whole_set = [as_variable(scaled_X_te), as_variable(Y_te_)]

    X_by_hs_variables = []
    Y_by_hs_variables = []
    for scaled_block, target_block in zip(scaled_X_by_hs, raw_Y_by_hs):
        X_by_hs_variables.append(as_variable(scaled_block))
        Y_by_hs_variables.append(as_variable(target_block))

    return whole_set, X_by_hs_variables, Y_by_hs_variables

In [None]:
a,b,c=prepare_test_data(x_tr_, X_te_, Y_te_)

## Standardizing the training data and tensor transformation

In [23]:
def prepare_train_data(x_tr_,x_ev_,y_tr_,y_ev_):
    """Standardize the training/evaluation data and convert it to tensors.

    Parameters
    ----------
    x_tr_, y_tr_ : numpy.ndarray
        Raw training features and targets.
    x_ev_, y_ev_ : numpy.ndarray
        Raw evaluation features and targets.

    Returns
    -------
    tuple
        ``(x_tr, y_tr, x_ev, y_ev, x_ev_standard_hs_variable,
        y_ev_hs_variable)``. The first four are float32 Variables (features
        standardized with the scaler fitted on ``x_tr_``); the last two are
        lists of float32 Variables, one per subset returned by
        ``split_hs_test``.

    Notes
    -----
    The per-hs subsets used to be read from the notebook globals ``x_ev_hs_``
    and ``y_ev_hs_``, so the result silently depended on which cells had been
    run and could mismatch the arguments. They are now derived from the
    ``x_ev_``/``y_ev_`` arguments with the same ``split_hs_test`` call the
    notebook used to build those globals.
    """
    # Split the raw evaluation data first: the split has to see unscaled values,
    # exactly as in the sibling functions prepare_test_data and seasons_net.
    x_ev_hs_, y_ev_hs_ = split_hs_test(x_ev_, y_ev_, hs=np.arange(1.5,22,4))

    # Fit on the training features only; re-use for all evaluation arrays.
    k = StandardScaler().fit(x_tr_)
    x_tr_ = k.transform(x_tr_)
    x_ev_ = k.transform(x_ev_)

    x_tr = Variable(torch.from_numpy(x_tr_).type(torch.FloatTensor))
    y_tr = Variable(torch.from_numpy(y_tr_).type(torch.FloatTensor))
    x_ev = Variable(torch.from_numpy(x_ev_).type(torch.FloatTensor))
    y_ev = Variable(torch.from_numpy(y_ev_).type(torch.FloatTensor))

    x_ev_standard_hs_variable = []
    y_ev_hs_variable = []
    for x_ev_h_, y_ev_h_ in zip(x_ev_hs_, y_ev_hs_):
        x_ev_standard_h_ = k.transform(x_ev_h_)
        x_ev_standard_hs_variable.append(
            Variable(torch.from_numpy(x_ev_standard_h_).type(torch.FloatTensor)))
        y_ev_hs_variable.append(
            Variable(torch.from_numpy(y_ev_h_).type(torch.FloatTensor)))

    return x_tr,y_tr,x_ev,y_ev,x_ev_standard_hs_variable,y_ev_hs_variable

# Neural Network

### Build the Net

In [19]:
class Net(torch.nn.Module):
    """Fully connected regression network with two ReLU hidden layers.

    Layout: ``n_feature -> n_hidden -> n_hidden -> n_output``. The output layer
    is linear (no activation).
    """

    def __init__(self, n_feature, n_hidden, n_output):
        super().__init__()
        nn = torch.nn
        # Layers are created in this order so that parameter initialisation
        # consumes the random generator in the same sequence as before.
        self.hidden1 = nn.Linear(n_feature, n_hidden)
        self.hidden2 = nn.Linear(n_hidden, n_hidden)
        self.predict = nn.Linear(n_hidden, n_output)

    def forward(self, x):
        """Return the network output for the input batch ``x``."""
        activation = x
        # Both hidden layers use a ReLU activation.
        for hidden_layer in (self.hidden1, self.hidden2):
            activation = F.relu(hidden_layer(activation))
        return self.predict(activation)

net = Net(n_feature=x_tr_.shape[1], n_hidden=100, n_output=2)
print(net)

Net(
  (hidden1): Linear(in_features=20, out_features=100, bias=True)
  (hidden2): Linear(in_features=100, out_features=100, bias=True)
  (predict): Linear(in_features=100, out_features=2, bias=True)
)


Establish loss function and optimizer

In [20]:
loss_func = torch.nn.MSELoss()   
optimizer = torch.optim.SGD(net.parameters(), lr=0.2)

Split the test dataset according to the different anemometers

Convert the vectors into tensors

### Train the Net

In [21]:
def training_net(x_tr,y_tr,x_ev, y_ev,epochs):
    """Train the global ``net`` with full-batch gradient steps and early stopping.

    Uses the notebook-level ``net``, ``loss_func`` and ``optimizer``.

    Parameters
    ----------
    x_tr, y_tr : torch tensors/Variables
        Training features and targets.
    x_ev, y_ev : torch tensors/Variables
        Evaluation features and targets, used only to monitor the MSE.
    epochs : int
        Maximum number of optimisation steps.

    Behaviour
    ---------
    At every epoch the evaluation MSE is computed with the current weights.
    Training stops as soon as that MSE is larger than at the previous epoch
    (the weights are not updated in the epoch that triggers the stop).
    Every 10th epoch the training loss and the evaluation MSE are printed and
    recorded.

    Returns
    -------
    tuple
        The results of the two ``Series.to_csv`` calls that write the recorded
        values to ``RESULTS_FOLDER+'loss_log'`` and
        ``RESULTS_FOLDER+'mses_log'``.

    Fixes relative to the previous version:
    * the evaluation MSE is computed from ``net(x_ev)`` against ``y_ev``
      (it used to subtract the *training* predictions from ``y_ev``);
    * ``** 2`` is used for squaring (``^`` is bitwise XOR);
    * the MSE log list is created once, before the loop, and the same name is
      used in the return statement (``mses`` used to be undefined).
    """
    losses = []
    mses = []
    previous_mse = float('inf')

    for t in range(epochs):
        prediction = net(x_tr)
        loss = loss_func(prediction, y_tr)

        # Monitoring only: no gradient bookkeeping needed for the evaluation pass.
        with torch.no_grad():
            mse = torch.mean((net(x_ev) - y_ev) ** 2).item()

        # Early stopping: evaluation error went up compared with the last epoch.
        if mse > previous_mse:
            break
        previous_mse = mse

        optimizer.zero_grad()    # clear gradients from the previous step
        loss.backward()          # backpropagation
        optimizer.step()         # update weights and biases

        if t % 10 == 0:
            loss_value = loss.item()
            print ('The loss is', loss_value)
            print ('The mse is', mse)

            losses.append(loss_value)
            mses.append(mse)

    return pd.Series(losses).to_csv(RESULTS_FOLDER+'loss_log'), pd.Series(mses).to_csv(RESULTS_FOLDER+'mses_log')

### Prediction visualisation per anemometer

In [None]:
# Predict once per subset in `x_ev_standard_hs_variable`
# (the per-hs evaluation inputs returned by `prepare_train_data`).
net.eval()  # switching to evaluation mode is loop-invariant, so do it once
with torch.no_grad():
    # No graph is built under no_grad, so the outputs convert to NumPy directly
    # without going through the legacy `.data` attribute.
    y_preds = [net(x_ev_h).numpy() for x_ev_h in x_ev_standard_hs_variable]

In [None]:
plot_ys(y_preds,y_ev_,RESULTS_FOLDER,save=False,interval=[100,200],name='graph')
#inputs of these functions are arrays

In [None]:
# `y_preds` holds one prediction array per hs subset, so it must be paired with
# the per-subset targets `y_ev_hs_` (from split_hs_test). Zipping with the flat
# `y_ev_` array paired each prediction array with a single target row and
# broadcast it, which produced meaningless MSE values.
for i, (y_pred, y_ev_h_) in enumerate(zip(y_preds, y_ev_hs_)):

    print ('MSE anemometer'+' ', i , 'is', np.mean(np.square(y_pred-y_ev_h_)))

In [None]:
x_te_trial_standard_hs=[]

for x_te_trial_standard_h_ in x_te_trial_standard_hs_:
    
    x_te_trial_standard_hs.append(torch.from_numpy(x_te_trial_standard_h_).type(torch.FloatTensor))

In [None]:
# Convert the trial-set arrays to float32 torch Variables.
# NOTE(review): the first four lines rebind the `*_trial_` names from NumPy
# arrays to tensors, so this cell cannot be run twice (torch.from_numpy would
# then receive a tensor), and any later cell that expects arrays under these
# names breaks. Consider binding the tensors to new names instead.
# NOTE(review): in this notebook `x_tr_trial_`, `x_te_trial_`, `y_te_trial_hs_`
# and the scaled per-hs arrays are created by cells that appear further down;
# on a fresh kernel this cell raises NameError when run top to bottom.
x_tr_trial_ = torch.from_numpy(x_tr_trial_)
y_tr_trial_ = torch.from_numpy(y_tr_trial_)
x_te_trial_ = torch.from_numpy(x_te_trial_)
y_te_trial_ = torch.from_numpy(y_te_trial_)
# Cast to float32 tensors.
x_tr_trial = x_tr_trial_.type(torch.FloatTensor)
y_tr_trial = y_tr_trial_.type(torch.FloatTensor)
x_te_trial = x_te_trial_.type(torch.FloatTensor)
y_te_trial = y_te_trial_.type(torch.FloatTensor)
x_tr_trial, y_tr_trial = Variable(x_tr_trial), Variable(y_tr_trial)
x_te_trial, y_te_trial = Variable(x_te_trial), Variable(y_te_trial)

# Per-hs test subsets: inputs are already tensors, targets are still arrays.
x_te_trial_standard_hs_variable=[]
y_te_trial_hs_variable=[]

for x_te_trial_standard_h, y_te_trial_s_ in zip(x_te_trial_standard_hs, y_te_trial_hs_):
    
    x_te_trial_standard_hs_variable.append(Variable(x_te_trial_standard_h ))
    y_te_trial_hs_variable.append(Variable(torch.from_numpy(y_te_trial_s_).type(torch.FloatTensor)))

## Standardizing the Data for Season 

In [None]:
def _zscore_train_features(df_tr):
    """Drop u_x, u_y, u_z and standardize the remaining columns with their own mean/std."""
    features = df_tr.drop(columns=['u_x', 'u_y','u_z'])
    centered = features - features.mean(axis=0)
    return centered / features.std(axis=0)


# One standardized training feature frame per season.
X_spring_tr_standard = _zscore_train_features(df_spring_tr)
X_autumn_tr_standard = _zscore_train_features(df_autumn_tr)
X_summer_tr_standard = _zscore_train_features(df_summer_tr)
X_winter_tr_standard = _zscore_train_features(df_winter_tr)

In [None]:
y_spring_tr=np.array(df_spring_tr[['u_x','u_y']])
y_summer_tr=np.array(df_summer_tr[['u_x','u_y']])
y_autumn_tr=np.array(df_autumn_tr[['u_x','u_y']])
y_winter_tr=np.array(df_winter_tr[['u_x','u_y']])

In [None]:
def _zscore_with_train_stats(df_te, df_tr):
    """Standardize the test features with the mean/std of the training features.

    Both frames have the columns u_x, u_y, u_z dropped first. Using the
    training statistics keeps the test features on the same scale the model
    was trained on; the previous version standardized each test set with its
    own mean/std, which is inconsistent with the rest of the notebook (the
    scaler is always fitted on training data only).
    """
    target_columns = ['u_x', 'u_y','u_z']
    features_tr = df_tr.drop(columns=target_columns)
    features_te = df_te.drop(columns=target_columns)
    return (features_te - features_tr.mean(axis=0)) / features_tr.std(axis=0)


# One standardized test feature frame per season.
X_spring_te_standard = _zscore_with_train_stats(df_spring_te, df_spring_tr)
X_autumn_te_standard = _zscore_with_train_stats(df_autumn_te, df_autumn_tr)
X_summer_te_standard = _zscore_with_train_stats(df_summer_te, df_summer_tr)
X_winter_te_standard = _zscore_with_train_stats(df_winter_te, df_winter_tr)

In [None]:
y_spring_te=np.array(df_spring_te[['u_x','u_y']])
y_summer_te=np.array(df_summer_te[['u_x','u_y']])
y_autumn_te=np.array(df_autumn_te[['u_x','u_y']])
y_winter_te=np.array(df_winter_te[['u_x','u_y']])

## Standardizing the training data (trial set)

In [None]:
k = StandardScaler().fit(x_tr_trial_)
x_tr_trial_ = k.transform(x_tr_trial_)

## Standardizing anemometer test (trial set)

In [None]:
x_te_trial_standard_hs_=[]
for x_te_trial_h_ in x_te_trial_hs_:
    
    x_te_trial_standard_hs_.append(k.transform(x_te_trial_h_))

## Splitting the data (trial set)

In [None]:
X_trial = np.array(tot_df_small.drop(columns=['u_x', 'u_y','u_z']))
Y_trial = np.array(tot_df_small[['u_x', 'u_y']]) # First consider 1 dimension output

In [None]:
x_tr_trial_, x_te_trial_, y_tr_trial_, y_te_trial_ = train_test_split(X_trial, Y_trial, test_size = 0.3, random_state = 0)

In [None]:
x_te_trial_hs_, y_te_trial_hs_=split_hs_test(x_te_trial_,y_te_trial_,hs=np.arange(1.5,22,4))