In [1]:
import matplotlib
matplotlib.use('Agg') # Must be before importing matplotlib.pyplot or pylab!
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from IPython.core import display as ICD
import seaborn as sns
import glob
import scipy
import os
pd.set_option('display.max_columns', 100)

In [2]:
# import scikit learn packages

from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.linear_model import RidgeCV
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.decomposition import PCA

from sklearn.pipeline import make_pipeline

In [3]:
from helpers import *

**Parameters**

In [4]:
seasonwise = True
feature_selection = True
alphas = np.logspace(-10,5,3)
deg = 2
train_dim = 0.01
test_dim = 0.39
validate_dim = 0.6

seasons_list = ['spring','summer','autumn','winter']

consistency_splitting(train_dim, test_dim ,validate_dim)

model = make_pipeline(StandardScaler(),PolynomialFeatures(deg,include_bias=True), RidgeCV(alphas))

Consistent Split



In [5]:
if seasonwise==False:
    feature_selection = False

In [6]:
#Local
DATA_FOLDER = r'../data_extractor_scripts/'
RESULTS_FOLDER = r'./'
#Server
# DATA_FOLDER = '~/scripts/'
# RESULTS_FOLDER = '/raid/motus/results/ridgeregression/'

In [7]:
tot_df=pd.read_csv(DATA_FOLDER+'regression_mat_year.csv',index_col=0)

In [10]:
if feature_selection:
    df_features_sel = pd.read_csv('feature_for_ridge_original.txt',header=None)
    
#     def season_features_transform(df):
#         feature_seasonwise=[]
#         prev_bol=False
#         for row_count,row in enumerate(df_features_sel[0]):
#             season_row=[]
#             for s in seasons_list:
#                 season_row.append(row==s.capitalize())
#             #print(prev_bol,sum(season_row)==1)
#             if prev_bol==True and (sum(season_row)==1)==False:
#                 feature_seasonwise.append(season_features)        
#             if (sum(season_row)==1):
#                 season_features=[]
#             else:
#                 season_features.append(row[:-1])
#             prev_bol=sum(season_row)==1
#         return feature_seasonwise

    def season_features_transform(df_features_sel):
        season_position=[]
        for row_count,row in enumerate(df_features_sel[0]):
            season_row=[]
            for s in seasons_list:
                season_row.append(row==s.capitalize())
            season_position.append(sum(season_row)==1)
        season_pos_idx=np.where(season_position)[0]
        feature_seasonwise=[]
        print(season_pos_idx)
        for interval in range(season_pos_idx.shape[0]-1):
            season_temp=[]
            for entry in range(season_pos_idx[interval]+1,season_pos_idx[interval+1]):
                season_temp.append(df_features_sel[0][entry][:-1])
            feature_seasonwise.append(season_temp)
        feature_seasonwise.append(list(map(lambda i:i[:-1],df_features_sel[0][42+1:].tolist())))
        return feature_seasonwise
        
    lst_features_sel = season_features_transform(df_features_sel)

[ 0 14 28 42]


Transform absolute value and direction in vector components

In [12]:
tot_df = vectorize_wind_speed(tot_df)

Split season by season

In [13]:
if seasonwise:
    df_spring, df_summer, df_autumn, df_winter = season_splitter(tot_df)
    season_dfs=[df_spring, df_summer, df_autumn, df_winter]
    
else:
    season_dfs=[tot_df]

In [14]:
#Empty dataframes
mses_results=[]
r_2s_results=[]
mag_avg_pred_results=[]
mag_avg_true_results=[]

In [15]:
for index,df in enumerate(season_dfs):
    if len(season_dfs)>2:
        names=seasons_list
    else:
        names=['allyear']
    
    #Printing progress
    print('Period under optimization: ',names[index])
    
    #Dividing X and y
    X = df.drop(columns=['u_x', 'u_y','u_z'])
    print(df.columns)
    print(lst_features_sel[index])
    if feature_selection:
        X=np.array(X[lst_features_sel[index]])
        h_position=lst_features_sel[index].index('h')
    else:
        X=np.array(X)   
    y = np.array(df[['u_x', 'u_y']])
    
    #Splitting Matrices
    X_tr, X_temp, y_tr, y_temp = train_test_split(X, y, test_size=test_dim+validate_dim, random_state=50)
    X_te, X_va, y_te, y_va = train_test_split(X_temp, y_temp, test_size=validate_dim/(test_dim+validate_dim), random_state=50)
    X_temp = None
    y_temp = None
    
    print ('X_te_hs.shape: ',X_te[0])
    #Make a list with differet heights
    if feature_selection:
        X_te_hs, y_te_hs = split_hs_test(X_te,y_te,h_pos=h_position)
    else:
        X_te_hs, y_te_hs = split_hs_test(X_te,y_te)
    #print ('X_te_hs.shape: ',X_te_hs[0][3,:])
    
    #Fit the model 
    model.fit(X_tr,y_tr)
    
    y_pred_hs=[]
    mag_avg_pred=[]
    for hs in X_te_hs:
        ans=model.predict(hs)
        y_pred_hs.append(ans)
        ans=np.sqrt(ans[:,0]**2+ans[:,1]**2).mean()
        mag_avg_pred.append(ans)
    mag_avg_pred.append(names[index])
    mag_avg_pred_results.append(mag_avg_pred)

    
    mses=[]
    mag_avg_true=[]
    for idx,i in enumerate(y_pred_hs):
        mses.append(mean_squared_error(y_te_hs[idx],i))
        ans=y_te_hs[idx]
        ans=np.sqrt(ans[:,0]**2+ans[:,1]**2).mean()
        mag_avg_true.append(ans)
    mses.append(names[index])
    mag_avg_true.append(names[index])
    mses_results.append(mses)
    mag_avg_true_results.append(mag_avg_true)
    
    r_2s=[]
    for idx,i in enumerate(y_pred_hs):
        r_2s.append(r2_score(y_te_hs[idx],i))
    r_2s.append(names[index])
    r_2s_results.append(r_2s)
    
    plot_ys(y_pred_hs,y_te_hs,RESULTS_FOLDER+'/images/',save=True,name=(names[index]+'-'))

Period under optimization:  spring
Index(['u_z', 'sonic_temp', 'h', 'u_top_z', 'sonic_temp_top',
       'Pyranometer Upper Irradiance [W/m$^2$]',
       'Pyranometer Lower Irradiance [W/m$^2$]',
       'Pyrgeometer Upper Irradiance [W/m$^2$]',
       'Pyrgeometer Lower Irradiance [W/m$^2$]',
       'Net Solar radiation [W/m$^2$]', 'Net (total) radiation [W/m$^2$]',
       'Net Far Infrared radiation [W/m$^2$]', 'Sky temperature [°C]',
       'Radiometer Ground temperature [°C]', 'Sensor Ground temperature [°C]',
       'North temperature [°C]', 'East temperature [°C]',
       'South temperature [°C]', 'West temperature [°C]', 'u_x', 'u_y',
       'u_top_x', 'u_top_y'],
      dtype='object')
['h', 'u_top_y', 'u_top_x', 'sonic_temp', 'u_top_z', 'Net Solar radiation [W/m$^2$]', 'Pyranometer Upper Irradiance [W/m$^2$]', 'Net (total) radiation [W/m$^2$]', 'Net Far Infrared radiation [W/m$^2$]', 'Pyranometer Lower Irradiance [W/m$^2$]', 'North temperature [°C]', 'sonic_temp_top', 'Sensor Gro

KeyError: "['Sky temperature [°C'] not in index"

In [None]:
mses_results_df=pd.DataFrame(mses_results)
mses_results_df.to_csv(RESULTS_FOLDER+'mses_results.txt',header=None, sep=',', mode='a')

In [None]:
r_2s_results_df=pd.DataFrame(r_2s_results)
r_2s_results_df.to_csv(RESULTS_FOLDER+'r_2s_results.txt',header=None, sep=',', mode='a')

In [None]:
pd.DataFrame(mag_avg_pred_results).to_csv(RESULTS_FOLDER+'magnitude_average_pred.txt',header=None, sep=',', mode='a')
pd.DataFrame(mag_avg_true_results).to_csv(RESULTS_FOLDER+'magnitude_average_true.txt',header=None, sep=',', mode='a')

In [None]:
print('JOB Finished')