In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.core import display as ICD
import seaborn as sns
import glob
import scipy
import os
pd.set_option('display.max_columns', 100)
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

In [2]:
DATA_FOLDER = './avg_data/'

### Regression matrix creation

In [3]:
os.listdir(DATA_FOLDER)

['anem1.txt',
 'anem2.txt',
 'anem3.txt',
 'anem4.txt',
 'anem5.txt',
 'anem6.txt',
 'anem7.txt',
 'radiometer.csv']

In [4]:
all_anem=glob.glob(DATA_FOLDER + "anem*")
mast_anem=all_anem[:6]
top_anem=all_anem[6]

In [5]:
only_mag=True
h=np.arange(1.5,22,4)
df_list=[]
for idx,i in enumerate(mast_anem):
    df_temp=pd.read_csv(i, sep=" ", header=None, index_col=0, names=['u','direction'])
    df_temp['h']=h[idx]
    df_list.append(df_temp)
mast_anem_df = pd.concat(df_list)
mast_anem_df


Unnamed: 0,u,direction,h
2018-11-14 00:00:00,0.57,185.0,1.5
2018-11-14 00:01:00,0.61,248.0,1.5
2018-11-14 00:02:00,0.44,187.0,1.5
2018-11-14 00:03:00,0.63,206.0,1.5
2018-11-14 00:04:00,0.58,160.0,1.5
2018-11-14 00:05:00,0.50,209.0,1.5
2018-11-14 00:06:00,0.62,175.0,1.5
2018-11-14 00:07:00,0.69,233.0,1.5
2018-11-14 00:08:00,0.96,238.0,1.5
2018-11-14 00:09:00,0.50,203.0,1.5


In [6]:
rad_df = pd.read_csv(DATA_FOLDER+'radiometer.csv', sep=" ",index_col=0)
top_anem_df = pd.read_csv(top_anem, sep=" ",index_col=0,names=['u_top','direction_top'])
time_var_df = top_anem_df.merge(rad_df,how='left', left_index=True,right_index=True)
time_var_df

Unnamed: 0,u_top,direction_top,Pyranometer Upper Irradiance [W/m$^2$],Pyranometer Lower Irradiance [W/m$^2$],Pyrgeometer Upper Irradiance [W/m$^2$],Pyrgeometer Lower Irradiance [W/m$^2$],Albedo [-],Net Solar radiation [W/m$^2$],Net (total) radiation [W/m$^2$],Net Far Infrared radiation [W/m$^2$],Sky temperature [°C],Radiometer Ground temperature [°C]
2018-11-14 00:00:00,2.71,328.0,1.8,-2.9,369.0,364.7,-1.100000,4.7,9.0,4.3,10.9,10.0
2018-11-14 00:01:00,2.60,328.0,2.2,-0.0,371.7,365.8,0.600000,2.2,8.1,5.9,11.4,10.3
2018-11-14 00:02:00,3.15,328.0,2.4,-2.1,370.7,366.0,-0.700000,4.5,9.2,4.7,11.2,10.3
2018-11-14 00:03:00,2.49,329.0,1.6,-0.8,369.0,368.5,-0.100000,2.4,3.0,0.5,10.9,10.8
2018-11-14 00:04:00,3.03,332.0,3.3,-2.6,370.0,365.9,0.100000,5.9,10.0,4.1,11.1,10.3
2018-11-14 00:05:00,2.84,333.0,3.8,-0.7,368.4,363.9,inf,4.5,9.0,4.5,10.7,9.9
2018-11-14 00:06:00,3.57,335.0,5.4,-2.3,368.4,366.8,-inf,7.6,9.2,1.6,10.8,10.4
2018-11-14 00:07:00,2.67,330.0,2.9,-2.4,370.1,364.4,0.000000,5.2,10.9,5.7,11.1,10.0
2018-11-14 00:08:00,3.00,278.0,5.0,-0.1,367.6,366.1,-1.400000,5.1,6.6,1.5,10.6,10.3
2018-11-14 00:09:00,3.35,324.0,4.6,-3.1,369.2,364.5,inf,7.7,12.4,4.7,10.9,10.0


In [7]:
tot_df=mast_anem_df.merge(time_var_df,how='left', left_index=True,right_index=True)
tot_df = tot_df.dropna(axis=0, how='any')
tot_df

Unnamed: 0,u,direction,h,u_top,direction_top,Pyranometer Upper Irradiance [W/m$^2$],Pyranometer Lower Irradiance [W/m$^2$],Pyrgeometer Upper Irradiance [W/m$^2$],Pyrgeometer Lower Irradiance [W/m$^2$],Albedo [-],Net Solar radiation [W/m$^2$],Net (total) radiation [W/m$^2$],Net Far Infrared radiation [W/m$^2$],Sky temperature [°C],Radiometer Ground temperature [°C]
2018-11-14 00:00:00,0.57,185.0,1.5,2.71,328.0,1.8,-2.9,369.0,364.7,-1.100000,4.7,9.0,4.3,10.9,10.0
2018-11-14 00:00:00,0.66,160.0,5.5,2.71,328.0,1.8,-2.9,369.0,364.7,-1.100000,4.7,9.0,4.3,10.9,10.0
2018-11-14 00:00:00,0.91,225.0,9.5,2.71,328.0,1.8,-2.9,369.0,364.7,-1.100000,4.7,9.0,4.3,10.9,10.0
2018-11-14 00:00:00,2.64,284.0,13.5,2.71,328.0,1.8,-2.9,369.0,364.7,-1.100000,4.7,9.0,4.3,10.9,10.0
2018-11-14 00:00:00,2.98,313.0,17.5,2.71,328.0,1.8,-2.9,369.0,364.7,-1.100000,4.7,9.0,4.3,10.9,10.0
2018-11-14 00:00:00,2.76,298.0,21.5,2.71,328.0,1.8,-2.9,369.0,364.7,-1.100000,4.7,9.0,4.3,10.9,10.0
2018-11-14 00:01:00,0.61,248.0,1.5,2.60,328.0,2.2,-0.0,371.7,365.8,0.600000,2.2,8.1,5.9,11.4,10.3
2018-11-14 00:01:00,0.69,187.0,5.5,2.60,328.0,2.2,-0.0,371.7,365.8,0.600000,2.2,8.1,5.9,11.4,10.3
2018-11-14 00:01:00,0.99,272.0,9.5,2.60,328.0,2.2,-0.0,371.7,365.8,0.600000,2.2,8.1,5.9,11.4,10.3
2018-11-14 00:01:00,2.25,281.0,13.5,2.60,328.0,2.2,-0.0,371.7,365.8,0.600000,2.2,8.1,5.9,11.4,10.3


### Splitting Data

In [14]:
X = np.array(tot_df.iloc[:,2:])
y = np.array(tot_df.iloc[:,0])

(8460,)

In [15]:
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.33, random_state=42)

In [None]:
anem1 = pd.read_csv(all_anem[0], sep=" ", header=None)
anem2 = pd.read_csv(DATA_FOLDER+'radiometer.csv', header=None)
anem1.info()
anem2