# Datasett Ytre Vikna

Tar inn de nye datasettene for å finne ut hva som er forskjellig/likt

#### Importerer biblioteker

In [1]:
import pandas as pd
import numpy as np

# Visualization
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

# Making plots look better
import matplotlib as mpl
# Force an edge line on patches (bars, histogram bins) so neighbouring ones stay distinguishable
mpl.rcParams['patch.force_edgecolor'] = True

%config InlineBackend.figure_format = 'retina'

#### Henter ut ulike datasett

In [2]:
# Both exports are semicolon-separated; the same read options are used for each.
read_options = {'sep': ';', 'low_memory': False}

# TEK measurements. The file name indicates 13 Jul 2017 - 16 Feb 2018
# (an earlier note here said 16 December 2018 - TODO confirm).
df_tek = pd.read_csv('vindkraft 130717-160218 TEK met.csv', **read_options)

# Arome weather-forecast data (file name: 'korr winddir'). An earlier note gave
# the period as 1 September to 16 December - TODO confirm against the file.
df_arome = pd.read_csv('vindkraft 130717-160218 arome korr winddir.csv', **read_options)

In [3]:
# Summary of the TEK frame: row count, column count, dtypes and memory use
df_tek.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5239 entries, 0 to 5238
Columns: 309 entries, Unnamed: 0 to AnmeldingskorreksjonYtreVikna_ML_train
dtypes: float64(224), int64(85)
memory usage: 12.4 MB


In [4]:
# Summary of the Arome frame: row count, column count, dtypes and memory use
df_arome.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5239 entries, 0 to 5238
Columns: 489 entries, Unnamed: 0 to /arome_windvel_6573_1232
dtypes: float64(488), object(1)
memory usage: 19.5+ MB


## Fikser 2 timer frem med værdata

#### Arome

In [5]:
# Build a shifted copy of the Arome data: discard rows 0 and 1 and renumber
# from zero, so row i here holds what was row i + 2 in df_arome (two steps
# ahead, per the section heading). df_arome itself is left untouched.
# reset_index() without drop=True keeps the old row labels in an extra
# 'index' column.
arome_2_hour_future_pred = df_arome.drop([0, 1]).reset_index()

arome_2_hour_future_pred.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5237 entries, 0 to 5236
Columns: 490 entries, index to /arome_windvel_6573_1232
dtypes: float64(488), int64(1), object(1)
memory usage: 19.6+ MB


#### Tek (2 timer frem)

In [6]:
# Build a shifted copy of the TEK data: discard rows 0 and 1 and renumber
# from zero, so row i here holds what was row i + 2 in df_tek (two steps
# ahead, per the section heading). df_tek itself is left untouched.
# reset_index() without drop=True keeps the old row labels in an extra
# 'index' column.
tek_2_hour_future_pred = df_tek.drop([0, 1]).reset_index()

tek_2_hour_future_pred.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5237 entries, 0 to 5236
Columns: 310 entries, index to AnmeldingskorreksjonYtreVikna_ML_train
dtypes: float64(224), int64(86)
memory usage: 12.4 MB


#### Tilpasser eksisterende datasett

In [7]:
# Trim the un-shifted TEK frame to the length of the shifted one so the two
# line up row by row when concatenated. Slicing by length instead of dropping
# the hard-coded labels 5237/5238 in place makes the cell safe to re-run (the
# in-place drop raised KeyError the second time) and independent of file size.
df_tek = df_tek.iloc[:len(tek_2_hour_future_pred)].copy()

In [8]:
# Trim the un-shifted Arome frame to the length of the shifted one so the two
# line up row by row when concatenated. Slicing by length instead of dropping
# the hard-coded labels 5237/5238 in place makes the cell safe to re-run (the
# in-place drop raised KeyError the second time) and independent of file size.
df_arome = df_arome.iloc[:len(arome_2_hour_future_pred)].copy()

In [9]:
# Target = total production taken from the shifted frame, i.e. the value two rows ahead of the same row in df_tek.
# NOTE(review): 'Target' is overwritten further down (single-turbine section), so cells that read it depend on execution order.
tek_2_hour_future_pred['Target'] = tek_2_hour_future_pred['YVIK-YtreVikna1-Sum-produksjon']

# 'data_ytrevikna_simple.csv'

In [10]:
# (frame, column) pairs in the exact order the columns appear in the export.
# df_tek / df_arome hold the current row; the *_2_hour_future_pred frames hold
# the row two steps ahead.
simple_selection = [
    # Total production
    (df_tek, 'YVIK-YtreVikna1-Sum-produksjon'),

    # Weather stations, each paired with the Arome forecast for the same point
    # Nord Øya Fyr
    (df_tek, 'DNMI_75410...........T0015A3-0120'),
    (df_arome, '/arome_windvel_6481_1056'),
    # Rørvik Lufthavn
    (df_tek, 'DNMI_75220...........T0015A3-0120'),
    (df_arome, '/arome_windvel_6482_1114'),
    # Island to the north
    (df_tek, 'DNMI_75550...........T0015A3-0120'),
    (df_arome, '/arome_windvel_6520_1098'),

    # Nearest Storm point
    (tek_2_hour_future_pred, 'STORM-YVik1-Vindhast-25km'),
    (tek_2_hour_future_pred, 'STORM-YVik1-Vindretn-25km'),

    # Nearest Arome points
    (arome_2_hour_future_pred, '/arome_windvel_6495_1081'),
    (arome_2_hour_future_pred, '/arome_windvel_6495_1096'),
    (arome_2_hour_future_pred, '/arome_airtemp_6491_1087'),
    (arome_2_hour_future_pred, '/arome_windvel_6491_1087'),
    (arome_2_hour_future_pred, '/arome_windvel_6486_1082'),
    (arome_2_hour_future_pred, '/arome_windvel_6486_1093'),

    # Target value
    (tek_2_hour_future_pred, 'Target'),
]

data_simple = pd.concat(
    [frame[column] for frame, column in simple_selection],
    axis=1,
)

In [11]:
# Write the simple dataset as a semicolon-separated file; index=False leaves the row index out
data_simple.to_csv('data_ytrevikna_simple.csv', sep=';', index = False)

# 'data_ytrevikna_advanced.csv'

In [12]:
# Production and status column for each of the 17 turbines, in turbine order.
# Turbines 1-9 are spelled '.-G<n>' in the TEK export, turbines 10-17 '-G<n>'.
turbine_columns = []
for turbine in range(1, 18):
    separator = '.-' if turbine < 10 else '-'
    turbine_columns.append('YVIK-YtreVikna1{}G{}-T4015A3 -0104'.format(separator, turbine))
    turbine_columns.append('RRS.S2502.Gunit.M{} G1.AVL'.format(turbine))

# All four Arome variables for each of the five nearest grid points.
# The earlier version additionally listed windvel/airtemp for point 6491_1087
# ahead of this group, which put those two columns in the export twice.
arome_points = ['6495_1081', '6495_1096', '6491_1087', '6486_1082', '6486_1093']
arome_variables = ['airtemp', 'airpress', 'winddir', 'windvel']
nearest_arome_columns = [
    '/arome_{}_{}'.format(variable, point)
    for point in arome_points
    for variable in arome_variables
]

data_advanced = pd.concat([

    # The 17 turbines (production + status)
    df_tek[turbine_columns],

    # Total production
    df_tek['YVIK-YtreVikna1-Sum-produksjon'],

    # Weather stations, each paired with the Arome forecast for the same point
    # Nord Øya Fyr
    df_tek['DNMI_75410...........T0015A3-0120'],
    df_arome['/arome_windvel_6481_1056'],

    # Rørvik Lufthavn
    df_tek['DNMI_75220...........T0015A3-0120'],
    df_arome['/arome_windvel_6482_1114'],

    # Island to the north
    df_tek['DNMI_75550...........T0015A3-0120'],
    df_arome['/arome_windvel_6520_1098'],

    # Nearest Storm point (shifted frame)
    tek_2_hour_future_pred[['STORM-YVik1-Vindhast-25km', 'STORM-YVik1-Vindretn-25km']],

    # Nearest Arome points (shifted frame)
    arome_2_hour_future_pred[nearest_arome_columns],

    # Target value: total production from the shifted frame. Selected by its
    # own column name so the result does not depend on what the mutable
    # 'Target' column currently holds.
    tek_2_hour_future_pred['YVIK-YtreVikna1-Sum-produksjon'].rename('Target'),

    ],
    axis=1)

# Every exported column name must be unique; duplicates get silently renamed
# ('.1' suffix) when the CSV is read back.
assert not data_advanced.columns.duplicated().any(), 'duplicate columns in data_advanced'

In [13]:
#data_advanced.info()

In [14]:
#data_advanced.head()

In [15]:
# Write the advanced dataset as a semicolon-separated file; index=False leaves the row index out
data_advanced.to_csv('data_ytrevikna_advanced.csv', sep=';', index = False)

## Hva er benchmark for datasettet

In [16]:
# Column with the current total production; the benchmark cells below use it as the prediction
model_value = 'YVIK-YtreVikna1-Sum-produksjon'
# Column with the value to predict, read from tek_2_hour_future_pred
target_value = 'Target'

In [17]:
# Put the current value next to the value from the shifted frame and keep
# only the rows where both are present.
sammenligning = pd.concat(
    [df_tek[model_value], tek_2_hour_future_pred[target_value]],
    axis=1,
).dropna()

In [18]:
from sklearn import metrics

# Error of using the current value as the forecast for the target value.
y_true = sammenligning[target_value]
y_pred = sammenligning[model_value]
mae = metrics.mean_absolute_error(y_true, y_pred)
mse = metrics.mean_squared_error(y_true, y_pred)

print('Mean Absolute Error: \t\t\t', mae)
print('Mean Squared Error: \t\t\t', mse)
# RMSE is the square root of the MSE computed above
print('Root Mean Squared Error: \t\t', np.sqrt(mse))

Mean Absolute Error: 			 3.1228303281
Mean Squared Error: 			 23.0012865965
Root Mean Squared Error: 		 4.7959656584


### Single

In [19]:
# Switch the target to the production of a single turbine (column G17) from the shifted frame.
# NOTE(review): this overwrites the 'Target' column that was set earlier from the total production, so every cell reading 'Target' depends on execution order.
tek_2_hour_future_pred['Target'] = tek_2_hour_future_pred['YVIK-YtreVikna1-G17-T4015A3 -0104']

In [20]:
# All four Arome variables for each of the five nearest grid points.
# The earlier version additionally listed windvel/airtemp for point 6491_1087
# ahead of this group, which put those two columns in the export twice.
single_arome_points = ['6495_1081', '6495_1096', '6491_1087', '6486_1082', '6486_1093']
single_arome_variables = ['airtemp', 'airpress', 'winddir', 'windvel']
single_arome_columns = [
    '/arome_{}_{}'.format(variable, point)
    for point in single_arome_points
    for variable in single_arome_variables
]

data_single = pd.concat([

    # Turbine 17 only (production + status)
    df_tek['YVIK-YtreVikna1-G17-T4015A3 -0104'],
    df_tek['RRS.S2502.Gunit.M17 G1.AVL'],

    # Weather stations, each paired with the Arome forecast for the same point
    # Nord Øya Fyr
    df_tek['DNMI_75410...........T0015A3-0120'],
    df_arome['/arome_windvel_6481_1056'],

    # Rørvik Lufthavn
    df_tek['DNMI_75220...........T0015A3-0120'],
    df_arome['/arome_windvel_6482_1114'],

    # Island to the north
    df_tek['DNMI_75550...........T0015A3-0120'],
    df_arome['/arome_windvel_6520_1098'],

    # Nearest Storm point (shifted frame)
    tek_2_hour_future_pred[['STORM-YVik1-Vindhast-25km', 'STORM-YVik1-Vindretn-25km']],

    # Nearest Arome points (shifted frame)
    arome_2_hour_future_pred[single_arome_columns],

    # Target value: turbine 17's production from the shifted frame. Selected by
    # its own column name so the result does not depend on what the mutable
    # 'Target' column currently holds.
    tek_2_hour_future_pred['YVIK-YtreVikna1-G17-T4015A3 -0104'].rename('Target'),

    ],
    axis=1)

# Every exported column name must be unique; duplicates get silently renamed
# ('.1' suffix) when the CSV is read back.
assert not data_single.columns.duplicated().any(), 'duplicate columns in data_single'

In [21]:
# Write the single-turbine dataset as a semicolon-separated file; index=False leaves the row index out
data_single.to_csv('data_ytrevikna_single.csv', sep=';', index = False)

### Benchmarke single

In [22]:
from sklearn import metrics

# Same benchmark as for the total production, now for the single turbine:
# the current value is used as the forecast for the 'Target' column.
model_value = 'YVIK-YtreVikna1-G17-T4015A3 -0104'
target_value = 'Target'

sammenligning = pd.concat(
    [df_tek[model_value], tek_2_hour_future_pred[target_value]],
    axis=1,
).dropna()

actual = sammenligning[target_value]
forecast = sammenligning[model_value]
mean_abs_err = metrics.mean_absolute_error(actual, forecast)
mean_sq_err = metrics.mean_squared_error(actual, forecast)

for label, value in [
    ('Mean Absolute Error: \t\t\t', mean_abs_err),
    ('Mean Squared Error: \t\t\t', mean_sq_err),
    ('Root Mean Squared Error: \t\t', np.sqrt(mean_sq_err)),
]:
    print(label, value)

Mean Absolute Error: 			 0.217451918096
Mean Squared Error: 			 0.109038791572
Root Mean Squared Error: 		 0.3302102233


# Mer manuell feature engineering

#### Lage datasett for en time frem

In [None]:
# NOTE(review): the section heading says "one hour ahead", but this cell still
# reads the two-row-shifted frames and rebinds data_simple - TODO confirm the
# intended horizon and variable name before using the result.
data_simple = pd.concat([

    # Total production
    df_tek['YVIK-YtreVikna1-Sum-produksjon'],

    # Weather stations, each paired with the Arome forecast for the same point
    # Nord Øya Fyr
    df_tek['DNMI_75410...........T0015A3-0120'],
    df_arome['/arome_windvel_6481_1056'],

    # Rørvik Lufthavn
    df_tek['DNMI_75220...........T0015A3-0120'],
    df_arome['/arome_windvel_6482_1114'],

    # Island to the north
    df_tek['DNMI_75550...........T0015A3-0120'],
    df_arome['/arome_windvel_6520_1098'],

    # Nearest Storm point
    tek_2_hour_future_pred['STORM-YVik1-Vindhast-25km'],
    tek_2_hour_future_pred['STORM-YVik1-Vindretn-25km'],

    # Nearest Arome points
    arome_2_hour_future_pred['/arome_windvel_6495_1081'],
    arome_2_hour_future_pred['/arome_windvel_6495_1096'],
    arome_2_hour_future_pred['/arome_airtemp_6491_1087'],
    arome_2_hour_future_pred['/arome_windvel_6491_1087'],
    arome_2_hour_future_pred['/arome_windvel_6486_1082'],
    arome_2_hour_future_pred['/arome_windvel_6486_1093'],

    # Target value: total production from the shifted frame. Selected by its
    # own column name because the shared 'Target' column has been overwritten
    # with a single turbine's production by the time this cell runs.
    tek_2_hour_future_pred['YVIK-YtreVikna1-Sum-produksjon'].rename('Target')

    ],
    axis = 1)