# Datasett Valsneset

Tar inn de nye datasettene for å finne ut hva som er forskjellig/likt

#### Importerer biblioteker

In [1]:
import pandas as pd
import numpy as np

# Visualization
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

# Making plots look better
import matplotlib as mpl
mpl.rcParams['patch.force_edgecolor'] = True

%config InlineBackend.figure_format = 'retina'

#### Henter ut ulike datasett

In [2]:
# TEK (13. Juli 2017 til 16. Desember 2018)
df_tek = pd.read_csv('vindkraft 130717-160218 TEK met.csv',sep =';', low_memory = False)

# Arome (1. Spetember til 16. Desember)
df_arome = pd.read_csv('vindkraft 130717-160218 arome korr winddir.csv',sep =';', low_memory = False)

In [3]:
df_tek.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5239 entries, 0 to 5238
Columns: 309 entries, Unnamed: 0 to AnmeldingskorreksjonYtreVikna_ML_train
dtypes: float64(224), int64(85)
memory usage: 12.4 MB


In [4]:
df_arome.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5239 entries, 0 to 5238
Columns: 489 entries, Unnamed: 0 to /arome_windvel_6573_1232
dtypes: float64(488), object(1)
memory usage: 19.5+ MB


## Fikser 2 timer frem med værdata

#### Arome

In [5]:
# Lager en ny dataframe
arome_2_hour_future_pred = df_arome.copy(deep=True)

# Forskyver to rader ved å fjerne de to første radene
arome_2_hour_future_pred.drop([0,1], inplace=True)

# Nullindekserer radene
arome_2_hour_future_pred.reset_index(inplace=True)

arome_2_hour_future_pred.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5237 entries, 0 to 5236
Columns: 490 entries, index to /arome_windvel_6573_1232
dtypes: float64(488), int64(1), object(1)
memory usage: 19.6+ MB


#### Tek (2 timer frem)

In [6]:
# Lager en ny dataframe
tek_2_hour_future_pred = df_tek.copy(deep=True)

# Forskyver to rader ved å fjerne de to første radene
tek_2_hour_future_pred.drop([0,1], inplace=True)

# Nullindekserer radene
tek_2_hour_future_pred.reset_index(inplace=True)

tek_2_hour_future_pred.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5237 entries, 0 to 5236
Columns: 310 entries, index to AnmeldingskorreksjonYtreVikna_ML_train
dtypes: float64(224), int64(86)
memory usage: 12.4 MB


#### Tilpasser eksisterende datasett

In [7]:
df_tek.drop([5237,5238], inplace= True)

In [8]:
df_arome.drop([5237,5238], inplace= True)

In [9]:
tek_2_hour_future_pred['Target'] = tek_2_hour_future_pred['VALS-Valsneset..-GS1-5-T4015A3 -0104']

# ' data_valsneset_simple.csv'

In [10]:
data_simple = pd.concat([

    # Sum produksjon
    df_tek['VALS-Valsneset..-GS1-5-T4015A3 -0104'], 
    
    
    
    # Værstasjoner (I sammenkomst med værmelding (arome) samme punkt )
                

                
    # Nærmeste stormpunkt
    tek_2_hour_future_pred['STORM-Vals-Vindhast-25km'],
    tek_2_hour_future_pred['STORM-Vals-Vindretn-25km'],
                
                
    # Nærmeste aromepunkter

    
    # Target value
    tek_2_hour_future_pred['Target']
    
    
    ], 
    axis = 1)

In [11]:
data_simple.to_csv('data_valsneset_simple.csv', sep=';', index = False)

# 'data_valsneset_advanced.csv'

In [12]:
data_advanced = pd.concat([
                
    # De 5 ulike turbinenen (produksjon + status)
    df_tek['VALS-Valsneset..-G1-T4015A3 -0104'],
    df_tek['VALS-Valsneset..-G2-T4015A3 -0104'],
    df_tek['VALS-Valsneset..-G3-T4015A3 -0104'],
    df_tek['VALS-Valsneset..-G4-T4015A3 -0104'],
    df_tek['VALS-Valsneset..-G5-T4015A3 -0104'],
    
    # Error codes 5 turbinene
    df_tek['RRS.S2451.Gunit.M1 G1.AVL'],
    df_tek['RRS.S2451.Gunit.M2 G1.AVL'],
    df_tek['RRS.S2451.Gunit.M3 G1.AVL'],
    df_tek['RRS.S2451.Gunit.M4 G1.AVL'],
    df_tek['RRS.S2451.Gunit.M5 G1.AVL'],
    

    # Sum produksjon
    df_tek['VALS-Valsneset..-GS1-5-T4015A3 -0104'],  

    
    # Værstasjoner (korigert sammen med aromedata)
    
    # ørlandet ( koordinater: 63.705, 9.611)
    df_tek['DNMI_71550...........T0015A3-0120'],
    df_arome['/arome_windvel_6372_0961'],
    
    # Sula fyr (koordinater 63.847, 8.467)
    df_tek['DNMI_65940...........T0015A3-0120'],
    df_arome['/arome_windvel_6385_0846'],
    
    # Halten fyr (koordinater 64.173, 9.405)
    df_tek['DNMI_71850...........T0015A3-0120'],
    df_arome['/arome_windvel_6418_0942'],

    # Nærmeste stormpunkt
    tek_2_hour_future_pred['STORM-Vals-Vindhast-25km'],
    tek_2_hour_future_pred['STORM-Vals-Vindretn-25km'],

    # Nærmeste aromepunkt
    
    arome_2_hour_future_pred['/arome_windvel_6387_0958'],
    arome_2_hour_future_pred['/arome_winddir_6387_0958'],
    arome_2_hour_future_pred['/arome_airtemp_6387_0958'],
    
    arome_2_hour_future_pred['/arome_windvel_6385_0969'],
    arome_2_hour_future_pred['/arome_winddir_6385_0969'],
    arome_2_hour_future_pred['/arome_airtemp_6385_0969'],
    
    arome_2_hour_future_pred['/arome_windvel_6383_0964'],
    arome_2_hour_future_pred['/arome_winddir_6383_0964'],
    arome_2_hour_future_pred['/arome_airtemp_6383_0964'],
    
    arome_2_hour_future_pred['/arome_windvel_6378_0955'],
    arome_2_hour_future_pred['/arome_winddir_6378_0955'],
    arome_2_hour_future_pred['/arome_airtemp_6378_0955'],
    
    
    arome_2_hour_future_pred['/arome_windvel_6379_0970'],
    arome_2_hour_future_pred['/arome_winddir_6379_0970'],
    arome_2_hour_future_pred['/arome_airtemp_6379_0970'],
    
    

    # Target value
    tek_2_hour_future_pred['Target']
    
   ], 
   axis = 1)

In [13]:
data_advanced.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5237 entries, 0 to 5236
Data columns (total 35 columns):
VALS-Valsneset..-G1-T4015A3 -0104       5237 non-null float64
VALS-Valsneset..-G2-T4015A3 -0104       5237 non-null float64
VALS-Valsneset..-G3-T4015A3 -0104       5237 non-null float64
VALS-Valsneset..-G4-T4015A3 -0104       5237 non-null float64
VALS-Valsneset..-G5-T4015A3 -0104       5237 non-null float64
RRS.S2451.Gunit.M1 G1.AVL               5237 non-null int64
RRS.S2451.Gunit.M2 G1.AVL               5237 non-null int64
RRS.S2451.Gunit.M3 G1.AVL               5237 non-null int64
RRS.S2451.Gunit.M4 G1.AVL               5237 non-null int64
RRS.S2451.Gunit.M5 G1.AVL               5237 non-null int64
VALS-Valsneset..-GS1-5-T4015A3 -0104    5237 non-null float64
DNMI_71550...........T0015A3-0120       5237 non-null float64
/arome_windvel_6372_0961                5190 non-null float64
DNMI_65940...........T0015A3-0120       5237 non-null float64
/arome_windvel_6385_0846           

In [14]:
data_advanced.head()

Unnamed: 0,VALS-Valsneset..-G1-T4015A3 -0104,VALS-Valsneset..-G2-T4015A3 -0104,VALS-Valsneset..-G3-T4015A3 -0104,VALS-Valsneset..-G4-T4015A3 -0104,VALS-Valsneset..-G5-T4015A3 -0104,RRS.S2451.Gunit.M1 G1.AVL,RRS.S2451.Gunit.M2 G1.AVL,RRS.S2451.Gunit.M3 G1.AVL,RRS.S2451.Gunit.M4 G1.AVL,RRS.S2451.Gunit.M5 G1.AVL,...,/arome_windvel_6383_0964,/arome_winddir_6383_0964,/arome_airtemp_6383_0964,/arome_windvel_6378_0955,/arome_winddir_6378_0955,/arome_airtemp_6378_0955,/arome_windvel_6379_0970,/arome_winddir_6379_0970,/arome_airtemp_6379_0970,Target
0,1.37188,1.248195,0.0,1.235367,1.314127,1,1,1,1,1,...,5.752372,,12.05117,6.516601,,11.91494,5.248638,,10.4955,5.48814
1,1.450206,1.447913,0.0,1.272919,1.416992,1,1,1,1,1,...,5.767471,,12.01797,6.211858,,12.06338,5.077352,,10.8964,5.295362
2,1.431802,1.38006,0.0,1.2824,1.393878,1,1,1,1,1,...,5.840058,,11.31802,6.400788,,10.3058,5.389918,,9.75356,4.938806
3,1.355955,1.302725,0.0,1.340277,1.296405,1,1,1,1,1,...,5.801629,,12.1913,6.60801,,11.5795,5.233864,,10.39688,3.924535
4,1.258667,1.225081,0.0,1.276204,1.178854,1,1,1,1,1,...,5.746001,,11.7194,6.552452,,11.48013,5.111751,,10.3473,3.130494


In [15]:
data_advanced.to_csv('data_valsneset_advanced.csv', sep=';', index = False)

## Hva er benchmark for datasettet

In [16]:
model_value = 'VALS-Valsneset..-GS1-5-T4015A3 -0104'
target_value = 'Target'

In [17]:
sammenligning = pd.concat([df_tek[model_value],tek_2_hour_future_pred[target_value]], axis=1)
sammenligning.dropna(inplace= True)

In [18]:
from sklearn import metrics

print('Mean Absolute Error: \t\t\t', metrics.mean_absolute_error(sammenligning[target_value], sammenligning[model_value]))
print('Mean Squared Error: \t\t\t', metrics.mean_squared_error(sammenligning[target_value], sammenligning[model_value]))
print('Root Mean Squared Error: \t\t', np.sqrt(metrics.mean_squared_error(sammenligning[target_value], sammenligning[model_value])))

Mean Absolute Error: 			 1.26887425823
Mean Squared Error: 			 4.17679748833
Root Mean Squared Error: 		 2.04372148013
