In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import json

from sklearn.linear_model import LinearRegression
from urllib.request import Request, urlopen


In [2]:
#Retrieving data from NVE
def get_NVE_data():
    """A function to retrieve data from NVE

    Returns:
        A list with a dictionary containing the results.
    """
    #Hardcoded parameters can be passed as variables.
    api_key = 'VERauKbJ40GNb4FmcthF0Q== '
    station = '12.298.0'
    parameter = '1003' 
    resolution_time = '60'                   #Hourly seems to be the highest resolution
    reference_time = '2021-01-01/2021-12-31'

    baseurl = "https://hydapi.nve.no/api/v1/Observations?StationId={station}&Parameter={parameter}&ResolutionTime={resolution_time}"

    url = baseurl.format(station=station, parameter=parameter,
                         resolution_time=resolution_time)
    
    #If reference_time parameter is provided, then update the url.
    if reference_time is not None:
        url = "{url}&ReferenceTime={reference_time}".format(
        url=url, reference_time=reference_time)

    print('Retrieving data from:', url)

    request_headers = {
        "Accept": "application/json",
        "X-API-Key": api_key
    }

    request = Request(url, headers=request_headers)
    response = urlopen(request)
    content = response.read().decode('utf-8')
    parsed_result = json.loads(content)
    
    return parsed_result['data']
        

In [3]:
#Retrievining the data from the local csv-file containing stator temperatures and effects.

data_2021 = pd.read_csv('data_2021.csv')
data_2021['timestamp'] = pd.to_datetime(data_2021['timestamp'], utc=True)
data_2021.set_index('timestamp', inplace=True)   
data_2021
   

Unnamed: 0_level_0,id,value
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-12-31 23:00:00+00:00,STATOR_TEMP_L3,7.937055
2020-12-31 23:00:00+00:00,AKTIV_EFFEKT,0.007787
2020-12-31 23:00:00+00:00,STATOR_TEMP_L1,8.496478
2020-12-31 23:00:00+00:00,STATOR_TEMP_L2,6.100000
2020-12-31 23:05:00+00:00,STATOR_TEMP_L3,7.937055
...,...,...
2021-11-28 23:50:00+00:00,AKTIV_EFFEKT,0.007001
2021-11-28 23:55:00+00:00,STATOR_TEMP_L1,14.632553
2021-11-28 23:55:00+00:00,STATOR_TEMP_L3,13.902348
2021-11-28 23:55:00+00:00,STATOR_TEMP_L2,12.200000


In [4]:
# Reshaping dataframe based on column values in "id"-column and interpolating to hourly resolution
data = data_2021.pivot(columns='id', values='value')
data = data.resample('H').interpolate()
 
data.head()


id,AKTIV_EFFEKT,STATOR_TEMP_L1,STATOR_TEMP_L2,STATOR_TEMP_L3
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-12-31 23:00:00+00:00,0.007787,8.496478,6.1,7.937055
2021-01-01 00:00:00+00:00,0.007787,8.496478,6.1,7.937055
2021-01-01 01:00:00+00:00,0.007787,8.496478,6.1,7.937055
2021-01-01 02:00:00+00:00,0.007787,8.496478,6.1,7.937055
2021-01-01 03:00:00+00:00,0.007787,8.496478,6.1,7.937055


In [11]:
NVE_data = get_NVE_data()
NVE_data

Retrieving data from: https://hydapi.nve.no/api/v1/Observations?StationId=12.298.0&Parameter=1003&ResolutionTime=60&ReferenceTime=2021-01-01/2021-12-31


[{'stationId': '12.298.0',
  'stationName': 'Drammenselva v/Døvikfoss kraftstasjon',
  'parameter': 1003,
  'parameterName': 'Vanntemperatur',
  'parameterNameEng': 'Water temperature',
  'serieVersionNo': 4,
  'method': 'Mean',
  'unit': '°C',
  'observationCount': 8737,
  'observations': [{'time': '2021-01-01T00:00:00Z',
    'value': 3.23,
    'correction': 0,
    'quality': 2},
   {'time': '2021-01-01T01:00:00Z',
    'value': 3.22375,
    'correction': 0,
    'quality': 2},
   {'time': '2021-01-01T02:00:00Z',
    'value': 3.235,
    'correction': 0,
    'quality': 2},
   {'time': '2021-01-01T03:00:00Z',
    'value': 3.2225,
    'correction': 0,
    'quality': 2},
   {'time': '2021-01-01T04:00:00Z',
    'value': 3.21875,
    'correction': 0,
    'quality': 2},
   {'time': '2021-01-01T05:00:00Z',
    'value': 3.21,
    'correction': 0,
    'quality': 2},
   {'time': '2021-01-01T06:00:00Z',
    'value': 3.2,
    'correction': 0,
    'quality': 2},
   {'time': '2021-01-01T07:00:00Z',
  

In [14]:
#Retrieving data from NVE
NVE_data = get_NVE_data()


NVE_df = pd.DataFrame(NVE_data[0]['observations'])
NVE_df.drop(['correction', 'quality'], axis=1, inplace = True)  #Dropping uneccesary columns
NVE_df['time'] = pd.to_datetime(NVE_df['time'])
NVE_df.set_index('time', inplace=True)

NVE_df.head()

Retrieving data from: https://hydapi.nve.no/api/v1/Observations?StationId=12.298.0&Parameter=1003&ResolutionTime=60&ReferenceTime=2021-01-01/2021-12-31


Unnamed: 0_level_0,value
time,Unnamed: 1_level_1
2021-01-01 00:00:00+00:00,3.23
2021-01-01 01:00:00+00:00,3.22375
2021-01-01 02:00:00+00:00,3.235
2021-01-01 03:00:00+00:00,3.2225
2021-01-01 04:00:00+00:00,3.21875


## Merging NVE data with csv-data

In [6]:
data['WATER_TERMPERATURE'] = NVE_df['value']

data.dropna(inplace=True)

data.head(5)

id,AKTIV_EFFEKT,STATOR_TEMP_L1,STATOR_TEMP_L2,STATOR_TEMP_L3,WATER_TERMPERATURE
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-01-01 00:00:00+00:00,0.007787,8.496478,6.1,7.937055,3.23
2021-01-01 01:00:00+00:00,0.007787,8.496478,6.1,7.937055,3.22375
2021-01-01 02:00:00+00:00,0.007787,8.496478,6.1,7.937055,3.235
2021-01-01 03:00:00+00:00,0.007787,8.496478,6.1,7.937055,3.2225
2021-01-01 04:00:00+00:00,0.007787,8.496478,6.1,7.937055,3.21875


In [7]:
# Defining what values to use as features and what values to predict
# When training the model, we want to use data from 2021-01-01 to 2021-07-01 
# When predicting, we want to use data from 2021-07-02 to 2021-12-31


training_start_date = '2021-01-01'
training_end_date = '2021-07-01'
predicting_start_date = '2021-07-02'
predicting_end_date = '2021-12-31'

# Filter data within the specified datetime range
training_data = data.loc[training_start_date:training_end_date]
testing_data = data.loc[predicting_start_date:predicting_end_date]

# Select features (X) and target variable (Y) from the filtered data
X = training_data[['AKTIV_EFFEKT', 'WATER_TERMPERATURE']]
Y = training_data['STATOR_TEMP_L1']

# Creating the linear model
ML1 = LinearRegression().fit(X, Y)

In [8]:
#Predicting the values and placing it in a separate column in the data dataframe.
testing_data['Prediction'] = ML1.predict(testing_data[['AKTIV_EFFEKT', 'WATER_TERMPERATURE']])

fig = px.line()

fig.add_trace(
                go.Scatter(
                    x=testing_data.index,
                    y=testing_data['STATOR_TEMP_L1'],
                    mode="lines",
                    name='STATOR_TEMP_L1',
                    line=dict(color="blue"),
                )
            )
fig.add_trace(
                go.Scatter(
                    x=testing_data.index,
                    y=testing_data['Prediction'],
                    mode="lines",
                    name='Prediction',
                    line=dict(color="green"),
                )
            )
fig.show()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  testing_data['Prediction'] = ML1.predict(testing_data[['AKTIV_EFFEKT', 'WATER_TERMPERATURE']])


In [9]:
testing_data['Difference'] =  testing_data['STATOR_TEMP_L1'] - testing_data['Prediction']
difference_fig = px.line()
difference_fig.add_trace(
                go.Scatter(
                    x=testing_data.index,
                    y=testing_data['Difference'],
                    mode="lines",
                    name='Difference',
                    line=dict(color="green"),
                )
            )



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

