```yaml
titan: v1
service:
  image: scipy
  machine:
    cpu: 2
    memory: 512MB
```

In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
import json

In [4]:
# Reading the dataset from a Gitlab repo
weather = pd.read_csv('weather_data_GER_2016.csv')

In [8]:
weather

Unnamed: 0,timestamp,cumulated hours,lat,lon,v1,v2,v_50m,h1,h2,z0,SWTDN,SWGDN,T,rho,p
0,2016-01-01T00:00:00Z,0,47.5,5.625,0.81,1.88,3.36,2,10,0.052526,0.0,0.0,277.350159,1.236413,99282.710938
1,2016-01-01T01:00:00Z,1,47.5,5.625,0.77,1.61,2.63,2,10,0.052510,0.0,0.0,277.025665,1.239390,99300.164062
2,2016-01-01T02:00:00Z,2,47.5,5.625,0.66,1.22,1.89,2,10,0.052495,0.0,0.0,277.223755,1.243861,99310.992188
3,2016-01-01T03:00:00Z,3,47.5,5.625,0.96,1.35,1.62,2,10,0.052480,0.0,0.0,277.133240,1.247390,99314.773438
4,2016-01-01T04:00:00Z,4,47.5,5.625,1.14,1.56,1.83,2,10,0.052480,0.0,0.0,276.867767,1.248869,99324.796875
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2248699,2016-12-31T19:00:00Z,8779,55.0,15.000,10.45,12.60,14.26,2,10,0.001006,0.0,0.0,278.549011,1.272779,101755.281250
2248700,2016-12-31T20:00:00Z,8780,55.0,15.000,10.52,12.69,14.39,2,10,0.001021,0.0,0.0,278.574341,1.271909,101698.242188
2248701,2016-12-31T21:00:00Z,8781,55.0,15.000,10.49,12.65,14.38,2,10,0.001014,0.0,0.0,278.634644,1.270996,101653.398438
2248702,2016-12-31T22:00:00Z,8782,55.0,15.000,10.47,12.63,14.41,2,10,0.001009,0.0,0.0,278.678162,1.270252,101609.484375


In [5]:
weather.head()

Unnamed: 0,timestamp,cumulated hours,lat,lon,v1,v2,v_50m,h1,h2,z0,SWTDN,SWGDN,T,rho,p
0,2016-01-01T00:00:00Z,0,47.5,5.625,0.81,1.88,3.36,2,10,0.052526,0.0,0.0,277.350159,1.236413,99282.710938
1,2016-01-01T01:00:00Z,1,47.5,5.625,0.77,1.61,2.63,2,10,0.05251,0.0,0.0,277.025665,1.23939,99300.164062
2,2016-01-01T02:00:00Z,2,47.5,5.625,0.66,1.22,1.89,2,10,0.052495,0.0,0.0,277.223755,1.243861,99310.992188
3,2016-01-01T03:00:00Z,3,47.5,5.625,0.96,1.35,1.62,2,10,0.05248,0.0,0.0,277.13324,1.24739,99314.773438
4,2016-01-01T04:00:00Z,4,47.5,5.625,1.14,1.56,1.83,2,10,0.05248,0.0,0.0,276.867767,1.248869,99324.796875


Next, we read the weather data for Germany in 2016 by reading the full csv file.

The data in the file contains the following:

* wind
  * v1: velocity [m/s] @ height h1 (2 meters above displacement height)
  * v2: velocity [m/s] @ height h2 (10 meters above displacement height)
  * v_50m: velocity [m/s] @ 50 meters above ground
  * h1: height above ground [m] (h1 = displacement height +2m)
  * h2: height above ground [m] (h2 = displacement height +10m)
  * z0: roughness length [m]
* solar parameters:
  * SWTDN: total top-of-the-atmosphere horizontal radiation [W/m²]
  * SWGDN: total ground horizontal radiation [W/m²]
* temperature data
  * T: Temperature [K] @ 2 meters above displacement height (see h1)
* air data
  * Rho: air density [kg/m³] @ surface
  *p: air pressure [Pa] @ surface

In [9]:
# Reading the dataset from a Gitlab repo
url = "https://storage.googleapis.com/tutorial-datasets/time_series_60min_singleindex_filtered.csv"
production = pd.read_csv(url)


In [11]:
production


Unnamed: 0,utc_timestamp,cet_cest_timestamp,DE_wind_generation_actual
0,2015-12-31T23:00:00Z,2016-01-01T00:00:00+0100,8638
1,2016-01-01T00:00:00Z,2016-01-01T01:00:00+0100,8579
2,2016-01-01T01:00:00Z,2016-01-01T02:00:00+0100,8542
3,2016-01-01T02:00:00Z,2016-01-01T03:00:00+0100,8443
4,2016-01-01T03:00:00Z,2016-01-01T04:00:00+0100,8295
...,...,...,...
8779,2016-12-31T18:00:00Z,2016-12-31T19:00:00+0100,15196
8780,2016-12-31T19:00:00Z,2016-12-31T20:00:00+0100,15303
8781,2016-12-31T20:00:00Z,2016-12-31T21:00:00+0100,15354
8782,2016-12-31T21:00:00Z,2016-12-31T22:00:00+0100,14848


In [22]:
# Merge datasets
weather_by_day = weather.groupby(weather.index).mean()
combined = pd.merge(production, weather_by_day, how='left', left_index=True, right_index=True)
weather

Unnamed: 0,timestamp,cumulated hours,lat,lon,v1,v2,v_50m,h1,h2,z0,SWTDN,SWGDN,T,rho,p
0,2016-01-01T00:00:00Z,0,47.5,5.625,0.81,1.88,3.36,2,10,0.052526,0.0,0.0,277.350159,1.236413,99282.710938
1,2016-01-01T01:00:00Z,1,47.5,5.625,0.77,1.61,2.63,2,10,0.052510,0.0,0.0,277.025665,1.239390,99300.164062
2,2016-01-01T02:00:00Z,2,47.5,5.625,0.66,1.22,1.89,2,10,0.052495,0.0,0.0,277.223755,1.243861,99310.992188
3,2016-01-01T03:00:00Z,3,47.5,5.625,0.96,1.35,1.62,2,10,0.052480,0.0,0.0,277.133240,1.247390,99314.773438
4,2016-01-01T04:00:00Z,4,47.5,5.625,1.14,1.56,1.83,2,10,0.052480,0.0,0.0,276.867767,1.248869,99324.796875
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2248699,2016-12-31T19:00:00Z,8779,55.0,15.000,10.45,12.60,14.26,2,10,0.001006,0.0,0.0,278.549011,1.272779,101755.281250
2248700,2016-12-31T20:00:00Z,8780,55.0,15.000,10.52,12.69,14.39,2,10,0.001021,0.0,0.0,278.574341,1.271909,101698.242188
2248701,2016-12-31T21:00:00Z,8781,55.0,15.000,10.49,12.65,14.38,2,10,0.001014,0.0,0.0,278.634644,1.270996,101653.398438
2248702,2016-12-31T22:00:00Z,8782,55.0,15.000,10.47,12.63,14.41,2,10,0.001009,0.0,0.0,278.678162,1.270252,101609.484375


In [21]:
# combined
weather_by_day

Unnamed: 0,cumulated hours,lat,lon,v1,v2,v_50m,h1,h2,z0,SWTDN,SWGDN,T,rho,p
0,0,47.5,5.625,0.81,1.88,3.36,2,10,0.052526,0.0,0.0,277.350159,1.236413,99282.710938
1,1,47.5,5.625,0.77,1.61,2.63,2,10,0.052510,0.0,0.0,277.025665,1.239390,99300.164062
2,2,47.5,5.625,0.66,1.22,1.89,2,10,0.052495,0.0,0.0,277.223755,1.243861,99310.992188
3,3,47.5,5.625,0.96,1.35,1.62,2,10,0.052480,0.0,0.0,277.133240,1.247390,99314.773438
4,4,47.5,5.625,1.14,1.56,1.83,2,10,0.052480,0.0,0.0,276.867767,1.248869,99324.796875
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2248699,8779,55.0,15.000,10.45,12.60,14.26,2,10,0.001006,0.0,0.0,278.549011,1.272779,101755.281250
2248700,8780,55.0,15.000,10.52,12.69,14.39,2,10,0.001021,0.0,0.0,278.574341,1.271909,101698.242188
2248701,8781,55.0,15.000,10.49,12.65,14.38,2,10,0.001014,0.0,0.0,278.634644,1.270996,101653.398438
2248702,8782,55.0,15.000,10.47,12.63,14.41,2,10,0.001009,0.0,0.0,278.678162,1.270252,101609.484375


In [23]:
combined

Unnamed: 0,utc_timestamp,cet_cest_timestamp,DE_wind_generation_actual,cumulated hours,lat,lon,v1,v2,v_50m,h1,h2,z0,SWTDN,SWGDN,T,rho,p
0,2015-12-31T23:00:00Z,2016-01-01T00:00:00+0100,8638,0,47.5,5.625,0.81,1.88,3.36,2,10,0.052526,0.0,0.0,277.350159,1.236413,99282.710938
1,2016-01-01T00:00:00Z,2016-01-01T01:00:00+0100,8579,1,47.5,5.625,0.77,1.61,2.63,2,10,0.052510,0.0,0.0,277.025665,1.239390,99300.164062
2,2016-01-01T01:00:00Z,2016-01-01T02:00:00+0100,8542,2,47.5,5.625,0.66,1.22,1.89,2,10,0.052495,0.0,0.0,277.223755,1.243861,99310.992188
3,2016-01-01T02:00:00Z,2016-01-01T03:00:00+0100,8443,3,47.5,5.625,0.96,1.35,1.62,2,10,0.052480,0.0,0.0,277.133240,1.247390,99314.773438
4,2016-01-01T03:00:00Z,2016-01-01T04:00:00+0100,8295,4,47.5,5.625,1.14,1.56,1.83,2,10,0.052480,0.0,0.0,276.867767,1.248869,99324.796875
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8779,2016-12-31T18:00:00Z,2016-12-31T19:00:00+0100,15196,43,50.5,13.750,4.54,6.63,8.44,2,10,0.130437,0.0,0.0,269.227844,1.251957,96675.328125
8780,2016-12-31T19:00:00Z,2016-12-31T20:00:00+0100,15303,44,50.5,13.750,4.45,6.50,8.31,2,10,0.130437,0.0,0.0,268.955200,1.253008,96665.226562
8781,2016-12-31T20:00:00Z,2016-12-31T21:00:00+0100,15354,45,50.5,13.750,4.52,6.61,8.56,2,10,0.130376,0.0,0.0,268.782166,1.253535,96648.695312
8782,2016-12-31T21:00:00Z,2016-12-31T22:00:00+0100,14848,46,50.5,13.750,4.66,6.82,8.80,2,10,0.130315,0.0,0.0,268.605621,1.254059,96623.734375


In [17]:
# Apply linear regression
lr = LinearRegression()
X_wind = combined[['v1', 'v2', 'v_50m', 'z0']]
#X_wind = combined[['v1', 'v2', 'v_50m']]
#X_wind = combined[['v1', 'v2']]
y_wind = combined['DE_wind_generation_actual']

model = lr.fit(X_wind, y_wind)

In [18]:
# Now we can see the coefficients of our model
print(f'alpha = {model.intercept_}')
print(f'betas = {model.coef_}')

alpha = 7572.20178646834
betas = [-3430.30457276  2906.42801897  -183.70361435   142.53220298]


In [19]:
# GET /alphas
print(f'alpha = {model.intercept_}')

alpha = 7572.20178646834


In [20]:
# GET /betas
print(f'betas = {model.coef_}')

betas = [-3430.30457276  2906.42801897  -183.70361435   142.53220298]


In [21]:
# Mock request object for local API testing
headers = {
'content-type': 'application/json'
}
body = json.dumps({
  "data": [[1.44, 1.77, 2, 0.054]]
})
REQUEST = json.dumps({ 'headers': headers, 'body': body })

In [None]:
# POST /prediction
body = json.loads(REQUEST)['body']
# predict the cluster for new samples. Function to be exposed through Titan
input_params = json.loads(body)['data']
#input_params = [[0.44, 1.77, 2, 0.054]]
print(model.predict(input_params))