```yaml
titan: v1
service:
  image: scipy
  machine:
    cpu: 4
    memory: 2048MB
  command:
      - !pip install requirements.txt

```

In [14]:
import pandas as pd
import json
from sklearn.ensemble import RandomForestRegressor
from sklearn import pipeline, preprocessing, impute, compose
from sklearn.model_selection import train_test_split

tree_regressors = {
    "Random_forest_regressor": RandomForestRegressor(),
        }

In [15]:
# Reading the dataset from a Gitlab repo
url = "https://storage.googleapis.com/tutorial-datasets/weather_data_GER_2016.csv"
weather = pd.read_csv(url)

In [16]:
# Reading the dataset from a Gitlab repo
url = "https://storage.googleapis.com/tutorial-datasets/time_series_60min_singleindex_filtered.csv"
production = pd.read_csv(url)


In [17]:
# Merge datasets
weather_by_day = weather.groupby(weather.index).mean()
combined = pd.merge(production, weather_by_day, how='left', left_index=True, right_index=True)

In [18]:
num_vars = ['v1', 'v2', 'v_50m', 'z0','T', 'rho', 'p', 'lat', 'lon']
X_wind = combined[num_vars]
#X_wind = combined[['v1', 'v2', 'v_50m']]
#X_wind = combined[['v1', 'v2']]
y_wind = combined['DE_wind_generation_actual']
cat_vars = []


In [19]:
x_train, x_val, y_train, y_val = train_test_split(X_wind, y_wind, test_size=0.3, random_state=909)

In [20]:
num_4_treeModels = pipeline.Pipeline(steps=[
    ('imputer', impute.SimpleImputer(strategy='mean', add_indicator=False))]) # mean, median

cat_4_treeModels = pipeline.Pipeline(steps=[
    ('imputer', impute.SimpleImputer(strategy='constant', fill_value='missing')),])

tree_prepro = compose.ColumnTransformer(transformers=[
    ('num', num_4_treeModels, num_vars),
    ('cat', cat_4_treeModels, cat_vars),],
    remainder='drop')
                          

In [21]:
all_tree_pipes = {name: pipeline.make_pipeline(tree_prepro, model) for name, model in tree_regressors.items()}

In [22]:
# Model fitting and initialization
model = all_tree_pipes["Random_forest_regressor"]
model.fit(X_wind, y_wind)

Pipeline(steps=[('columntransformer',
                 ColumnTransformer(transformers=[('num',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer())]),
                                                  ['v1', 'v2', 'v_50m', 'z0',
                                                   'T', 'rho', 'p', 'lat',
                                                   'lon']),
                                                 ('cat',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer(fill_value='missing',
                                                                                 strategy='constant'))]),
                                                  [])])),
                ('randomforestregressor', RandomForestRegressor())])

In [23]:
# Mock request object for local API testing 
headers = {
'content-type': 'application/json'
}
body = json.dumps({
  "data": [[1.44, 1.77, 2, 0.054, 277.35, 1.229, 99300.89123, 47.5, 6.25]]
})
REQUEST = json.dumps({ 'headers': headers, 'body': body })

In [24]:
# POST /prediction
body = json.loads(REQUEST)['body']
# predict the cluster for new samples. Function to be exposed through Titan
input_params = json.loads(body)['data']

#input_params = [[0.44, 1.77, 2, 0.054]]
d = pd.DataFrame(input_params, columns=['v1', 'v2', 'v_50m', 'z0','T', 'rho', 'p', 'lat', 'lon'])
print(model.predict(d))

[10460.46]
