In [4]:
import pandas as pd
import mlflow

import imports as ii
import functions as ff

## NOTE: You can use Microsoft Azure Machine Learning Studio for experiment tracking. Follow assignment description and 
# uncomment below for that (you might also need to pip azureml (pip install azureml-core):
from azureml.core import Workspace
ws = Workspace.from_config()
mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())

## NOTE: Optionally, you can use the public tracking server.  Do not use it for data you cannot afford to lose. See note in assignment text. If you leave this line as a comment, mlflow will save the runs to your local filesystem.

# mlflow.set_tracking_uri("http://training.itu.dk:5000/")

# TODO: Set the experiment name
#mlflow.set_experiment("<ITU Username> - <Descriptive experiment name>")

# Import some of the sklearn modules you are likely to use.
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

mlflow.set_tracking_uri("https://training.itu.dk:5000/")


mlflow.set_experiment("gegy - wind_prediction")
days = 365*2
time = ff.time()
with mlflow.start_run(run_name="experiment_" + time):
    ### Get last x days of data

    gen_df, wind_df = ff.query(days)

    ### get rid of non used columns and null values
    gen_df, wind_df = ff.clean_df(gen_df, wind_df)

    ### Aggregate gen_df for 3 hours by mean
    energy = ff.aggregate(gen_df)

    ### transform wind directions strings into int
    wind_df = ff.wind_direction_transformer(wind_df)

    ### merge dataframes
    data = energy.merge(wind_df, on='time')

    ### data_finalizer
    final_data = ff.data_finalizer(data)

    ### make a train/test split
    X = final_data[['Speed', 'Angle']]
    Y = final_data['Energy']
    x_train, x_test, y_train, y_test = ii.train_test_split(X,Y, test_size=.25, random_state=420)

    ### Searching best params
    print("Grid Search is running")
    search = ii.GridSearchCV(ff.pipe, ff.param_grid, n_jobs=4)
    search.fit(x_train, y_train)
    print("Best parameter (CV score=%0.3f):" % search.best_score_)
    mlflow.log_param("CV_score",search.best_score_)
    print(search.best_params_)
    algo, neighbours = search.best_params_.values()

    ### use best parameters
    print("Retrain, predict and store the results and the model")
    ff.pipe.set_params(KNN__n_neighbors=neighbours,KNN__algorithm=algo)
    model = ff.pipe.fit(x_train, y_train)
    
    # demo prediction
    preds = model.predict(x_test)
    print(" R2 score of the loaded model is %0.2f "% ii.r2_score(y_test, preds))

    ### Get newest data, predict, and store the model
    print("Load latest forecasts, make a prediction of Generated Power")
    forecast, x_forecast = ff.load_forecast()
    result = model.predict(x_forecast)
    forecast["Generated Power Prediction"] = result
    print("###### Predictions")
    display(forecast[0:5])

    ### SAVE predictions
    try:
        ff.save_predictions(forecast)
    except:
        pass

    ### SAVE MODEL
    try:
        ii.pickle.dump(model,open("models/model"+time+".p", "wb" ) )
        model = ii.pickle.load( open( "models/model"+time+".p", "rb" ) )
        preds = model.predict(x_test)
        ### R2 score for the test with the loaded model:
        print(" R2 score of the loaded model is %0.2f "% ii.r2_score(y_test, preds))
    except:
        pass
        
    mlflow.log_metric("days", days)
    mlflow.log_metric("prediction_time", int(time.replace("-", "")))
    mlflow.log_metric("KNN_neighbors", neighbours)
    #mlflow.log_metric("KNN_algorithm", algo)
    mlflow.log_metric("Score", ii.r2_score(y_test, preds))
    mlflow.log_param("Model method",algo)
    mlflow.log_param("best_model","KNeighborsRegressor")

    #mlflow.sklearn.log_model(model,"KNN", conda_env=conda_env)
mlflow.end_run()

Query is running
Dataframe cleaning is running
Wind direction tranformer is running
Grid Search is running
Best parameter (CV score=0.725):
{'KNN__algorithm': 'brute', 'KNN__n_neighbors': 22}
Retrain, predict and store the results and the model
 R2 score of the loaded model is 0.74 
Load latest forecasts, make a prediction of Generated Power
Wind direction tranformer is running
###### Predictions


Unnamed: 0_level_0,Direction,Lead_hours,Source_time,Speed,Angle,Generated Power Prediction
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-05-19 12:00:00+00:00,SW,2,1652947200,7.15264,225.0,13.662877
2022-05-19 15:00:00+00:00,SSW,5,1652947200,4.02336,202.5,4.761094
2022-05-19 18:00:00+00:00,S,8,1652947200,4.02336,180.0,6.479517
2022-05-19 21:00:00+00:00,SSE,11,1652947200,5.81152,157.5,9.391857
2022-05-20 00:00:00+00:00,S,14,1652947200,5.81152,180.0,12.419416


In [5]:
mlflow.search_runs()

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.KNN_neighbors,metrics.days,metrics.prediction_time,metrics.Score,params.best_model,params.CV_score,params.Model method,tags.mlflow.runName,tags.mlflow.source.name,tags.mlflow.user,tags.mlflow.source.type
0,4ffa28b2c67b4ebe857048d1c8cca81f,136,FINISHED,s3://training-storage/136/4ffa28b2c67b4ebe8570...,2022-05-19 11:26:20.611000+00:00,2022-05-19 11:27:01.787000+00:00,22.0,730.0,2022519000000.0,0.743067,KNeighborsRegressor,0.7246987924981244,brute,experiment_2022-5-19-13-26-20,c:\users\music\appdata\local\programs\python\p...,MUSIC,LOCAL
1,71fdafcfebc94b02baba1767dc444f64,136,FINISHED,s3://training-storage/136/71fdafcfebc94b02baba...,2022-05-19 11:24:41.014000+00:00,2022-05-19 11:24:58.720000+00:00,15.0,365.0,2022519000000.0,0.711102,KNeighborsRegressor,0.700275337777702,auto,experiment_2022-5-19-13-24-41,c:\users\music\appdata\local\programs\python\p...,MUSIC,LOCAL
2,7b16402044bb4a30a793a710cc5d1686,136,FINISHED,s3://training-storage/136/7b16402044bb4a30a793...,2022-05-19 11:23:38.836000+00:00,2022-05-19 11:23:52.914000+00:00,9.0,120.0,2022519000000.0,0.643414,KNeighborsRegressor,0.5711774627238106,ball_tree,experiment_2022-5-19-13-23-38,c:\users\music\appdata\local\programs\python\p...,MUSIC,LOCAL
3,3dce1393aab745bebdf478a684fa4b46,136,FINISHED,s3://training-storage/136/3dce1393aab745bebdf4...,2022-05-18 10:54:53.738000+00:00,2022-05-18 10:54:58.497000+00:00,3.0,40.0,2022518000000.0,0.566408,KNeighborsRegressor,0.5413345621125305,ball_tree,experiment_2022-5-18-12-54-53,c:\users\music\appdata\local\programs\python\p...,MUSIC,LOCAL
4,9ce3fc2ceb6c4ac98bf77b5d9db7461d,136,FINISHED,s3://training-storage/136/9ce3fc2ceb6c4ac98bf7...,2022-05-18 10:52:02.497000+00:00,2022-05-18 10:53:32.082000+00:00,36.0,2920.0,202251800000.0,0.72869,KNeighborsRegressor,0.750989051811944,brute,experiment_2022-5-18-12-52-2,c:\users\music\appdata\local\programs\python\p...,MUSIC,LOCAL
5,2df3a87abb4a4a309ee521b06d80fd1a,136,FINISHED,s3://training-storage/136/2df3a87abb4a4a309ee5...,2022-05-18 10:48:35.145000+00:00,2022-05-18 10:49:51.391000+00:00,36.0,1460.0,2022518000000.0,0.728679,KNeighborsRegressor,,brute,experiment_2022-5-18-12-48-35,c:\users\music\appdata\local\programs\python\p...,MUSIC,LOCAL
6,56b93c17e19f4195845a5a97ce99beaf,136,FINISHED,s3://training-storage/136/56b93c17e19f4195845a...,2022-05-18 10:47:46.908000+00:00,2022-05-18 10:48:03.269000+00:00,14.0,365.0,2022518000000.0,0.735744,KNeighborsRegressor,,brute,experiment_2022-5-18-12-47-46,c:\users\music\appdata\local\programs\python\p...,MUSIC,LOCAL
7,0961ea7d69394ab4bd506a53fbfda58e,136,FINISHED,s3://training-storage/136/0961ea7d69394ab4bd50...,2022-05-18 10:47:10.138000+00:00,2022-05-18 10:47:17.286000+00:00,5.0,120.0,2022518000000.0,0.635251,KNeighborsRegressor,,brute,experiment_2022-5-18-12-47-10,c:\users\music\appdata\local\programs\python\p...,MUSIC,LOCAL
8,667053938f5b4a78bfc478b07dcb3ad7,136,FINISHED,s3://training-storage/136/667053938f5b4a78bfc4...,2022-05-18 10:46:37.003000+00:00,2022-05-18 10:46:41.642000+00:00,3.0,20.0,2022518000000.0,0.623332,KNeighborsRegressor,,auto,experiment_2022-5-18-12-46-36,c:\users\music\appdata\local\programs\python\p...,MUSIC,LOCAL
9,0915413e8efa451fa571c834e188941b,136,FINISHED,s3://training-storage/136/0915413e8efa451fa571...,2022-05-18 10:45:27.811000+00:00,2022-05-18 10:45:33.070000+00:00,3.0,60.0,2022518000000.0,0.600565,KNeighborsRegressor,,brute,experiment_2022-5-18-12-45-27,c:\users\music\appdata\local\programs\python\p...,MUSIC,LOCAL
