In [12]:
import os
import urllib.request

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from pandas import Series, DataFrame

from sklearn.preprocessing import StandardScaler, MinMaxScaler, Normalizer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.model_selection import cross_validate, train_test_split, GridSearchCV, cross_val_score, StratifiedShuffleSplit
from sklearn.metrics import confusion_matrix

import mlflow
import mlflow.sklearn
from mlflow.models.signature import infer_signature
from mlflow.client import MlflowClient

In [2]:
# Connection settings for MLflow, exported as environment variables:
# the S3-compatible endpoint URL and the tracking server URI.
os.environ.update({
    'MLFLOW_S3_ENDPOINT_URL': "http://truenas.local:9000",
    "MLFLOW_TRACKING_URI": "http://192.168.1.14:5000",
})

# NOTE(review): EXPERIMENT_NAME is not referenced by any other cell visible here.
EXPERIMENT_NAME = "Weather Forecast Model Experiment"

In [3]:
# Create the MLflow client used to query the model registry.
# No tracking URI is passed here; presumably it is picked up from the
# MLFLOW_TRACKING_URI environment variable set earlier in the notebook.
client = MlflowClient()

In [5]:
# Look up the registry entry for the forecast model by its registered name.
# NOTE(review): reg_model is not used by any later cell visible here.
reg_model = client.get_registered_model("KrasnodarWeatherForecastModel")

# Load registered model

In [10]:
# Build the registry URI ("models:/<name>/<stage>") for the forecast model and
# load it through MLflow's generic pyfunc interface.
model_uri = "models:/{name}/{stage}".format(
    name="KrasnodarWeatherForecastModel",
    stage="production",
)
loaded_model = mlflow.pyfunc.load_model(model_uri=model_uri)

 - mlflow (current: 2.4.2, required: mlflow==2.4)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


In [26]:
# Load the same registered model through the scikit-learn flavor as well;
# later cells call predict_proba on this object.
sklearn_model = mlflow.sklearn.load_model(model_uri)
sklearn_model

In [11]:
# Inspect the metadata object attached to the pyfunc model loaded above.
loaded_model.metadata

<mlflow.models.model.Model at 0x7f184b554820>

In [13]:
# Fetch today's daily weather summary for Krasnodar from the Visual Crossing
# Timeline API as CSV.
# The API key is read from the environment instead of being hardcoded, so the
# secret is not stored in the notebook. Export VISUAL_CROSSING_API_KEY before
# running this cell; a missing variable raises KeyError.
weather_url = (
    "https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/"
    "Russia%2C%20Krasnodar/today"
    "?unitGroup=metric&include=days"
    f"&key={os.environ['VISUAL_CROSSING_API_KEY']}"
    "&contentType=csv"
)
# The context manager closes the HTTP response once pandas has consumed it.
with urllib.request.urlopen(weather_url) as data:
    df = pd.read_csv(data)
df

Unnamed: 0,name,datetime,tempmax,tempmin,temp,feelslikemax,feelslikemin,feelslike,dew,humidity,...,solarenergy,uvindex,severerisk,sunrise,sunset,moonphase,conditions,description,icon,stations
0,"Краснодар, Южный федеральный округ, Россия",2023-07-11,25,17,20.3,25,17,20.3,14.8,71.7,...,16.1,8,30,2023-07-11T04:47:47,2023-07-11T20:11:02,0.78,"Rain, Partially cloudy",Partly cloudy throughout the day with storms p...,rain,URKK


# Data preprocessing for prediction

In [22]:
# Remove the columns that are presumably not model inputs (TODO confirm against
# the training notebook) and replace a missing wind gust reading with 0.0.
unused_columns = ['name', 'conditions', 'datetime', 'description', 'moonphase', 'precipprob', 'preciptype', 'snow', 'snowdepth', 'stations', 'sunrise', 'sunset', 'severerisk']
prepared = df.drop(columns=unused_columns).assign(
    windgust=lambda frame: frame['windgust'].fillna(0.0)
)

# 'icon' is kept aside as the observed label; the remaining columns are cast
# to float64, and 'uvindex' is then converted to int64.
y_for_pred = prepared['icon']
df_for_pred = (
    prepared
    .drop(columns=['icon'])
    .astype(np.float64)
    .astype({'uvindex': np.int64})
)

print(f"X: {df_for_pred}\n\ny: {y_for_pred}")

X:    tempmax  tempmin  temp  feelslikemax  feelslikemin  feelslike   dew  \
0     25.0     17.0  20.3          25.0          17.0       20.3  14.8   

   humidity  precip  precipcover  windgust  windspeed  winddir  \
0      71.7     2.3        33.33      42.1       28.8    243.4   

   sealevelpressure  cloudcover  visibility  solarradiation  solarenergy  \
0            1011.3        45.7        13.0           185.6         16.1   

   uvindex  
0        8  

y: 0    rain
Name: icon, dtype: object


In [23]:
# Display the prepared feature frame that is passed to the model below.
df_for_pred

Unnamed: 0,tempmax,tempmin,temp,feelslikemax,feelslikemin,feelslike,dew,humidity,precip,precipcover,windgust,windspeed,winddir,sealevelpressure,cloudcover,visibility,solarradiation,solarenergy,uvindex
0,25.0,17.0,20.3,25.0,17.0,20.3,14.8,71.7,2.3,33.33,42.1,28.8,243.4,1011.3,45.7,13.0,185.6,16.1,8


In [24]:
# Predict the weather label with the pyfunc model; y_for_pred holds the
# observed 'icon' value for comparison.
y_pred = loaded_model.predict(df_for_pred)
y_pred

array(['rain'], dtype=object)

In [27]:
# Per-class probabilities from the scikit-learn flavor of the model.
# NOTE(review): column order presumably follows sklearn_model.classes_ — confirm.
sklearn_model.predict_proba(df_for_pred)

array([[2.96508713e-04, 3.07553626e-05, 3.27368911e-06, 4.60538337e-04,
        9.97836388e-01, 1.37005783e-03, 2.47796694e-06]])

# Test Clear-Day

In [30]:
# Fetch the daily weather summary for Krasnodar on 2023-07-17 from the Visual
# Crossing Timeline API as CSV.
# The API key is read from the environment instead of being hardcoded, so the
# secret is not stored in the notebook. Export VISUAL_CROSSING_API_KEY before
# running this cell; a missing variable raises KeyError.
weather_url = (
    "https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/"
    "Russia%2C%20Krasnodar/2023-07-17/2023-07-17"
    "?unitGroup=metric&include=days"
    f"&key={os.environ['VISUAL_CROSSING_API_KEY']}"
    "&contentType=csv"
)
# The context manager closes the HTTP response once pandas has consumed it.
with urllib.request.urlopen(weather_url) as data:
    df = pd.read_csv(data)
df

Unnamed: 0,name,datetime,tempmax,tempmin,temp,feelslikemax,feelslikemin,feelslike,dew,humidity,...,solarenergy,uvindex,severerisk,sunrise,sunset,moonphase,conditions,description,icon,stations
0,"Краснодар, Южный федеральный округ, Россия",2023-07-17,28.9,17.9,23.3,27.8,17.9,23.1,11.5,49.9,...,29.1,9,10,2023-07-17T04:53:04,2023-07-17T20:06:59,0,Clear,Clear conditions throughout the day.,clear-day,


In [31]:
# Remove the columns that are presumably not model inputs (TODO confirm against
# the training notebook) and replace a missing wind gust reading with 0.0.
unused_columns = ['name', 'conditions', 'datetime', 'description', 'moonphase', 'precipprob', 'preciptype', 'snow', 'snowdepth', 'stations', 'sunrise', 'sunset', 'severerisk']
prepared = df.drop(columns=unused_columns).assign(
    windgust=lambda frame: frame['windgust'].fillna(0.0)
)

# 'icon' is kept aside as the observed label; the remaining columns are cast
# to float64, and 'uvindex' is then converted to int64.
y_for_pred = prepared['icon']
df_for_pred = (
    prepared
    .drop(columns=['icon'])
    .astype(np.float64)
    .astype({'uvindex': np.int64})
)

print(f"X: {df_for_pred}\n\ny: {y_for_pred}")

X:    tempmax  tempmin  temp  feelslikemax  feelslikemin  feelslike   dew  \
0     28.9     17.9  23.3          27.8          17.9       23.1  11.5   

   humidity  precip  precipcover  windgust  windspeed  winddir  \
0      49.9     0.0          0.0      20.5       16.2     34.3   

   sealevelpressure  cloudcover  visibility  solarradiation  solarenergy  \
0            1016.5        12.8        24.1           336.9         29.1   

   uvindex  
0        9  

y: 0    clear-day
Name: icon, dtype: object


In [32]:
# Predict the weather label for the 2023-07-17 sample with the pyfunc model;
# y_for_pred holds the observed 'icon' value for comparison.
y_pred = loaded_model.predict(df_for_pred)
y_pred

array(['clear-day'], dtype=object)

In [33]:
# Per-class probabilities from the scikit-learn flavor of the model.
# NOTE(review): column order presumably follows sklearn_model.classes_ — confirm.
sklearn_model.predict_proba(df_for_pred)

array([[9.98273853e-01, 1.05622052e-06, 1.12509563e-07, 1.42664656e-03,
        2.51155911e-04, 4.70888873e-05, 8.73892207e-08]])