In [1]:
import pandas as pd
import requests
import json
from datetime import datetime
from pathlib import Path
from config import Settings

# Port of the forecasting service, taken from the project settings object.
PORT = Settings().PORT

# Series to forecast. The list is sent as-is with the fit request, so the
# order below is the order the service receives.
TARGET_COLUMNS = [
    'PRIORIDADES_anexo1',
    'COMUM_anexo1',
    'COMUM_anexo3',
    'PRIORIDADES_anexo3',
    'COMUM_mossoro',
    'PRIORIDADES_mossoro',
    'COMUM_parnamirim',
    'PRIORIDADES_parnamirim',
]

# Exogenous feature columns; empty means no features are sent.
FEATURE_COLUMNS = []

# Host of the service (loopback address).
IP = '127.0.0.1'

In [71]:


# Location of the training set that is uploaded with the fit request.
train_file_path = Path("test_data/df_train.parquet")

# Echo the run configuration before reading anything.
for label, value in (
    ("Reading training data from:", train_file_path),
    ("Target columns:", TARGET_COLUMNS),
    ("Feature columns:", FEATURE_COLUMNS),
):
    print(label, value)

# Sanity check: load the parquet file locally and report its basic structure.
df_train = pd.read_parquet(train_file_path)
for label, value in (
    ("\nTraining data shape:", df_train.shape),
    ("Training data columns:", df_train.columns.tolist()),
    ("Training data index type:", type(df_train.index)),
):
    print(label, value)


Reading training data from: test_data/df_train.parquet
Target columns: ['PRIORIDADES_anexo1', 'COMUM_anexo1', 'COMUM_anexo3', 'PRIORIDADES_anexo3', 'COMUM_mossoro', 'PRIORIDADES_mossoro', 'COMUM_parnamirim', 'PRIORIDADES_parnamirim']
Feature columns: []

Training data shape: (848, 8)
Training data columns: ['COMUM_anexo1', 'PRIORIDADES_anexo1', 'COMUM_anexo3', 'PRIORIDADES_anexo3', 'COMUM_mossoro', 'PRIORIDADES_mossoro', 'COMUM_parnamirim', 'PRIORIDADES_parnamirim']
Training data index type: <class 'pandas.core.indexes.datetimes.DatetimeIndex'>


In [72]:

# Plain form fields that accompany the uploaded file.
# NOTE(review): requests encodes a list value as one repeated field per
# element, so an empty list presumably results in no 'feature_columns' field
# being sent at all - confirm the service treats a missing field as
# "no features".
form_data = {
    'target_columns': TARGET_COLUMNS,
    'feature_columns': FEATURE_COLUMNS,
}
fit_url = f'http://{IP}:{PORT}/fit'

# The file handle must stay open while the request is being sent, so the POST
# happens inside the `with` block.
with open(train_file_path, "rb") as train_file:
    # Multipart part: (filename, file object, content type)
    files = {"file": ("train.parquet", train_file, "application/octet-stream")}

    print("\nSending fit request...")
    print("Form data:", form_data)

    response = requests.post(fit_url, data=form_data, files=files)

print(f"\nFit response status: {response.status_code}")
print(f"Fit response content: {response.text}")



Sending fit request...
Form data: {'target_columns': ['PRIORIDADES_anexo1', 'COMUM_anexo1', 'COMUM_anexo3', 'PRIORIDADES_anexo3', 'COMUM_mossoro', 'PRIORIDADES_mossoro', 'COMUM_parnamirim', 'PRIORIDADES_parnamirim'], 'feature_columns': []}

Fit response status: 200
Fit response content: {"message":"Model fitted successfully"}


In [73]:

# Only request a forecast when the response currently held in `response`
# reports HTTP 200.
if response.status_code != 200:
    print("Skipping forecast due to fit failure")
else:
    df_test = pd.read_parquet("test_data/df_test.parquet")

    # Forecast over exactly the date span covered by the test data.
    start_date, end_date = df_test.index.min(), df_test.index.max()

    # One list of test-set values per configured feature column; this is an
    # empty dict when FEATURE_COLUMNS is empty.
    features_data = {name: df_test[name].tolist() for name in FEATURE_COLUMNS}

    timestamp_format = "%Y-%m-%dT%H:%M:%S"
    forecast_request = {
        "start_date": start_date.strftime(timestamp_format),
        "end_date": end_date.strftime(timestamp_format),
        # An empty dict is falsy, so None is sent when there are no features.
        "features_data": features_data or None,
        "coverage": 0.8,
    }

    print("\nSending forecast request...")

    forecast_url = f'http://{IP}:{PORT}/forecast'
    # Note: this rebinds `response`, replacing the fit response.
    response = requests.post(forecast_url, json=forecast_request)
    print(f"\nForecast response status: {response.status_code}")
    print(f"Forecast response content: {response.text}")



Sending forecast request...

Forecast response status: 200
Forecast response content: {"PRIORIDADES_anexo1":{"dates":["2024-09-26","2024-09-27","2024-09-28","2024-09-29","2024-09-30","2024-10-01","2024-10-02","2024-10-03","2024-10-04","2024-10-05","2024-10-06","2024-10-07","2024-10-08","2024-10-09","2024-10-10"],"predictions":[30.831045588531765,28.46561349311971,7.3406646523891865,7.31353906351217,33.757754715310455,49.74654980508363,32.32002276346663,14.691806732779526,27.963195531988738,6.639385421585825,6.450193698498715,32.77387937902086,48.26603413598548,30.811385291287383,29.069909557197654],"lower_bound":[19.41160898742795,17.208991279783543,0.0,0.0,21.993424138037135,38.096465416492684,20.429845131005838,3.1499056409251374,16.270040886157265,0.0,0.0,21.099940866809547,37.16423718617818,19.060238045402436,17.152610438847855],"upper_bound":[42.30248390921819,40.533898107613616,19.996392231034605,18.922205921736616,46.20163014491129,62.38831319066308,43.992360492038266,26.167844

In [74]:
import pandas as pd
import json

# Decode the JSON body of the response currently held in `response`
# (expected to be the forecast response).
forecast_data = json.loads(response.text)
cols = list(forecast_data)
first_entry = forecast_data[cols[0]]

# Coverage and dates are read from the first series only.
coverage_value = first_entry['coverage']
print(f"Forecast coverage: {coverage_value}")

dates = first_entry['dates']
forecast_index = pd.to_datetime(dates)

# Collect the interval bounds as interleaved <series>_lower / <series>_upper
# columns, in the order the series appear in the response.
interval_columns = {}
for feature, data in forecast_data.items():
    interval_columns[feature + '_lower'] = data['lower_bound']
    interval_columns[feature + '_upper'] = data['upper_bound']

# Point forecasts: one column per series, indexed by forecast date.
df_forecast = pd.DataFrame(
    {feature: data['predictions'] for feature, data in forecast_data.items()},
    index=forecast_index,
)
df_interval = pd.DataFrame(interval_columns, index=forecast_index)


df_forecast

Forecast coverage: 0.8


Unnamed: 0,PRIORIDADES_anexo1,COMUM_anexo1,COMUM_anexo3,PRIORIDADES_anexo3,COMUM_mossoro,PRIORIDADES_mossoro,COMUM_parnamirim,PRIORIDADES_parnamirim
2024-09-26,30.831046,14.341095,37.354595,12.281333,40.308108,15.043039,43.325464,19.065787
2024-09-27,28.465613,12.016188,33.068792,10.712392,35.87406,13.584087,30.539924,13.328906
2024-09-28,7.340665,0.0,6.890872,2.430687,4.858816,1.86505,2.025435,7.24986
2024-09-29,7.313539,0.0,6.858378,2.413457,4.737741,1.77771,1.645445,7.223995
2024-09-30,33.757755,14.64289,45.677898,16.022695,47.227011,17.670144,43.611167,18.594422
2024-10-01,49.74655,18.545748,44.593025,14.916116,44.89768,17.674404,44.646721,20.169575
2024-10-02,32.320023,15.065096,42.044468,13.596947,42.421573,16.440434,45.150121,19.701725
2024-10-03,14.691807,2.936236,20.086269,6.359185,20.571993,7.236436,18.945967,13.69113
2024-10-04,27.963196,11.725593,34.319734,10.976582,35.591706,13.702999,28.881629,14.28169
2024-10-05,6.639385,0.0,8.11827,2.674628,4.761133,2.071482,0.326426,8.190069


In [56]:
# Show the per-series lower/upper bound columns of the forecast interval.
df_interval

Unnamed: 0,PRIORIDADES_anexo1_lower,PRIORIDADES_anexo1_upper,COMUM_anexo1_lower,COMUM_anexo1_upper,COMUM_anexo3_lower,COMUM_anexo3_upper,PRIORIDADES_anexo3_lower,PRIORIDADES_anexo3_upper,COMUM_mossoro_lower,COMUM_mossoro_upper,PRIORIDADES_mossoro_lower,PRIORIDADES_mossoro_upper,COMUM_parnamirim_lower,COMUM_parnamirim_upper,PRIORIDADES_parnamirim_lower,PRIORIDADES_parnamirim_upper
2024-09-26,19.413832,42.923296,3.95186,23.943242,24.424909,50.070113,6.692461,17.662537,26.886213,54.728074,8.641728,21.422377,28.233294,59.750598,12.505154,25.600585
2024-09-27,16.326293,40.245024,2.389356,21.86278,20.087405,46.485989,5.061182,16.419265,21.394799,50.798853,7.36151,19.634493,13.46083,46.165583,6.809866,19.936358
2024-09-28,0.0,18.542752,0.0,2.20658,0.0,18.440836,0.0,8.068353,0.0,18.291102,0.0,8.171909,0.0,17.675184,0.525544,13.581826
2024-09-29,0.0,18.216629,0.0,2.670959,0.0,19.071057,0.0,7.826031,0.0,19.408665,0.0,7.889634,0.0,17.352527,0.633225,13.902288
2024-09-30,21.625604,45.205297,4.913087,24.524368,32.793704,57.763771,9.763414,21.359266,33.505785,61.462088,11.799261,24.156784,28.488893,59.454759,12.201564,25.091148
2024-10-01,37.892226,61.341636,8.190284,29.308335,31.648026,56.782952,9.176324,20.554166,31.70016,59.637884,11.99295,24.292954,29.053425,60.001671,13.272765,26.871421
2024-10-02,20.632652,44.002885,4.726267,24.897644,30.624696,54.254606,7.844079,19.367602,28.626112,56.25722,10.034837,22.190513,28.27767,60.99076,13.316703,26.147288
2024-10-03,3.064223,26.335559,0.0,12.581156,8.292818,32.608356,0.804796,12.132771,5.683467,34.677475,1.177107,13.454566,2.985365,36.070802,7.279421,20.51664
2024-10-04,16.063682,40.038783,1.19628,21.723869,21.510251,47.221553,5.674466,16.749545,21.751102,50.021566,7.723485,20.200801,13.568552,45.801324,7.410283,21.119921
2024-10-05,0.0,18.303943,0.0,1.171353,0.0,20.880513,0.0,8.296808,0.0,19.494474,0.0,7.953818,0.0,17.090085,1.686423,14.716508


In [75]:
# Score the forecast against the test data: R^2, MAE and RMSE per series.
from sklearn.metrics import r2_score, mean_absolute_error, root_mean_squared_error

# Observed values the forecast is compared with.
df_test = pd.read_parquet("test_data/df_test.parquet")

# One score per series, keyed by column name.
r2_scores = {}
mae_scores = {}
rmse_scores = {}

# NOTE: the sklearn metrics compare values by position, not by index label,
# so df_test and df_forecast must have the same rows in the same date order.
for feature in df_forecast.columns:
    y_true = df_test[feature]
    y_pred = df_forecast[feature]
    r2_scores[feature] = r2_score(y_true, y_pred)
    mae_scores[feature] = mean_absolute_error(y_true, y_pred)
    # root_mean_squared_error yields the RMSE (same units as the data), so it
    # is labelled 'rmse' rather than 'mse'.
    rmse_scores[feature] = root_mean_squared_error(y_true, y_pred)

# Rows are the series, columns are the metrics.
df_metrics = pd.DataFrame({
    'r2': r2_scores,
    'mae': mae_scores,
    'rmse': rmse_scores
})

df_metrics

Unnamed: 0,r2,mae,mse
PRIORIDADES_anexo1,0.76798,8.55768,9.766794
COMUM_anexo1,0.639143,2.800599,4.181858
COMUM_anexo3,0.834744,7.637585,8.682346
PRIORIDADES_anexo3,0.586773,3.38988,3.995885
COMUM_mossoro,0.506638,9.692075,15.255375
PRIORIDADES_mossoro,0.581911,4.963101,6.83752
COMUM_parnamirim,0.804752,8.613377,11.050674
PRIORIDADES_parnamirim,0.454784,6.766263,7.71512


In [76]:
import plotly.graph_objects as go

# Draw one figure per forecast series: the point forecast plus a shaded band
# between the lower and upper interval bounds.
for feature in df_forecast.columns:
    fig = go.Figure()

    # Point forecast line
    fig.add_trace(go.Scatter(
        x=df_forecast.index,
        y=df_forecast[feature],
        mode='lines',
        name=feature
    ))

    # Lower bound: invisible line and deliberately NO fill. 'tonexty' fills
    # toward the previously added trace, which here is the forecast line;
    # filling from this trace would shade the forecast-to-lower region a
    # second time once the upper trace fills the whole band.
    # mode='lines' keeps Plotly from adding markers on short series.
    fig.add_trace(go.Scatter(
        x=df_interval.index,
        y=df_interval[feature + '_lower'],
        mode='lines',
        line=dict(width=0),
        showlegend=True,
        name=f'lower confidence interval ({coverage_value:.0%})'
    ))

    # Upper bound: fills down to the previous trace (the lower bound), which
    # shades the interval band exactly once.
    fig.add_trace(go.Scatter(
        x=df_interval.index,
        y=df_interval[feature + '_upper'],
        mode='lines',
        fill='tonexty',
        fillcolor='rgba(0,100,80,0.2)',
        line=dict(width=0),
        showlegend=True,
        name=f'upper confidence interval ({coverage_value:.0%})'
    ))

    # Titles and theme
    fig.update_layout(
        title=f'Forecast for {feature}',
        xaxis_title='Date',
        yaxis_title='Value',
        template='plotly_dark',
    )

    # Render the figure inline
    fig.show()