In [1]:
import pandas as pd
import requests
import json
from datetime import datetime
from pathlib import Path
from config import Settings

# Where the service under test is listening: loopback host, port taken from
# the project's Settings object.
IP = '127.0.0.1'
PORT = Settings().PORT

# Columns sent to the service as targets.
TARGET_COLUMNS = [
    'PRIORIDADES_anexo1',
    'COMUM_anexo1',
    'COMUM_anexo3',
    'PRIORIDADES_anexo3',
    'COMUM_mossoro',
    'PRIORIDADES_mossoro',
    'COMUM_parnamirim',
    'PRIORIDADES_parnamirim',
]

# No additional feature columns are supplied in this run.
FEATURE_COLUMNS = []

In [2]:


# Location of the training set (parquet file under test_data/).
train_file_path = Path("test_data/df_train.parquet")

# Echo the run configuration before touching the file.
for label, value in (
    ("Reading training data from:", train_file_path),
    ("Target columns:", TARGET_COLUMNS),
    ("Feature columns:", FEATURE_COLUMNS),
):
    print(label, value)

# Sanity check: load the file locally with pandas and summarise what it holds
# (shape, column names, and the type of its index).
df_train = pd.read_parquet(train_file_path)
for label, value in (
    ("\nTraining data shape:", df_train.shape),
    ("Training data columns:", df_train.columns.tolist()),
    ("Training data index type:", type(df_train.index)),
):
    print(label, value)


Reading training data from: test_data/df_train.parquet
Target columns: ['PRIORIDADES_anexo1', 'COMUM_anexo1', 'COMUM_anexo3', 'PRIORIDADES_anexo3', 'COMUM_mossoro', 'PRIORIDADES_mossoro', 'COMUM_parnamirim', 'PRIORIDADES_parnamirim']
Feature columns: []

Training data shape: (848, 8)
Training data columns: ['COMUM_anexo1', 'PRIORIDADES_anexo1', 'COMUM_anexo3', 'PRIORIDADES_anexo3', 'COMUM_mossoro', 'PRIORIDADES_mossoro', 'COMUM_parnamirim', 'PRIORIDADES_parnamirim']
Training data index type: <class 'pandas.core.indexes.datetimes.DatetimeIndex'>


In [3]:

# Upload the training parquet to the service's /fit endpoint as multipart form data.

# (connect, read) timeouts in seconds. Without a timeout, requests waits
# indefinitely if the service never answers; fail fast when it is unreachable
# but leave generous time for the fit itself.
FIT_TIMEOUT = (5, 600)

with open(train_file_path, "rb") as f:
    # File part of the multipart body: (filename, file object, content type).
    files = {
        "file": ("train.parquet", f, "application/octet-stream")
    }

    # Plain form fields sent alongside the file. requests encodes a list value
    # as one repeated field per element, so an empty FEATURE_COLUMNS list means
    # no 'feature_columns' field is sent at all (it is not sent as "[]").
    form_data = {
        'target_columns': TARGET_COLUMNS,
        'feature_columns': FEATURE_COLUMNS
    }

    print("\nSending fit request...")
    print("Form data:", form_data)

    # The request must be issued while the file is still open, because requests
    # streams the upload from the open handle.
    response = requests.post(
        f'http://{IP}:{PORT}/fit',
        files=files,
        data=form_data,
        timeout=FIT_TIMEOUT,
    )

print(f"\nFit response status: {response.status_code}")
print(f"Fit response content: {response.text}")



Sending fit request...
Form data: {'target_columns': ['PRIORIDADES_anexo1', 'COMUM_anexo1', 'COMUM_anexo3', 'PRIORIDADES_anexo3', 'COMUM_mossoro', 'PRIORIDADES_mossoro', 'COMUM_parnamirim', 'PRIORIDADES_parnamirim'], 'feature_columns': []}

Fit response status: 200
Fit response content: {"message":"Model fitted successfully"}


In [4]:

# Request a forecast covering the date range of the held-out test set, but only
# if the preceding request (the fit) returned HTTP 200.
#
# NOTE: `response` is reassigned below to the forecast response. Re-running this
# cell on its own therefore gates on the previous forecast's status, not the
# fit's; re-run the fit cell first.
if response.status_code == 200:
    df_test = pd.read_parquet("test_data/df_test.parquet")

    # Forecast window = first and last index value of the test set.
    start_date = df_test.index.min()
    end_date = df_test.index.max()

    # Values for each configured feature column, taken from the test data.
    # This is an empty dict when FEATURE_COLUMNS is empty.
    features_data = {
        feature: df_test[feature].tolist() for feature in FEATURE_COLUMNS
    }

    forecast_request = {
        "start_date": start_date.strftime("%Y-%m-%dT%H:%M:%S"),
        "end_date": end_date.strftime("%Y-%m-%dT%H:%M:%S"),
        # Send null rather than an empty object when no features are configured.
        "features_data": features_data if FEATURE_COLUMNS else None
    }

    print("\nSending forecast request...")

    # (connect, read) timeout in seconds, so a stalled service cannot hang the
    # notebook forever.
    response = requests.post(
        f'http://{IP}:{PORT}/forecast',
        json=forecast_request,
        timeout=(5, 120),
    )
    print(f"\nForecast response status: {response.status_code}")
    print(f"Forecast response content: {response.text}")
else:
    print("Skipping forecast due to fit failure")



Sending forecast request...

Forecast response status: 200
Forecast response content: {"PRIORIDADES_anexo1":{"dates":["2024-09-26","2024-09-27","2024-09-28","2024-09-29","2024-09-30","2024-10-01","2024-10-02","2024-10-03","2024-10-04","2024-10-05","2024-10-06","2024-10-07","2024-10-08","2024-10-09","2024-10-10"],"predictions":[30.327787542314006,28.058812057802445,6.857636122005822,6.661687668318674,33.18410312756257,48.72927569636546,31.273150069576936,29.573125506803038,27.11233063908867,5.7511782411154755,5.432357879476143,31.873247316535526,47.381315341966484,29.934302410808392,28.29004497791782]},"COMUM_anexo1":{"dates":["2024-09-26","2024-09-27","2024-09-28","2024-09-29","2024-09-30","2024-10-01","2024-10-02","2024-10-03","2024-10-04","2024-10-05","2024-10-06","2024-10-07","2024-10-08","2024-10-09","2024-10-10"],"predictions":[14.12388794270004,11.87508477457964,-7.369836421231504,-7.6329839111990605,14.442252009537059,17.66207251421256,14.225676675805918,13.099163652271976,11.0

In [5]:
import pandas as pd
import json

# Turn the forecast response into a DataFrame: one column per target, indexed by date.

# `response` is whatever the last request returned. If the forecast failed, or
# was skipped because the fit failed, raise a clear HTTP error here rather than
# an obscure indexing error further down.
response.raise_for_status()

# Payload layout as consumed below:
#   {target_name: {"dates": [...], "predictions": [...]}, ...}
forecast_data = response.json()

# Build one Series per target, indexed by that target's own dates. pandas then
# aligns the columns on date, so a target whose date list differs from the
# others is not silently labelled with another target's dates, and a
# dates/predictions length mismatch raises immediately.
df_forecast = pd.DataFrame({
    target: pd.Series(payload['predictions'], index=pd.to_datetime(payload['dates']))
    for target, payload in forecast_data.items()
})

df_forecast

Unnamed: 0,PRIORIDADES_anexo1,COMUM_anexo1,COMUM_anexo3,PRIORIDADES_anexo3,COMUM_mossoro,PRIORIDADES_mossoro,COMUM_parnamirim,PRIORIDADES_parnamirim
2024-09-26,30.327788,14.123888,37.781343,12.250119,39.669952,15.059646,43.114833,19.21944
2024-09-27,28.058812,11.875085,33.683102,10.730874,35.403091,13.664805,30.556815,13.582015
2024-09-28,6.857636,-7.369836,7.467711,2.426431,4.279667,1.919113,1.890808,7.527276
2024-09-29,6.661688,-7.632984,7.246559,2.354521,3.968577,1.769229,1.25982,7.453187
2024-09-30,33.184103,14.442252,46.229409,16.010805,46.574189,17.717395,43.366112,18.859231
2024-10-01,48.729276,17.662073,42.74076,14.176005,43.120579,17.006801,43.104081,19.341919
2024-10-02,31.27315,14.225677,40.324382,12.876796,40.68669,15.803624,43.700471,18.870327
2024-10-03,29.573126,13.099164,37.067109,11.889932,38.206281,14.492114,40.370621,19.310923
2024-10-04,27.112331,11.022629,32.974958,10.364619,34.124319,13.195437,27.761558,13.662353
2024-10-05,5.751178,-8.036158,6.781871,2.059144,3.220118,1.556738,-0.911927,7.604301


In [6]:
# Calculate the forecast r2, mae, and mse
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

# Reload the held-out test set so this cell does not rely on an earlier cell
# having defined `df_test`.
df_test = pd.read_parquet("test_data/df_test.parquet")

# Score every forecast column against the test column of the same name.
# Values are compared in row order (actuals first, predictions second).
forecast_columns = df_forecast.columns

r2_scores = {
    col: r2_score(df_test[col], df_forecast[col]) for col in forecast_columns
}
mae_scores = {
    col: mean_absolute_error(df_test[col], df_forecast[col]) for col in forecast_columns
}
mse_scores = {
    col: mean_squared_error(df_test[col], df_forecast[col]) for col in forecast_columns
}

# One row per target column, one column per metric.
df_metrics = pd.DataFrame({'r2': r2_scores, 'mae': mae_scores, 'mse': mse_scores})

df_metrics

Unnamed: 0,r2,mae,mse
PRIORIDADES_anexo1,0.65756,9.544202,140.786868
COMUM_anexo1,0.106272,5.495,43.312063
COMUM_anexo3,0.697586,8.806887,137.949027
PRIORIDADES_anexo3,0.468696,3.526349,20.529602
COMUM_mossoro,0.374227,10.231089,295.187006
PRIORIDADES_mossoro,0.46845,5.600983,59.439153
COMUM_parnamirim,0.653822,10.429801,216.516795
PRIORIDADES_parnamirim,0.363231,7.033739,69.51815
