In [6]:
from nixtla import NixtlaClient
import pandas as pd
import numpy as np
import os
import random
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [None]:
# Read the Nixtla API key from the environment so it is never hardcoded in the
# notebook. Fail fast when it is missing: os.getenv would silently return None,
# and the raw-HTTP cell below would then send the header "Bearer None".
API_KEY = os.getenv('NIXTLA_API_KEY')
if not API_KEY:
    raise EnvironmentError(
        "NIXTLA_API_KEY is not set; export it before running this notebook."
    )

# SDK client; the last expression displays the result of the key validation.
nixtla_client = NixtlaClient(api_key=API_KEY)
nixtla_client.validate_api_key()

In [49]:
import pandas as pd
import numpy as np
import requests
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Forecast endpoint used by the raw HTTP requests in this cell.
BASE_URL = 'https://api.nixtla.io/forecast'
# Bearer-token authorization header built from the API key read in an earlier cell.
HEADERS = dict(Authorization=f'Bearer {API_KEY}')

# Nominal capacities (MW) in site order; the position in each tuple is the
# 1-based site number that appears in the workbook file name.
_solar_capacities_mw = (50, 130, 30, 130, 110, 35, 30, 30)
_wind_capacities_mw = (99, 200, 99, 66, 36, 96)

# Workbook names for every site: the eight solar stations first, then the six
# wind farms.
xlsx_files = [
    f"Solar station site {site} (Nominal capacity-{capacity}MW).xlsx"
    for site, capacity in enumerate(_solar_capacities_mw, 1)
]
xlsx_files += [
    f"Wind farm site {site} (Nominal capacity-{capacity}MW).xlsx"
    for site, capacity in enumerate(_wind_capacities_mw, 1)
]
# Raw timestamp column as it appears in the workbooks (before column names are stripped).
TIME_COL = 'Time(year-month-day h:m:s)'
# (connect, read) timeouts in seconds for the forecast request, so a stalled
# connection cannot hang the cell indefinitely.
REQUEST_TIMEOUT_S = (10, 300)
# Metrics for every site are appended to this file.
results_file = "wind_forecast_results.txt"


def _parse_site_timestamps(raw_times):
    """Parse the raw time column, rolling '24:00:00' over to the next day.

    Args:
        raw_times: Series holding the raw time values (strings or timestamps).

    Returns:
        Series of datetimes. Any value written with hour 24 is parsed as hour
        00 and shifted forward by one day, so it marks the end of its day
        rather than the beginning of it.
    """
    as_text = raw_times.map(str)
    is_end_of_day = as_text.str.contains(' 24:', regex=False)
    parsed = pd.to_datetime(
        as_text.str.replace(' 24:', ' 00:', regex=False),
        format='%Y-%m-%d %H:%M:%S',
    )
    return parsed.where(~is_end_of_day, parsed + pd.Timedelta(days=1))


# For each site: load the workbook, scale it, forecast the held-out 20% from
# the first 80% via the HTTP API, then print and append RMSE / MAE / R2.
for site_number, file in enumerate(xlsx_files, 1):
    print(f"Processing file: {file}")
    data = pd.read_excel(f'datasets/{file}')

    # Convert the time column to datetime (fixing hour-24 values) and index by it
    data[TIME_COL] = _parse_site_timestamps(data[TIME_COL])
    data.set_index(TIME_COL, inplace=True)

    # Strip leading/trailing spaces from column names
    data.columns = data.columns.str.strip()

    # Handle NaN values
    data.ffill(inplace=True)

    # Normalize the features
    # NOTE(review): the scaler is fitted on the full series, so the test range
    # influences the scaling — consider fitting on the training rows only.
    scaler = MinMaxScaler()
    data_scaled = pd.DataFrame(
        scaler.fit_transform(data), columns=data.columns, index=data.index
    )

    # Prepare input/output sequences
    X = data_scaled.iloc[:, :-1].values  # All features except the last (target) column
    y = data_scaled.iloc[:, -1].values   # Target column (Power output)

    # Chronological 80-20 split
    train_size = int(0.8 * len(X))
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]

    if train_size == 0 or len(y_test) == 0:
        print(f"Skipping site {site_number}: not enough rows for an 80/20 split.")
        continue

    # Only the training portion is sent as history. Sending the full series
    # would leak the test values and make the forecast start after the last
    # test timestamp, so nothing would line up with the held-out data.
    train_history = data_scaled['Power (MW)'].iloc[:train_size]
    historical_dict = {
        ts.strftime('%Y-%m-%d %H:%M:%S'): float(value)
        for ts, value in train_history.items()
    }

    # NOTE(review): "H" assumes hourly sampling — confirm against the data.
    # NOTE(review): forecasting the whole test range in one request is a very
    # long horizon; it may be related to the 524 errors seen in earlier runs.
    payload = {
        "model": "timegpt-1",
        "freq": "H",
        "y": historical_dict,
        "fh": len(y_test)
    }

    # Make the request to Nixtla API
    try:
        response = requests.post(
            BASE_URL, json=payload, headers=HEADERS, timeout=REQUEST_TIMEOUT_S
        )
    except requests.exceptions.RequestException as exc:
        print(f"Request failed for site {site_number}: {exc}")
        continue

    if response.status_code != 200:
        # Error bodies are not guaranteed to be JSON objects; fall back to raw text.
        try:
            error_body = response.json()
            message = error_body.get('message') if isinstance(error_body, dict) else error_body
        except ValueError:
            message = response.text
        print(f"Failed to get forecast for site {site_number}. Status code: {response.status_code}, Message: {message}")
        continue

    forecast_data = response.json()
    forecast_df = pd.DataFrame({
        'timestamp': pd.to_datetime(forecast_data['data']['timestamp']),
        'predictions': forecast_data['data']['value']
    })

    # Held-out actuals, indexed from train_size so an empty tail can never
    # select the whole index (which a negative slice of length 0 would do).
    test_df = pd.DataFrame({
        'timestamp': data_scaled.index[train_size:],
        'actual': y_test
    })

    # Merge forecasts with actual test data
    merged_df = pd.merge(test_df, forecast_df, how='left', on='timestamp')

    # Rows without a matching forecast timestamp carry NaN predictions, which
    # the sklearn metrics reject; drop them and report how many were lost.
    unmatched = merged_df['predictions'].isna()
    if unmatched.all():
        print(f"No forecast timestamps matched the test period for site {site_number}; skipping metrics.")
        continue
    if unmatched.any():
        print(f" Warning: {int(unmatched.sum())} test rows had no matching forecast and were excluded.")
        merged_df = merged_df[~unmatched]

    actual = merged_df['actual'].values
    predicted = merged_df['predictions'].values

    # Calculate metrics (in scaled units)
    rmse = np.sqrt(mean_squared_error(actual, predicted))
    mae = mean_absolute_error(actual, predicted)
    r2 = r2_score(actual, predicted)

    print(f"Site {site_number}:")
    print(f" RMSE: {rmse}")
    print(f" MAE: {mae}")
    print(f" R2 Score: {r2}")

    # Save results to file (handle named so it does not shadow the loop's `file`)
    with open(results_file, "a") as results_fh:
        results_fh.write(f"Site {site_number}:\n")
        results_fh.write(f"RMSE: {rmse}\n")
        results_fh.write(f"MAE: {mae}\n")
        results_fh.write(f"R2 Score: {r2}\n")
        results_fh.write("\n")


Processing file: Wind farm site 1 (Nominal capacity-99MW).xlsx
Failed to get forecast for site 1. Status code: 500, Message: Unexpected token 'e', "error code: 524" is not valid JSON
Processing file: Wind farm site 2 (Nominal capacity-200MW).xlsx
Failed to get forecast for site 2. Status code: 500, Message: Unexpected token 'e', "error code: 524" is not valid JSON
Processing file: Wind farm site 3 (Nominal capacity-99MW).xlsx
Failed to get forecast for site 3. Status code: 500, Message: Unexpected token 'e', "error code: 524" is not valid JSON
Processing file: Wind farm site 4 (Nominal capacity-66MW).xlsx
Failed to get forecast for site 4. Status code: 500, Message: Unexpected token 'e', "error code: 524" is not valid JSON
Processing file: Wind farm site 5 (Nominal capacity-36MW).xlsx


KeyboardInterrupt: 