In [None]:
# Install the Nixtla SDK once per environment (uncomment to run):
# %pip install nixtla

In [None]:
import math
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from nixtla import NixtlaClient
from sklearn.metrics import mean_absolute_error, mean_squared_error

# SECURITY: never hardcode the Nixtla API key in the notebook. Any key that was
# previously committed here must be treated as leaked: revoke it and issue a new one.
# The key is read from the NIXTLA_API_KEY environment variable; it is None when the
# variable is unset (NOTE(review): the Nixtla client is documented to look up
# NIXTLA_API_KEY itself when api_key is None — confirm for the installed version).
api_key = os.environ.get("NIXTLA_API_KEY")

# Shared accumulator of per-dataset metrics; timegpt_prediction writes into it by default.
RESULTS = {}

def log_simple_result(results_dict, dataset_name, horizon, mae, rmse, mape, r2):
    """Record one dataset's rounded metrics in ``results_dict``.

    The entry is stored under ``dataset_name`` and replaces any existing entry
    with that key. MAE, RMSE and MAPE are rounded to 2 decimals and R² to 4;
    ``horizon`` is stored unchanged. Nothing is returned.
    """
    entry = {"horizon": horizon}
    # Metrics reported with two decimals, in the order they appear in the table.
    for label, value in (("MAE", mae), ("RMSE", rmse), ("MAPE (%)", mape)):
        entry[label] = round(value, 2)
    entry["R²"] = round(r2, 4)
    results_dict[dataset_name] = entry

def _pick_pred_col(fcst_df):
    """Heuristically pick the point-forecast column from Nixtla output."""
    # Common names: 'y_hat', 'TimeGPT', 'forecast', 'yhat', 'y_pred'
    candidates = []
    for c in fcst_df.columns:
        cl = c.lower()
        if cl in {"y_hat", "yhat", "forecast", "timegpt", "y_pred"}:
            candidates.append(c)
        elif "y_hat" in cl or "yhat" in cl:
            candidates.append(c)
    if candidates:
        return candidates[0]
    # Fallback: last non-key column
    key_cols = {"unique_id", "ds", "y"}
    for c in fcst_df.columns[::-1]:
        if c not in key_cols:
            return c
    raise ValueError("Could not identify forecast column in fcst_df.")

def _pick_interval_cols(fcst_df, level=80):
    """Try to find lower/upper interval columns for a given level."""
    lvl = str(level)
    lower_candidates = [c for c in fcst_df.columns if any(s in c.lower() for s in [f"lo-{lvl}", f"y_hat_lo-{lvl}", f"lower_{lvl}"])]
    upper_candidates = [c for c in fcst_df.columns if any(s in c.lower() for s in [f"hi-{lvl}", f"y_hat_hi-{lvl}", f"upper_{lvl}"])]
    lo = lower_candidates[0] if lower_candidates else None
    hi = upper_candidates[0] if upper_candidates else None
    return lo, hi

def timegpt_prediction(
    dataset,
    horizon=24,
    frequency="M",
    dataset_name=None,
    results_dict=RESULTS,
    api_key=None,
    level=(80, 90),
    client_kwargs=None,
    forecast_kwargs=None,
):
    """Forecast one series with Nixtla/TimeGPT, score it on a hold-out and plot it.

    Parameters
    ----------
    dataset : callable
        Loader called as ``dataset(horizon=horizon)``. It must return
        ``(train_df, test_df)``, each with ``unique_id``, ``ds`` and ``y`` columns.
    horizon : int
        Number of steps to forecast; also forwarded to the loader.
    frequency : str
        Kept for backward compatibility only: this function does NOT forward it
        to the forecast call. To set the frequency explicitly, pass it through
        ``forecast_kwargs`` (NOTE(review): presumably ``{"freq": ...}`` — confirm
        against the installed nixtla version).
    dataset_name : str, optional
        Key used in ``results_dict`` and in the plot title. Defaults to the
        loader's ``__name__`` (or ``"unnamed_dataset"``).
    results_dict : dict
        Mapping that receives the metrics. The default is the shared
        module-level ``RESULTS`` dict, so successive calls accumulate there.
    api_key : str, optional
        Passed to ``NixtlaClient``.
    level : sequence of numbers, optional
        Prediction-interval levels requested from the forecast call; the first
        one is drawn as a band when matching columns are found.
    client_kwargs, forecast_kwargs : dict, optional
        Extra keyword arguments for ``NixtlaClient(...)`` and ``.forecast(...)``.

    Returns
    -------
    dict
        ``results_dict``, updated with MAE, RMSE, MAPE (%) and R² for this run.

    Raises
    ------
    ValueError
        If no forecast column can be identified, or if the number of
        predictions differs from the number of test observations.
    """
    # 1) Load dataset
    train_df, test_df = dataset(horizon=horizon)
    uid = train_df["unique_id"].iloc[0]

    # 2) Instantiate client
    client_kwargs = client_kwargs or {}
    nixtla_client = NixtlaClient(api_key=api_key, **client_kwargs)

    # 3) Prepare train in Nixtla format
    train_nixtla = train_df[["unique_id", "ds", "y"]].copy()
    train_nixtla["ds"] = pd.to_datetime(train_nixtla["ds"])

    # 4) Forecast next `horizon` from end of train
    forecast_kwargs = forecast_kwargs or {}
    fcst_df = nixtla_client.forecast(
        train_nixtla,
        h=horizon,
        level=list(level) if level else None,
        **forecast_kwargs
    )

    # 5) Align predictions with test horizon (by position, not by timestamp)
    fcst_uid = fcst_df[fcst_df["unique_id"] == uid].sort_values("ds")
    pred_col = _pick_pred_col(fcst_uid)
    preds = fcst_uid.tail(horizon)[pred_col].to_numpy()

    # Actuals
    actuals = test_df["y"].to_numpy()

    if len(preds) != len(actuals):
        raise ValueError(
            f"Forecast has {len(preds)} rows but the test set has {len(actuals)}; "
            "cannot align predictions with actuals."
        )

    # 6) Metrics
    mae = mean_absolute_error(actuals, preds)
    rmse = math.sqrt(mean_squared_error(actuals, preds))
    # Clip the *magnitude* of the actuals: clipping the signed values would turn
    # every negative actual into 1e-8 and blow the MAPE up by many orders of magnitude.
    mape_denominator = np.clip(np.abs(actuals), 1e-8, None)
    mape = np.mean(np.abs(actuals - preds) / mape_denominator) * 100
    r2 = 1 - np.sum((actuals - preds) ** 2) / np.sum((actuals - np.mean(actuals)) ** 2)

    # 7) Save results
    name = dataset_name or getattr(dataset, "__name__", "unnamed_dataset")
    log_simple_result(results_dict, name, horizon, mae, rmse, mape, r2)

    # 8) Plot (with intervals if available)
    plt.figure(figsize=(12, 6))
    plt.plot(train_df["ds"], train_df["y"], label="Train")
    plt.plot(test_df["ds"], actuals, label="Actual", marker="o")
    plt.plot(test_df["ds"], preds, label=f"Forecast ({pred_col})", marker="x")

    # Only the first requested level is drawn as a shaded band.
    band_level = level[0] if level else None
    if band_level is not None:
        lo_col, hi_col = _pick_interval_cols(fcst_uid, level=band_level)
        if lo_col and hi_col:
            band = fcst_uid.tail(horizon)
            plt.fill_between(
                test_df["ds"],
                band[lo_col].to_numpy(),
                band[hi_col].to_numpy(),
                alpha=0.25,
                label=f"{band_level}% PI"
            )

    plt.title(f"{name} Forecast - Nixtla/TimeGPT (H={horizon})")
    plt.xlabel("Date")
    plt.ylabel("y")
    plt.legend()
    plt.grid(True)
    plt.show()

    return results_dict

In [None]:
def load_air_passengers(horizon=24, unique_id="AP1"):
    """Download the airline-passengers CSV and split it into train/test frames.

    The ``Month`` column is parsed to datetimes and renamed to ``ds``;
    ``Passengers`` becomes ``y``. The frame's shape and the mean of ``y`` are
    printed. The last ``horizon`` rows form the test set, everything before
    them the training set.

    Returns:
        tuple: ``(train_df, test_df)``, each with columns
        ``unique_id``, ``ds``, ``y`` and ``unique_id`` set to the given value.
    """
    source_url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv"
    passengers = pd.read_csv(source_url)

    # Parse the timestamps first, then switch to the ds / y naming.
    passengers['Month'] = pd.to_datetime(passengers['Month'])
    passengers = passengers.rename(columns={"Month": "ds", "Passengers": "y"})
    print(passengers.shape)
    print('Mean: ', passengers['y'].mean())

    # Hold out the final `horizon` rows and tag both parts with the series id.
    out_cols = ['unique_id', 'ds', 'y']
    train_df = passengers.iloc[:-horizon].assign(unique_id=unique_id)[out_cols]
    test_df = passengers.iloc[-horizon:].assign(unique_id=unique_id)[out_cols]
    return train_df, test_df

# Evaluate TimeGPT on the airline-passengers series, holding out the last 24 rows.
timegpt_prediction(load_air_passengers, horizon=24, frequency="M", api_key=api_key, level=(80, 90))

In [None]:
def load_air_sunsopts(horizon=24, unique_id="AP1"):
    """Download the Sunspots CSV and split it into train/test frames.

    The leftover ``Unnamed: 0`` index column is dropped when present (its
    absence is no longer an error, matching the guarded drops in the other
    loaders). ``Date`` is parsed to datetimes and renamed to ``ds``;
    ``Monthly Mean Total Sunspot Number`` becomes ``y``. The frame's shape and
    the mean of ``y`` are printed. The last ``horizon`` rows form the test set.

    The function name keeps its original spelling so existing callers still work.

    Returns:
        tuple: ``(train_df, test_df)``, each with columns
        ``unique_id``, ``ds``, ``y``.
    """
    url = "https://raw.githubusercontent.com/ahamed14051/time-series-dataset/refs/heads/main/Sunspots.csv"
    df = pd.read_csv(url)
    # errors="ignore": tolerate a CSV that was saved without the stray index column.
    df = df.drop(columns=['Unnamed: 0'], errors='ignore')
    print(df.shape)

    # Standardize column names
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.rename(columns={"Date": "ds", "Monthly Mean Total Sunspot Number": "y"})
    print('Mean: ', df['y'].mean())

    # Split train/test: the final `horizon` rows are held out.
    train_df = df.iloc[:-horizon].copy()
    test_df = df.iloc[-horizon:].copy()

    # Add unique_id
    train_df['unique_id'] = unique_id
    test_df['unique_id'] = unique_id

    return train_df[['unique_id', 'ds', 'y']], test_df[['unique_id', 'ds', 'y']]

# Evaluate TimeGPT on the sunspot series, holding out the last 24 rows.
timegpt_prediction(load_air_sunsopts, horizon=24, frequency="M", api_key=api_key, level=(80, 90))

In [None]:
def load_temp_daily(horizon=24, unique_id="AP1"):
    """Download temp.csv, regularise it to a daily grid and split train/test.

    Steps: drop the ``Unnamed: 2`` column if present; coerce ``Date`` to
    datetimes and ``temp`` to numbers, discarding rows where either fails;
    sort by date and drop duplicate dates; reindex onto every calendar day
    between the first and last date; interpolate missing ``y`` values in both
    directions. The mean of ``y`` is printed. The last ``horizon`` days form
    the test set.

    Returns:
        tuple: ``(train_df, test_df)``, each with columns
        ``unique_id``, ``ds``, ``y``.
    """
    raw = pd.read_csv("https://raw.githubusercontent.com/ahamed14051/time-series-dataset/refs/heads/main/temp.csv")
    raw = raw.drop(columns=['Unnamed: 2'], errors='ignore')

    # Unparseable dates become NaT and are discarded.
    raw['Date'] = pd.to_datetime(raw['Date'], errors='coerce')
    raw = raw.dropna(subset=['Date']).rename(columns={"Date": "ds", "temp": "y"})

    # Same treatment for non-numeric readings.
    raw['y'] = pd.to_numeric(raw['y'], errors='coerce')
    raw = raw.dropna(subset=['y'])

    # One row per calendar day: sort, de-duplicate, then reindex on the full range.
    raw = raw.sort_values('ds').drop_duplicates('ds')
    every_day = pd.date_range(raw['ds'].min(), raw['ds'].max(), freq='D')
    daily = raw.set_index('ds').reindex(every_day).rename_axis('ds')

    # Days introduced by the reindex have no reading yet; interpolate them.
    daily['y'] = daily['y'].interpolate(limit_direction='both')

    print('Mean:', daily['y'].mean())

    def _finalise(part):
        # Tag with the series id and move the date index back into a `ds` column.
        part = part.copy()
        part['unique_id'] = unique_id
        return part.reset_index().rename(columns={'index':'ds'})[['unique_id','ds','y']]

    return _finalise(daily.iloc[:-horizon]), _finalise(daily.iloc[-horizon:])

# Daily series: forecast 24 days ahead and declare the daily ('D') frequency.
timegpt_prediction(load_temp_daily, horizon=24, frequency="D", api_key=api_key, level=(80, 90))

In [None]:
def load_temperature(horizon=24, unique_id="AP1"):
    """Load the ``Temperature`` series from Rainfall_data.csv as train/test frames.

    ``ds`` is assembled from the ``Year``, ``Month`` and ``Day`` columns and
    ``y`` is ``Temperature`` coerced to numeric; rows whose ``y`` cannot be
    parsed are dropped. The mean of ``y`` is printed. The last ``horizon``
    rows form the test set.

    Returns:
        tuple: ``(train_df, test_df)``, each with columns
        ``unique_id``, ``ds``, ``y``.
    """
    rainfall = pd.read_csv("https://raw.githubusercontent.com/ahamed14051/time-series-dataset/refs/heads/main/Rainfall_data.csv")

    # Timestamp from the date parts; target from the Temperature column.
    rainfall['ds'] = pd.to_datetime(rainfall[['Year', 'Month', 'Day']])
    rainfall['y'] = pd.to_numeric(rainfall['Temperature'], errors='coerce')
    rainfall = rainfall.dropna(subset=['y'])

    print('Mean Temperature:', rainfall['y'].mean())

    # Hold out the final `horizon` rows and tag both parts with the series id.
    out_cols = ['unique_id', 'ds', 'y']
    train_df = rainfall.iloc[:-horizon].assign(unique_id=unique_id)[out_cols]
    test_df = rainfall.iloc[-horizon:].assign(unique_id=unique_id)[out_cols]
    return train_df, test_df

# Evaluate TimeGPT on the Temperature series, holding out the last 24 rows.
timegpt_prediction(load_temperature, horizon=24, frequency="M", api_key=api_key, level=(80, 90))

In [None]:
def load_precipitation(horizon=24, unique_id="AP1"):
    """Load the ``Precipitation`` series from Rainfall_data.csv as train/test frames.

    ``ds`` is assembled from the ``Year``, ``Month`` and ``Day`` columns and
    ``y`` is ``Precipitation`` coerced to numeric; rows whose ``y`` cannot be
    parsed are dropped. The mean of ``y`` is printed. The last ``horizon``
    rows form the test set.

    Returns:
        tuple: ``(train_df, test_df)``, each with columns
        ``unique_id``, ``ds``, ``y``.
    """
    rainfall = pd.read_csv("https://raw.githubusercontent.com/ahamed14051/time-series-dataset/refs/heads/main/Rainfall_data.csv")

    # Timestamp from the date parts; target from the Precipitation column.
    rainfall['ds'] = pd.to_datetime(rainfall[['Year', 'Month', 'Day']])
    rainfall['y'] = pd.to_numeric(rainfall['Precipitation'], errors='coerce')
    rainfall = rainfall.dropna(subset=['y'])

    print('Mean Precipitation:', rainfall['y'].mean())

    # Hold out the final `horizon` rows and tag both parts with the series id.
    out_cols = ['unique_id', 'ds', 'y']
    train_df = rainfall.iloc[:-horizon].assign(unique_id=unique_id)[out_cols]
    test_df = rainfall.iloc[-horizon:].assign(unique_id=unique_id)[out_cols]
    return train_df, test_df

# Evaluate TimeGPT on the Precipitation series, holding out the last 24 rows.
timegpt_prediction(load_precipitation, horizon=24, frequency="M", api_key=api_key, level=(80, 90))

In [None]:
def load_humidity(horizon=24, unique_id="AP1"):
    """Load the ``Specific Humidity`` series from Rainfall_data.csv as train/test frames.

    ``ds`` is assembled from the ``Year``, ``Month`` and ``Day`` columns and
    ``y`` is ``Specific Humidity`` coerced to numeric; rows whose ``y`` cannot
    be parsed are dropped. The mean of ``y`` is printed. The last ``horizon``
    rows form the test set.

    Returns:
        tuple: ``(train_df, test_df)``, each with columns
        ``unique_id``, ``ds``, ``y``.
    """
    rainfall = pd.read_csv("https://raw.githubusercontent.com/ahamed14051/time-series-dataset/refs/heads/main/Rainfall_data.csv")

    # Timestamp from the date parts; target from the Specific Humidity column.
    rainfall['ds'] = pd.to_datetime(rainfall[['Year', 'Month', 'Day']])
    rainfall['y'] = pd.to_numeric(rainfall['Specific Humidity'], errors='coerce')
    rainfall = rainfall.dropna(subset=['y'])

    print('Mean Specific Humidity:', rainfall['y'].mean())

    # Hold out the final `horizon` rows and tag both parts with the series id.
    out_cols = ['unique_id', 'ds', 'y']
    train_df = rainfall.iloc[:-horizon].assign(unique_id=unique_id)[out_cols]
    test_df = rainfall.iloc[-horizon:].assign(unique_id=unique_id)[out_cols]
    return train_df, test_df

# Evaluate TimeGPT on the Specific Humidity series, holding out the last 24 rows.
timegpt_prediction(load_humidity, horizon=24, frequency="M", api_key=api_key, level=(80, 90))

In [None]:
def load_relative_humidity(horizon=24, unique_id="AP1"):
    """Load the ``Relative Humidity`` series from Rainfall_data.csv as train/test frames.

    ``ds`` is assembled from the ``Year``, ``Month`` and ``Day`` columns and
    ``y`` is ``Relative Humidity`` coerced to numeric; rows whose ``y`` cannot
    be parsed are dropped. The mean of ``y`` is printed. The last ``horizon``
    rows form the test set.

    Returns:
        tuple: ``(train_df, test_df)``, each with columns
        ``unique_id``, ``ds``, ``y``.
    """
    rainfall = pd.read_csv("https://raw.githubusercontent.com/ahamed14051/time-series-dataset/refs/heads/main/Rainfall_data.csv")

    # Timestamp from the date parts; target from the Relative Humidity column.
    rainfall['ds'] = pd.to_datetime(rainfall[['Year', 'Month', 'Day']])
    rainfall['y'] = pd.to_numeric(rainfall['Relative Humidity'], errors='coerce')
    rainfall = rainfall.dropna(subset=['y'])

    print('Mean Relative Humidity:', rainfall['y'].mean())

    # Hold out the final `horizon` rows and tag both parts with the series id.
    out_cols = ['unique_id', 'ds', 'y']
    train_df = rainfall.iloc[:-horizon].assign(unique_id=unique_id)[out_cols]
    test_df = rainfall.iloc[-horizon:].assign(unique_id=unique_id)[out_cols]
    return train_df, test_df

# Evaluate TimeGPT on the Relative Humidity series, holding out the last 24 rows.
timegpt_prediction(load_relative_humidity, horizon=24, frequency="M", api_key=api_key, level=(80, 90))

In [None]:
def load_birth(horizon=24, unique_id="AP1"):
    """Download daily-total-female-births.csv and split it into train/test frames.

    The ``Unnamed: 2`` column is dropped if present. ``Date`` is coerced to
    datetimes (renamed ``ds``) and ``Births`` to numbers (renamed ``y``); rows
    where either conversion fails are discarded. The mean of ``y`` and the
    frame's shape are printed. The last ``horizon`` rows form the test set.

    Returns:
        tuple: ``(train_df, test_df)``, each with columns
        ``unique_id``, ``ds``, ``y``.
    """
    births = pd.read_csv("https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-total-female-births.csv")
    births = births.drop(columns=['Unnamed: 2'], errors='ignore')

    # Rows whose date cannot be parsed (e.g. trailing description text) are removed.
    births['Date'] = pd.to_datetime(births['Date'], errors='coerce')
    births = births.dropna(subset=['Date']).rename(columns={"Date": "ds", "Births": "y"})

    # Non-numeric targets become NaN and are removed as well.
    births['y'] = pd.to_numeric(births['y'], errors='coerce')
    births = births.dropna(subset=['y'])

    print('Mean:', births['y'].mean())
    print(births.shape)

    # Hold out the final `horizon` rows and tag both parts with the series id.
    out_cols = ['unique_id', 'ds', 'y']
    train_df = births.iloc[:-horizon].assign(unique_id=unique_id)[out_cols]
    test_df = births.iloc[-horizon:].assign(unique_id=unique_id)[out_cols]
    return train_df, test_df

# Evaluate TimeGPT on the daily female-births series, holding out the last 24 days.
# The source file is daily data, so the declared frequency is 'D' rather than the
# copy-pasted monthly 'M'.
timegpt_prediction(
    dataset=load_birth,
    horizon=24,            # 24 days ahead
    frequency='D',
    api_key=api_key,
    level=(80, 90)
)

In [None]:
def load_store(horizon=24, unique_id="AP1"):
    """Download store.csv and return one store/product series as train/test frames.

    Only rows with ``store == 0`` and ``product == 0`` are kept, after which
    both columns are dropped. ``Date`` is coerced to datetimes (renamed ``ds``)
    and ``number_sold`` to numbers (renamed ``y``); rows where either
    conversion fails are discarded. The mean of ``y`` and the frame's shape
    are printed. The last ``horizon`` rows form the test set.

    Returns:
        tuple: ``(train_df, test_df)``, each with columns
        ``unique_id``, ``ds``, ``y``.
    """
    sales = pd.read_csv("https://raw.githubusercontent.com/ahamed14051/time-series-dataset/refs/heads/main/store.csv")

    # Restrict to a single series: store 0, product 0.
    is_target_series = (sales['store'] == 0) & (sales['product'] == 0)
    sales = sales[is_target_series].drop(columns=['store', 'product'])

    # Rows whose date cannot be parsed are removed.
    sales['Date'] = pd.to_datetime(sales['Date'], errors='coerce')
    sales = sales.dropna(subset=['Date']).rename(columns={"Date": "ds", "number_sold": "y"})

    # Non-numeric targets become NaN and are removed as well.
    sales['y'] = pd.to_numeric(sales['y'], errors='coerce')
    sales = sales.dropna(subset=['y'])

    print('Mean:', sales['y'].mean())
    print(sales.shape)

    # Hold out the final `horizon` rows and tag both parts with the series id.
    out_cols = ['unique_id', 'ds', 'y']
    train_df = sales.iloc[:-horizon].assign(unique_id=unique_id)[out_cols]
    test_df = sales.iloc[-horizon:].assign(unique_id=unique_id)[out_cols]
    return train_df, test_df

# Evaluate TimeGPT on the store-0 / product-0 sales series, holding out the last 24 rows.
# NOTE(review): confirm that 'M' matches the cadence of store.csv.
timegpt_prediction(load_store, horizon=24, frequency="M", api_key=api_key, level=(80, 90))

In [None]:
def load_hospitality(horizon=24, unique_id="AP1"):
    """Download HospitalityEmployees.csv and split it into train/test frames.

    ``Date`` is coerced to datetimes (renamed ``ds``) and ``Employees`` to
    numbers (renamed ``y``); rows where either conversion fails are discarded.
    The mean of ``y`` and the frame's shape are printed. The last ``horizon``
    rows form the test set.

    Returns:
        tuple: ``(train_df, test_df)``, each with columns
        ``unique_id``, ``ds``, ``y``.
    """
    employees = pd.read_csv("https://raw.githubusercontent.com/ahamed14051/time-series-dataset/refs/heads/main/HospitalityEmployees.csv")

    # Rows whose date cannot be parsed are removed.
    employees['Date'] = pd.to_datetime(employees['Date'], errors='coerce')
    employees = employees.dropna(subset=['Date']).rename(columns={"Date": "ds", "Employees": "y"})

    # Non-numeric targets become NaN and are removed as well.
    employees['y'] = pd.to_numeric(employees['y'], errors='coerce')
    employees = employees.dropna(subset=['y'])

    print('Mean:', employees['y'].mean())
    print(employees.shape)

    # Hold out the final `horizon` rows and tag both parts with the series id.
    out_cols = ['unique_id', 'ds', 'y']
    train_df = employees.iloc[:-horizon].assign(unique_id=unique_id)[out_cols]
    test_df = employees.iloc[-horizon:].assign(unique_id=unique_id)[out_cols]
    return train_df, test_df

# Evaluate TimeGPT on the hospitality-employees series, holding out the last 24 rows.
timegpt_prediction(load_hospitality, horizon=24, frequency="M", api_key=api_key, level=(80, 90))

In [None]:
# Turn the accumulated metrics into a table with one row per dataset,
# then write it (including the dataset-name index) to CSV.
timeGPT_results = pd.DataFrame(RESULTS).transpose()
timeGPT_results.to_csv("timeGPT_results.csv")

In [None]:
# Show the metrics table as this cell's output.
timeGPT_results