### Load packages and mount Google Drive


In [1]:
# Numerical and tabular libraries
import numpy as np
import pandas as pd
# Colab helper for Google Drive; the mount makes Drive files reachable under /content/drive/
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [2]:
# Work from the project folder on the mounted Drive so that relative file names resolve there
%cd /content/drive/MyDrive/GE_Capstone/Model Test/Forecasting

/content/drive/MyDrive/GE_Capstone/Model Test/Forecasting


### Modify data format for PdM data


In [None]:
# Load every sheet of the PdM workbook as a dict of {sheet name: DataFrame}
file_path = "PdM_forecasting.xlsx"
all_sheets = pd.read_excel(file_path, sheet_name=None)  # sheet_name=None -> read all sheets

# Reformatted sheets, keyed by their original sheet name
modified_sheets = {}

for sheet_name, df in all_sheets.items():
    # The renaming below is purely positional, so fail with a message that names the
    # offending sheet instead of pandas' generic "Length mismatch" error.
    if df.shape[1] != 2:
        raise ValueError(
            f"Sheet '{sheet_name}' has {df.shape[1]} columns; expected exactly 2 "
            "(time column followed by value column)."
        )

    # First column -> "timestamp", second column -> "target"
    df.columns = ['timestamp', 'target']

    # Prepend a constant "item_id" column (every row of every sheet gets the id 1)
    df.insert(0, 'item_id', 1)

    # Parse the timestamps and store them as "%Y-%m-%d %H:%M:%S" strings
    df['timestamp'] = pd.to_datetime(df['timestamp']).dt.strftime('%Y-%m-%d %H:%M:%S')

    # df is the same object that all_sheets holds, so the loaded frames are modified in place
    modified_sheets[sheet_name] = df

# Write all reformatted sheets into one new workbook, keeping the sheet names
output_file = "Chronos_PdM_forecasting_reformat.xlsx"

with pd.ExcelWriter(output_file) as writer:
    for sheet_name, df in modified_sheets.items():
        df.to_excel(writer, sheet_name=sheet_name, index=False)

# Build the message from output_file so it cannot drift from the file actually written
print(f"Processing complete. Saved as '{output_file}'.")

Processing complete. Saved as 'Chronos_PdM_forecasting_reformat.xlsx'.


### Modify data format for other forecasting datasets

In [7]:
import os

# Folder holding the raw forecasting CSVs, and the folder that receives the reformatted copies
input_dir = "/content/drive/MyDrive/GE_Capstone/Model Test/Forecasting/Forecasting Data/"
output_dir = "/content/drive/MyDrive/GE_Capstone/Model Test/Forecasting/Chronos Forecasting Data Reformat/"

# Create the destination folder when missing; exist_ok=True makes re-running the cell harmless
os.makedirs(output_dir, exist_ok=True)

# Keep only the directory entries whose name ends in '.csv' (case-sensitive match)
csv_files = list(filter(lambda name: name.endswith('.csv'), os.listdir(input_dir)))

# Function to convert timestamp format
def convert_timestamp(df, start='2000-01-01', freq='D'):
    """Rewrite ``df['timestamp']`` as '%Y-%m-%d %H:%M:%S' strings.

    Two kinds of input are handled:

    * Integer dtype: the values are replaced by a synthetic date range of
      ``len(df)`` periods beginning at ``start`` with step ``freq``.  The
      original integers are discarded.
    * Anything else: parsed with ``pd.to_datetime(..., errors='coerce')``.
      Values that cannot be parsed become NaT and end up as missing values in
      the formatted column; a ``UserWarning`` reports how many were lost so
      the problem does not pass unnoticed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'timestamp' column.  It is modified in place.
    start : str or datetime-like, default '2000-01-01'
        First date of the synthetic range used for integer timestamps.
    freq : str, default 'D'
        Pandas frequency alias for the synthetic range.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with 'timestamp' holding formatted strings.
    """
    import warnings  # local import: the function needs nothing beyond the notebook's pandas import

    if pd.api.types.is_integer_dtype(df['timestamp']):
        # Integer column: substitute evenly spaced synthetic dates
        df['timestamp'] = pd.date_range(start=start, periods=len(df), freq=freq)
    else:
        # Remember which entries were missing beforehand so only genuine parse
        # failures are counted below.
        already_missing = df['timestamp'].isna()
        parsed = pd.to_datetime(df['timestamp'], errors='coerce')
        n_failed = int((parsed.isna() & ~already_missing).sum())
        if n_failed:
            warnings.warn(
                f"{n_failed} of {len(df)} timestamp value(s) could not be parsed "
                "and were set to NaT",
                UserWarning,
                stacklevel=2,
            )
        df['timestamp'] = parsed

    # Format as string; NaT entries come out as missing values
    df['timestamp'] = df['timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S')
    return df

# Reformat every CSV listed in csv_files and write the result into output_dir
for file in csv_files:
    print(f"Start to Process: {file}")
    file_path = os.path.join(input_dir, file)

    # Read CSV file
    df = pd.read_csv(file_path)

    # The renaming below is purely positional, so fail with a message that names the
    # offending file instead of pandas' generic "Length mismatch" error.
    if df.shape[1] != 2:
        raise ValueError(
            f"{file} has {df.shape[1]} columns; expected exactly 2 "
            "(time column followed by value column)."
        )

    # First column -> "timestamp", second column -> "target"
    df.columns = ['timestamp', 'target']

    # Convert timestamps to '%Y-%m-%d %H:%M:%S' strings
    df = convert_timestamp(df)

    # Insert a constant item_id of 1 as the first column (the file name is NOT used as the id)
    df.insert(0, 'item_id', 1)

    # Round target column to 2 decimal places
    # NOTE(review): this discards precision for small-magnitude series, and the PdM
    # workbook cell does not round at all; confirm the rounding is intended.
    df['target'] = df['target'].round(2)

    # Output keeps the input file name, prefixed with "reform_"
    output_file = f"reform_{file}"
    output_path = os.path.join(output_dir, output_file)

    # Save the reformatted CSV file without the pandas index column
    df.to_csv(output_path, index=False)

    print(f"Processed: {file} → {output_file}")

print("All files reformatted and saved successfully!")


Start to Process: MonthlyMilkDataset_DARTS_168.csv
Processed: MonthlyMilkDataset_DARTS_168.csv → reform_MonthlyMilkDataset_DARTS_168.csv
Start to Process: AirPassengersDataset_DARTS_144.csv
Processed: AirPassengersDataset_DARTS_144.csv → reform_AirPassengersDataset_DARTS_144.csv
Start to Process: SunspotsDataset_DARTS_2820.csv
Processed: SunspotsDataset_DARTS_2820.csv → reform_SunspotsDataset_DARTS_2820.csv
Start to Process: TemperatureDataset_DARTS_3650.csv
Processed: TemperatureDataset_DARTS_3650.csv → reform_TemperatureDataset_DARTS_3650.csv
Start to Process: HeaterDataset_DARTS_198.csv
Processed: HeaterDataset_DARTS_198.csv → reform_HeaterDataset_DARTS_198.csv
Start to Process: IceCreamDataset_DARTS_198.csv
Processed: IceCreamDataset_DARTS_198.csv → reform_IceCreamDataset_DARTS_198.csv
Start to Process: TemperatureDataset_DARTS_500.csv
Processed: TemperatureDataset_DARTS_500.csv → reform_TemperatureDataset_DARTS_500.csv
Start to Process: TemperatureDataset_DARTS_2000.csv


  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime
  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime
  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime
  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime


Processed: TemperatureDataset_DARTS_2000.csv → reform_TemperatureDataset_DARTS_2000.csv
Start to Process: TemperatureDataset_DARTS_200.csv
Processed: TemperatureDataset_DARTS_200.csv → reform_TemperatureDataset_DARTS_200.csv
Start to Process: ILINetDataset_DARTS_age25to64_364.csv
Processed: ILINetDataset_DARTS_age25to64_364.csv → reform_ILINetDataset_DARTS_age25to64_364.csv
Start to Process: ILINetDataset_DARTS_%WEIGHTED_ILI_1041.csv
Processed: ILINetDataset_DARTS_%WEIGHTED_ILI_1041.csv → reform_ILINetDataset_DARTS_%WEIGHTED_ILI_1041.csv
Start to Process: ILINetDataset_DARTS_age0to4_1041.csv
Processed: ILINetDataset_DARTS_age0to4_1041.csv → reform_ILINetDataset_DARTS_age0to4_1041.csv
Start to Process: ILINetDataset_DARTS_age25to49_677.csv


  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime
  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime
  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime
  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime


Processed: ILINetDataset_DARTS_age25to49_677.csv → reform_ILINetDataset_DARTS_age25to49_677.csv
Start to Process: ILINetDataset_DARTS_age5to24_1041.csv
Processed: ILINetDataset_DARTS_age5to24_1041.csv → reform_ILINetDataset_DARTS_age5to24_1041.csv
Start to Process: ILINetDataset_DARTS_age50to64_677.csv
Processed: ILINetDataset_DARTS_age50to64_677.csv → reform_ILINetDataset_DARTS_age50to64_677.csv
Start to Process: ILINetDataset_DARTS_age65_1041.csv
Processed: ILINetDataset_DARTS_age65_1041.csv → reform_ILINetDataset_DARTS_age65_1041.csv
Start to Process: ILINetDataset_DARTS_illtotal_1041.csv


  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime
  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime
  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime
  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime


Processed: ILINetDataset_DARTS_illtotal_1041.csv → reform_ILINetDataset_DARTS_illtotal_1041.csv
Start to Process: ILINetDataset_DARTS_num_provider_1041.csv
Processed: ILINetDataset_DARTS_num_provider_1041.csv → reform_ILINetDataset_DARTS_num_provider_1041.csv
Start to Process: ILINetDataset_DARTS_total_patient_1041.csv
Processed: ILINetDataset_DARTS_total_patient_1041.csv → reform_ILINetDataset_DARTS_total_patient_1041.csv
Start to Process: ExchangeRateDataset_DARTS_aus_7588.csv


  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime


Processed: ExchangeRateDataset_DARTS_aus_7588.csv → reform_ExchangeRateDataset_DARTS_aus_7588.csv
Start to Process: ExchangeRateDataset_DARTS_brt_7588.csv
Processed: ExchangeRateDataset_DARTS_brt_7588.csv → reform_ExchangeRateDataset_DARTS_brt_7588.csv
Start to Process: ExchangeRateDataset_DARTS_brt_3000.csv
Processed: ExchangeRateDataset_DARTS_brt_3000.csv → reform_ExchangeRateDataset_DARTS_brt_3000.csv
Start to Process: ExchangeRateDataset_DARTS_aus_3000.csv
Processed: ExchangeRateDataset_DARTS_aus_3000.csv → reform_ExchangeRateDataset_DARTS_aus_3000.csv
Start to Process: ExchangeRateDataset_DARTS_aus_1000.csv
Processed: ExchangeRateDataset_DARTS_aus_1000.csv → reform_ExchangeRateDataset_DARTS_aus_1000.csv
Start to Process: ExchangeRateDataset_DARTS_brt_1000.csv
Processed: ExchangeRateDataset_DARTS_brt_1000.csv → reform_ExchangeRateDataset_DARTS_brt_1000.csv
Start to Process: ExchangeRateDataset_DARTS_can_7588.csv
Processed: ExchangeRateDataset_DARTS_can_7588.csv → reform_ExchangeRat

  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime


Processed: AirQuality_UCI_ML_Repo_CO_9357.csv → reform_AirQuality_UCI_ML_Repo_CO_9357.csv
Start to Process: AirQuality_UCI_ML_Repo_CO_3000.csv


  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime


Processed: AirQuality_UCI_ML_Repo_CO_3000.csv → reform_AirQuality_UCI_ML_Repo_CO_3000.csv
Start to Process: AirQuality_UCI_ML_Repo_CO_1000.csv
Processed: AirQuality_UCI_ML_Repo_CO_1000.csv → reform_AirQuality_UCI_ML_Repo_CO_1000.csv
Start to Process: AirQuality_UCI_ML_Repo_O3_9357.csv


  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime
  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime


Processed: AirQuality_UCI_ML_Repo_O3_9357.csv → reform_AirQuality_UCI_ML_Repo_O3_9357.csv
Start to Process: AirQuality_UCI_ML_Repo_O3_3000.csv


  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime


Processed: AirQuality_UCI_ML_Repo_O3_3000.csv → reform_AirQuality_UCI_ML_Repo_O3_3000.csv
Start to Process: AirQuality_UCI_ML_Repo_O3_1000.csv
Processed: AirQuality_UCI_ML_Repo_O3_1000.csv → reform_AirQuality_UCI_ML_Repo_O3_1000.csv
Start to Process: AirQuality_UCI_ML_Repo_RH_9357.csv


  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime
  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime


Processed: AirQuality_UCI_ML_Repo_RH_9357.csv → reform_AirQuality_UCI_ML_Repo_RH_9357.csv
Start to Process: AirQuality_UCI_ML_Repo_RH_3000.csv


  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime


Processed: AirQuality_UCI_ML_Repo_RH_3000.csv → reform_AirQuality_UCI_ML_Repo_RH_3000.csv
Start to Process: AirQuality_UCI_ML_Repo_RH_1000.csv
Processed: AirQuality_UCI_ML_Repo_RH_1000.csv → reform_AirQuality_UCI_ML_Repo_RH_1000.csv
Start to Process: WeatherDataset_DARTS_windspeed_10000.csv
Processed: WeatherDataset_DARTS_windspeed_10000.csv → reform_WeatherDataset_DARTS_windspeed_10000.csv
Start to Process: WeatherDataset_DARTS_windspeed_1000.csv
Processed: WeatherDataset_DARTS_windspeed_1000.csv → reform_WeatherDataset_DARTS_windspeed_1000.csv
Start to Process: WeatherDataset_DARTS_windspeed_3000.csv


  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime


Processed: WeatherDataset_DARTS_windspeed_3000.csv → reform_WeatherDataset_DARTS_windspeed_3000.csv
Start to Process: WeatherDataset_DARTS_pressure_10000.csv
Processed: WeatherDataset_DARTS_pressure_10000.csv → reform_WeatherDataset_DARTS_pressure_10000.csv
Start to Process: WeatherDataset_DARTS_pressure_1000.csv
Processed: WeatherDataset_DARTS_pressure_1000.csv → reform_WeatherDataset_DARTS_pressure_1000.csv
Start to Process: WeatherDataset_DARTS_pressure_3000.csv
Processed: WeatherDataset_DARTS_pressure_3000.csv → reform_WeatherDataset_DARTS_pressure_3000.csv
Start to Process: EnergyDataset_DARTS_fossilgas_10000.csv
Processed: EnergyDataset_DARTS_fossilgas_10000.csv → reform_EnergyDataset_DARTS_fossilgas_10000.csv
Start to Process: EnergyDataset_DARTS_fossilgas_1000.csv
Processed: EnergyDataset_DARTS_fossilgas_1000.csv → reform_EnergyDataset_DARTS_fossilgas_1000.csv
Start to Process: EnergyDataset_DARTS_fossilgas_3000.csv
Processed: EnergyDataset_DARTS_fossilgas_3000.csv → reform_Ene

  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime


Processed: ETTh1Dataset_DARTS_OT_17420.csv → reform_ETTh1Dataset_DARTS_OT_17420.csv
Start to Process: ETTh1Dataset_DARTS_OT_10000.csv


  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime


Processed: ETTh1Dataset_DARTS_OT_10000.csv → reform_ETTh1Dataset_DARTS_OT_10000.csv
Start to Process: ETTh1Dataset_DARTS_OT_3000.csv


  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime
  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime


Processed: ETTh1Dataset_DARTS_OT_3000.csv → reform_ETTh1Dataset_DARTS_OT_3000.csv
Start to Process: ETTh1Dataset_DARTS_OT_1000.csv
Processed: ETTh1Dataset_DARTS_OT_1000.csv → reform_ETTh1Dataset_DARTS_OT_1000.csv
Start to Process: ETTh2Dataset_DARTS_OT_17420.csv


  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime


Processed: ETTh2Dataset_DARTS_OT_17420.csv → reform_ETTh2Dataset_DARTS_OT_17420.csv
Start to Process: ETTh2Dataset_DARTS_OT_10000.csv


  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime


Processed: ETTh2Dataset_DARTS_OT_10000.csv → reform_ETTh2Dataset_DARTS_OT_10000.csv
Start to Process: ETTh2Dataset_DARTS_OT_3000.csv


  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime


Processed: ETTh2Dataset_DARTS_OT_3000.csv → reform_ETTh2Dataset_DARTS_OT_3000.csv
Start to Process: ETTh2Dataset_DARTS_OT_1000.csv
Processed: ETTh2Dataset_DARTS_OT_1000.csv → reform_ETTh2Dataset_DARTS_OT_1000.csv
Start to Process: ETTm1Dataset_DARTS_OT_20000.csv


  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime
  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime


Processed: ETTm1Dataset_DARTS_OT_20000.csv → reform_ETTm1Dataset_DARTS_OT_20000.csv
Start to Process: ETTm1Dataset_DARTS_OT_10000.csv


  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime


Processed: ETTm1Dataset_DARTS_OT_10000.csv → reform_ETTm1Dataset_DARTS_OT_10000.csv
Start to Process: ETTm1Dataset_DARTS_OT_3000.csv
Processed: ETTm1Dataset_DARTS_OT_3000.csv → reform_ETTm1Dataset_DARTS_OT_3000.csv
Start to Process: ETTm1Dataset_DARTS_OT_1000.csv


  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime
  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime


Processed: ETTm1Dataset_DARTS_OT_1000.csv → reform_ETTm1Dataset_DARTS_OT_1000.csv
Start to Process: ETTm2Dataset_DARTS_OT_20000.csv


  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime


Processed: ETTm2Dataset_DARTS_OT_20000.csv → reform_ETTm2Dataset_DARTS_OT_20000.csv
Start to Process: ETTm2Dataset_DARTS_OT_10000.csv


  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime


Processed: ETTm2Dataset_DARTS_OT_10000.csv → reform_ETTm2Dataset_DARTS_OT_10000.csv
Start to Process: ETTm2Dataset_DARTS_OT_3000.csv


  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime


Processed: ETTm2Dataset_DARTS_OT_3000.csv → reform_ETTm2Dataset_DARTS_OT_3000.csv
Start to Process: ETTm2Dataset_DARTS_OT_1000.csv
Processed: ETTm2Dataset_DARTS_OT_1000.csv → reform_ETTm2Dataset_DARTS_OT_1000.csv
Start to Process: SunspotsDataset_DARTS_500.csv
Processed: SunspotsDataset_DARTS_500.csv → reform_SunspotsDataset_DARTS_500.csv
Start to Process: AusBeerDataset_DARTS_211.csv
Processed: AusBeerDataset_DARTS_211.csv → reform_AusBeerDataset_DARTS_211.csv
Start to Process: GasRateDataset_DARTS_296.csv
Processed: GasRateDataset_DARTS_296.csv → reform_GasRateDataset_DARTS_296.csv
Start to Process: CO2Dataset_DARTS_296.csv


  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime


Processed: CO2Dataset_DARTS_296.csv → reform_CO2Dataset_DARTS_296.csv
Start to Process: TaylorDataset_DARTS_4032.csv


  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime


Processed: TaylorDataset_DARTS_4032.csv → reform_TaylorDataset_DARTS_4032.csv
Start to Process: WineDataset_DARTS_176.csv
Processed: WineDataset_DARTS_176.csv → reform_WineDataset_DARTS_176.csv
Start to Process: WoolyDataset_DARTS_119.csv
Processed: WoolyDataset_DARTS_119.csv → reform_WoolyDataset_DARTS_119.csv
Start to Process: USGasolineDataset_DARTS_1578.csv


  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime
  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime
  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime


Processed: USGasolineDataset_DARTS_1578.csv → reform_USGasolineDataset_DARTS_1578.csv
Start to Process: HeartRateDataset_DARTS_1800.csv
Processed: HeartRateDataset_DARTS_1800.csv → reform_HeartRateDataset_DARTS_1800.csv
All files reformatted and saved successfully!


  df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')  # Convert to datetime
