In [None]:
import sys
import os
import pandas as pd
import matplotlib.pyplot as plt

# Resolve the project root as the parent of the current working directory so
# that project packages can be imported from this notebook.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))

# Path of the 'data' directory located directly under the project root.
data_dir = os.path.join(project_root, 'data')

# Register both directories on the Python path, project root first. Each one is
# appended only when absent, so re-running this cell does not pile up duplicate
# sys.path entries.
for _search_dir in (project_root, data_dir):
    if _search_dir not in sys.path:
        sys.path.append(_search_dir)

from data.loaders import load_raw_data, split_train_val_test
from data.preprocessors import preprocess_data

In [None]:
# Path of the raw training CSV: <data_dir>/raw/Train.csv
train_file_path = os.path.join(data_dir, 'raw', 'Train.csv')

# Read the raw training data through the project loader
train_data = load_raw_data(train_file_path)

# Report the dimensions of the loaded data
print("Train data shape:", train_data.shape)

# Preview the leading rows, then print the column summary produced by info()
print("\nFirst few rows of the train dataset:", train_data.head(), sep="\n")
print("\nTrain dataset info:")
train_data.info()

In [None]:
# Remove the 'Unnamed: 0', 'Time', and 'Location' columns.
# errors='ignore' skips any of them that are not present, so this cell can be
# re-run safely after the columns have already been dropped.
train_data = train_data.drop(
    columns=['Unnamed: 0', 'Time', 'Location'],
    errors='ignore',
)

# Partition the remaining data into training, validation, and test sets using
# a fixed random_state.
train_df, val_df, test_df = split_train_val_test(
    train_data,
    test_ratio=0.15,
    val_ratio=0.15,
    random_state=42,
)

# Report the shape of each resulting split
print("\nAfter splitting:")
for _label, _frame in (
    ("Training set shape:", train_df),
    ("Validation set shape:", val_df),
    ("Test set shape:", test_df),
):
    print(_label, _frame.shape)

In [None]:
# Feature columns fed to the model and the target column to predict
input_columns = [
    'Temp_2m', 'RelHum_2m', 'DP_2m',
    'WS_10m', 'WS_100m',
    'WD_10m', 'WD_100m',
    'WG_10m',
]
output_column = 'Power'

# Separate the input features (X) and the target (y) for the training,
# validation, and test sets
train_X = train_df[input_columns]
train_y = train_df[output_column]

val_X = val_df[input_columns]
val_y = val_df[output_column]

test_X = test_df[input_columns]
test_y = test_df[output_column]

# Run the project preprocessing step on each split's input features.
# NOTE(review): preprocess_data is invoked independently per split. If it fits
# its scaling statistics on whatever frame it receives, validation and test
# features would be scaled differently from the training features -- confirm
# against the preprocess_data implementation.
train_X_scaled = preprocess_data(train_X)
val_X_scaled = preprocess_data(val_X)
test_X_scaled = preprocess_data(test_X)

# Report the shape of each preprocessed feature set
for _label, _scaled in (
    ("Training set scaled shape:", train_X_scaled),
    ("Validation set scaled shape:", val_X_scaled),
    ("Test set scaled shape:", test_X_scaled),
):
    print(_label, _scaled.shape)