In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

In [4]:
# Load the three raw weather datasets (paths are relative to the notebook's
# working directory).
simulated_data = pd.read_csv('Simulated_Weather_Data.csv')
training_data = pd.read_csv('Training_Data.csv')
testing_data = pd.read_csv('Testing_Data.csv')

# Print the column names so the selections below can be checked against the files.
print("Simulated Data Columns:", simulated_data.columns)
print("Training Data Columns:", training_data.columns)
print("Testing Data Columns:", testing_data.columns)

# Feature columns as they are named in each source. The training and testing
# files share the same raw names; `feature_renames` maps those raw names onto
# the naming used by the simulated dataset so the frames can be stacked.
common_features_simulated = ['Temperature', 'WindSpeed', 'Humidity']
feature_renames = {
    'temperature': 'Temperature',
    'wv (m/s)': 'WindSpeed',
    'rh (%)': 'Humidity',
    'precipitation': 'Precipitation',
}
common_features_training = list(feature_renames)
common_features_testing = list(feature_renames)

# Keep only the relevant columns and harmonise their names.
# Training/testing rows carry no zone information of their own, so they are
# all tagged with Zone = 1 to line up with the simulated frame's 'Zone' column.
simulated_data = simulated_data[['Time', 'Zone'] + common_features_simulated]
training_data = (
    training_data[common_features_training]
    .rename(columns=feature_renames)
    .assign(Zone=1)
)
testing_data = (
    testing_data[common_features_testing]
    .rename(columns=feature_renames)
    .assign(Zone=1)
)

# Stack the three frames row-wise. Columns that a frame does not have become
# NaN for its rows: 'Precipitation' for the simulated rows, and 'Time' for the
# training/testing rows.
combined_data = pd.concat(
    [simulated_data, training_data, testing_data],
    ignore_index=True,
)

# Impute missing values with the per-column mean.
# numeric_only=True is required: the frame still contains 'Time', and calling
# DataFrame.mean() on a non-numeric column raises TypeError in pandas >= 2.0
# (older versions silently skipped such columns). Non-numeric columns are
# therefore left unfilled.
# NOTE(review): if 'Time' is numeric it will be mean-filled for the
# training/testing rows, which is probably not meaningful -- confirm its dtype.
numeric_means = combined_data.mean(numeric_only=True)
combined_data = combined_data.fillna(numeric_means)

# Min-max scale the three features shared by every source.
# 'Precipitation' is kept in the frame but is not scaled here.
# NOTE(review): the scaler (and the imputation means above) are fitted on the
# combined data, testing rows included -- confirm this is intended if the
# testing set is later used for evaluation.
scaler = MinMaxScaler()
common_features = ['Temperature', 'WindSpeed', 'Humidity']
combined_data[common_features] = scaler.fit_transform(combined_data[common_features])

# Display the first rows of the processed dataset.
print(combined_data.head())

Simulated Data Columns: Index(['Time', 'Zone', 'Temperature', 'WindSpeed', 'Humidity'], dtype='object')
Training Data Columns: Index(['Unnamed: 0', 'date', 'temperature_max', 'temperature_min', 'wind',
       'weather', 'p (mbar)', 'temperature_x', 'Tpot (K)', 'Tdew (degC)',
       'rh (%)', 'VPmax (mbar)', 'VPact (mbar)', 'VPdef (mbar)', 'sh (g/kg)',
       'H2OC (mmol/mol)', 'rho (g/m**3)', 'wv (m/s)', 'max. wv (m/s)',
       'wd (deg)', 'precipitation_y', 'raining (s)', 'SWDR (W/m�)',
       'PAR (�mol/m�/s)', 'max. PAR (�mol/m�/s)', 'Tlog (degC)', 'OT',
       'Summary', 'precipitation_type', 'temperature_y',
       'Apparent Temperature (C)', 'Humidity', 'Wind Speed (km/h)',
       'Wind Bearing (degrees)', 'Visibility (km)', 'Loud Cover',
       'Pressure (millibars)', 'temperature', 'dwpt', 'rhum', 'precipitation',
       'wdir', 'wspd', 'pres'],
      dtype='object')
Testing Data Columns: Index(['Unnamed: 0', 'date', 'temperature_max', 'temperature_min', 'wind',
       'weather