In [24]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
import data_prep
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

In [25]:
# Load the raw solar-radiation measurements; the path is relative to the
# notebook's working directory.
df = pd.read_csv('SolarPrediction.csv')
# Bare last expression: rich display of the frame (pandas elides the middle rows).
df

Unnamed: 0,UNIXTime,Data,Time,Radiation,Temperature,Pressure,Humidity,WindDirection(Degrees),Speed,TimeSunRise,TimeSunSet
0,1475229326,9/29/2016 12:00:00 AM,23:55:26,1.21,48,30.46,59,177.39,5.62,06:13:00,18:13:00
1,1475229023,9/29/2016 12:00:00 AM,23:50:23,1.21,48,30.46,58,176.78,3.37,06:13:00,18:13:00
2,1475228726,9/29/2016 12:00:00 AM,23:45:26,1.23,48,30.46,57,158.75,3.37,06:13:00,18:13:00
3,1475228421,9/29/2016 12:00:00 AM,23:40:21,1.21,48,30.46,60,137.71,3.37,06:13:00,18:13:00
4,1475228124,9/29/2016 12:00:00 AM,23:35:24,1.17,48,30.46,62,104.95,5.62,06:13:00,18:13:00
...,...,...,...,...,...,...,...,...,...,...,...
32681,1480587604,12/1/2016 12:00:00 AM,00:20:04,1.22,44,30.43,102,145.42,6.75,06:41:00,17:42:00
32682,1480587301,12/1/2016 12:00:00 AM,00:15:01,1.17,44,30.42,102,117.78,6.75,06:41:00,17:42:00
32683,1480587001,12/1/2016 12:00:00 AM,00:10:01,1.20,44,30.42,102,145.19,9.00,06:41:00,17:42:00
32684,1480586702,12/1/2016 12:00:00 AM,00:05:02,1.23,44,30.42,101,164.19,7.87,06:41:00,17:42:00


In [26]:
# Build the train/validation/test splits for the 'Radiation' target via the
# project helper. Per the log this call prints, every X split is shaped
# (n_windows, 24, 17) and every y split (n_windows, 1); one timestamp array is
# returned per split alongside the scalers, feature names and transform metadata.
X_train, X_val, X_test, y_train, y_val, y_test, scalers, feature_cols, transform_info, timestamps_train, timestamps_val, timestamps_test = data_prep.prepare_weather_data(
    df, 
    target_col='Radiation',
    window_size=24,  # 24 consecutive steps per window. NOTE(review): the rows displayed from the CSV are ~5 minutes apart, so 24 steps covers ~2 hours rather than a day unless data_prep resamples - confirm
    log_transform=True,
    use_solar_elevation=True
)

Adding solar elevation proxy feature
SolarElevation created for 32686 rows, 100.0% of data
Added 'Radiation_is_low' feature (threshold: 1.2000)
Added SolarElevation to features
Log-transformed Radiation -> Radiation_log
Using MinMaxScaler for feature scaling
X_train shape: (19596, 24, 17)
y_train shape: (19596, 1)
X_val shape: (6533, 24, 17)
y_val shape: (6533, 1)
X_test shape: (6533, 24, 17)
y_test shape: (6533, 1)
Features used: ['Radiation', 'Temperature', 'Pressure', 'Humidity', 'WindDirection(Degrees)', 'Speed', 'Radiation_is_low', 'SolarElevation', 'SunriseMinutes', 'SunsetMinutes', 'DaylightMinutes', 'TimeSinceSunrise', 'TimeUntilSunset', 'DaylightPosition', 'TimeMinutesSin', 'TimeMinutesCos', 'HourOfDay']


In [27]:
# Sanity check: there should be one timestamp per training window,
# i.e. the length should equal X_train.shape[0].
timestamps_train.shape


(19596,)

In [28]:
import tdmc
# Construct the TDMC model; training happens separately through model.fit(...).
model = tdmc.SolarTDMC(
    n_states=4,
    time_slices=24,
    n_components=3,  # 3 reduced components (presumably PCA - confirm in tdmc); fit() reports a (..., 24, 3) input shape
    # State names are not set here; they are supplied to model.fit(...) instead.
)

In [None]:
# Fit the model on the training windows.
# NOTE(review): the state names are attached by position; whether state i really
# matches the named weather regime depends on how tdmc orders its states - confirm.
model.fit(
    X_train,
    timestamps=timestamps_train,  # one timestamp per training window
    max_iter=100,
    state_names=['Clear', 'Partly Cloudy', 'Cloudy', 'Night']  # human-readable labels for the 4 states
)

Input data shape: (19596, 24, 3)
Setting n_emissions to: 3
Initialized emission_means shape: (4, 3)
Initialized emission_covars shape: (4, 3, 3)
n_sequences: 19596, sequence_length: 24, n_features: 3
Reshaped data shape: (470304, 3)
State assignments shape: (470304,)
Reshaped state assignments shape: (19596, 24)
State 0 data shape: (173988, 3)
State 1 data shape: (167232, 3)
State 2 data shape: (64908, 3)
State 3 data shape: (64176, 3)


In [None]:
# Produce a one-step-ahead forecast for every test window, map forecasts and
# targets back to the original scale, and plot predictions against the truth.
predictions = []
lower_bounds = []
upper_bounds = []

for i in range(len(X_test)):
    # Seed the forecast with the last time step of the current window.
    current_obs = X_test[i, -1, :]

    # model.forecast is given the hour of day (0-23) of the window's timestamp.
    current_hour = timestamps_test[i].hour

    # Reshape to (n_samples=1, sequence_length=1, n_features) before forecasting.
    current_obs_reshaped = current_obs.reshape(1, 1, -1)

    forecast, confidence_intervals = model.forecast(
        current_obs_reshaped,
        current_hour,
        forecast_horizon=1,
    )

    # Keep step 0 / emission 0 of the forecast and of the (lower, upper) bounds.
    # NOTE(review): this treats emission 0 as radiation on the same scale as
    # y_test. The model is built with n_components=3 and the inputs are
    # MinMax-scaled per the data_prep log, so confirm in tdmc that forecast()
    # returns values in target space rather than reduced-component space.
    predictions.append(forecast[0][0])
    lower_bounds.append(confidence_intervals[0][0][0])
    upper_bounds.append(confidence_intervals[1][0][0])

y_pred = np.array(predictions)
lower_bound = np.array(lower_bounds)
upper_bound = np.array(upper_bounds)

# Look up the applied log transform once (None when the target was not logged).
log_transform = next(
    (t for t in transform_info['transforms'] if t['type'] == 'log' and t['applied']),
    None,
)

# y_test is shaped (n, 1) while the predictions are (n,). Flatten the targets so
# both sides are 1-D; otherwise any element-wise comparison downstream would
# broadcast to an (n, n) matrix instead of pairing values up.
if log_transform is not None:
    # NOTE(review): assumes the forward transform was log(x + offset) - confirm
    # against data_prep before trusting the back-transformed values.
    offset = log_transform.get('offset', 0)
    y_pred_original = np.exp(y_pred) - offset
    lower_bound_original = np.exp(lower_bound) - offset
    upper_bound_original = np.exp(upper_bound) - offset
    y_test_original = np.exp(np.ravel(y_test)) - offset
else:
    y_pred_original = y_pred
    lower_bound_original = lower_bound
    upper_bound_original = upper_bound
    y_test_original = np.ravel(y_test)

# Only compare the span for which both a prediction and a target exist.
min_length = min(len(y_pred_original), len(y_test_original))
fig = model.plot_prediction_vs_actual(
    y_test_original[:min_length],
    y_pred_original[:min_length],
    timestamps_test[:min_length],
    confidence_intervals=(lower_bound_original[:min_length], upper_bound_original[:min_length]),
    title="TDMC Solar Irradiance Prediction"
)

In [None]:
# Diagnostic plots produced by the fitted model on the test split.

# Hidden-state view over the test windows.
model.plot_hidden_states(X_test, timestamps_test)

# Transition-probability heatmaps for a handful of hours across the day.
model.plot_transition_heatmaps(hours=[6, 9, 12, 15, 18, 21])

# Per-state characteristics.
model.plot_state_characteristics()

# Multi-step forecast accuracy.
# NOTE(review): y_test here is the prepared target (log-transformed according to
# the data_prep log), not y_test_original - confirm which scale this helper expects.
model.plot_forecast_horizon_accuracy(X_test, y_test, timestamps_test)

In [7]:
# Coerce the measurement columns to numeric dtypes; entries that cannot be
# parsed become NaN instead of raising.
numeric_cols = ['UNIXTime', 'Radiation', 'Temperature', 'Pressure', 'Humidity',
                'WindDirection(Degrees)', 'Speed']

for col in numeric_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Build the observation timestamp from the calendar date in 'Data' and the clock
# time in 'Time'. 'Data' values look like "9/29/2016 12:00:00 AM": the time part
# is a placeholder, so concatenating the whole string with 'Time' would put two
# times of day into one string and leave the result to the parser's fallback
# heuristics. Keep only the date token and parse with an explicit format.
date_part = df['Data'].str.split().str[0]
df['Timestamp'] = pd.to_datetime(
    date_part + ' ' + df['Time'],
    format='%m/%d/%Y %H:%M:%S',
)

# Hour of day (0-23), used as the time-slice index.
df['Hour'] = df['Timestamp'].dt.hour

In [None]:
import tdmc
# Second experiment: fit a 3-state model directly on two raw columns.
# NOTE(review): this rebinds `model`, replacing the 4-state model created
# earlier in the notebook; later cells see this one.
n_states=3
# X is a 2-D (n_rows, 2) array of the raw Radiation/Temperature columns and
# timestamps holds integer hours. NOTE(review): the earlier fit was given 3-D
# windows and timestamp objects - confirm tdmc accepts both input layouts.
X = df[['Radiation', 'Temperature']].values
timestamps = df['Hour'].values
# Initialize the model (two emissions: radiation and temperature)
model = tdmc.SolarTDMC(n_states=n_states, n_emissions=2, time_slices=24)

# Generic placeholder names: State_0 ... State_{n_states-1}
state_names = [f"State_{i}" for i in range(n_states)]

# Train the model
model.fit(
    X, timestamps, 
    max_iter=50, 
    state_names=state_names
)

In [None]:
from datetime import datetime, timedelta
def generate_forecast(model, df, forecast_horizon=24):
    """Forecast radiation and temperature for the hours following the last row of ``df``.

    The last row's ``Radiation``/``Temperature`` values and ``Hour`` seed
    ``model.forecast``; forecast timestamps are spaced one hour apart, starting
    one hour after the last row's date and time.

    NOTE(review): "last row" is positional. The CSV preview in this notebook
    lists rows newest-first within a day, so the last row may not be the most
    recent reading - sort chronologically before calling if that matters.

    Args:
        model: Object whose ``forecast(obs, hour, horizon)`` returns
            ``(forecasts, (conf_lower, conf_upper))``, each indexable as
            ``[:, 0]`` (radiation) and ``[:, 1]`` (temperature).
        df: Frame with ``Radiation``, ``Temperature``, ``Hour``, ``Data`` and
            ``Time`` columns and at least one row.
        forecast_horizon: Number of one-hour steps to forecast.

    Returns:
        DataFrame with one row per step: ``Timestamp``, ``Hour``, the two
        forecasted series and their lower/upper confidence bounds.

    Raises:
        ValueError: If ``df`` has no rows (there is no observation to seed from).
    """
    # Without a last observation nothing can be forecast, so fail early with a
    # clear message instead of an IndexError from deep inside the indexing below.
    if len(df) == 0:
        raise ValueError("generate_forecast requires a non-empty df to seed the forecast")

    # Seed values taken positionally from the final row.
    last_obs = df[['Radiation', 'Temperature']].values[-1]
    last_hour = df['Hour'].values[-1]

    forecasts, (conf_lower, conf_upper) = model.forecast(
        last_obs, last_hour, forecast_horizon
    )

    # 'Data' may carry a placeholder time of day (e.g. "12/1/2016 12:00:00 AM");
    # keep only its date token so the real clock time comes from 'Time' alone.
    date_part = str(df['Data'].iloc[-1]).split()[0]
    last_timestamp = pd.to_datetime(f"{date_part} {df['Time'].iloc[-1]}")
    forecast_times = [
        last_timestamp + timedelta(hours=step)
        for step in range(1, forecast_horizon + 1)
    ]

    return pd.DataFrame({
        'Timestamp': forecast_times,
        'Hour': [t.hour for t in forecast_times],
        'Forecasted_Radiation': forecasts[:, 0],
        'Forecasted_Temperature': forecasts[:, 1],
        'Radiation_Lower_CI': conf_lower[:, 0],
        'Radiation_Upper_CI': conf_upper[:, 0],
        'Temperature_Lower_CI': conf_lower[:, 1],
        'Temperature_Upper_CI': conf_upper[:, 1]
    })

# Build a 24-step forecast from whichever `model` and `df` are currently bound
# in the kernel.
forecast_df = generate_forecast(model, df, forecast_horizon=24) 
    


In [None]:
# Display the forecast table (one row per forecast step).
forecast_df

In [20]:
def plot_forecast(forecast_df):
    """Draw the radiation and temperature forecasts as two stacked panels.

    Each panel shows the forecast line over a shaded band spanning the
    corresponding lower/upper confidence-bound columns of ``forecast_df``.

    Args:
        forecast_df: Frame with ``Timestamp``, ``Forecasted_Radiation``,
            ``Forecasted_Temperature`` and the four ``*_Lower_CI`` /
            ``*_Upper_CI`` columns.

    Returns:
        The matplotlib figure holding both panels.
    """
    fig, axes = plt.subplots(2, 1, figsize=(12, 10))
    times = forecast_df['Timestamp']

    # One entry per panel, top to bottom:
    # (series column, lower column, upper column, line format, band colour,
    #  line label, panel title, y-axis label)
    panels = (
        ('Forecasted_Radiation', 'Radiation_Lower_CI', 'Radiation_Upper_CI',
         'b-', 'b', 'Forecasted Radiation', 'Radiation Forecast', 'Radiation'),
        ('Forecasted_Temperature', 'Temperature_Lower_CI', 'Temperature_Upper_CI',
         'r-', 'r', 'Forecasted Temperature', 'Temperature Forecast', 'Temperature (°F)'),
    )

    for axis, spec in zip(axes, panels):
        series_col, lower_col, upper_col, line_fmt, band_color, line_label, title, y_label = spec

        axis.plot(times, forecast_df[series_col], line_fmt, label=line_label)
        axis.fill_between(
            times,
            forecast_df[lower_col],
            forecast_df[upper_col],
            alpha=0.3, color=band_color, label='95% Confidence Interval'
        )
        axis.set_title(title)
        axis.set_xlabel('Time')
        axis.set_ylabel(y_label)
        axis.legend()
        axis.grid(True)

    plt.tight_layout()
    return fig


In [None]:
# Render the two-panel forecast figure for the table built above.
fig = plot_forecast(forecast_df)
plt.show()

In [None]:
    # Test the Markov property
# FIXME(review): the attribute on the next line is truncated (`tdmc.plot_`) and
# nothing is called, so this cell cannot yield the mapping with 'result' and
# 'message' keys that the prints index into. Replace it with the intended
# Markov-property test call - its name is not visible in this notebook.
markov_test = tdmc.plot_
print(f"\nMarkov Property Test: {markov_test['result']}")
print(f"Message: {markov_test['message']}")
    


In [None]:
# Plot the transition matrix
# NOTE(review): `tdmc` is the imported module here, whereas the other plotting
# helpers in this notebook are invoked on the fitted `model` instance. This
# presumably should be a method call on `model` - confirm that it exists.
tdmc.plot_transition_matrix()
    


In [None]:
# Plot state distributions for the first two features
# NOTE(review): called on the `tdmc` module rather than on the fitted `model`
# instance used by the other plotting cells - presumably a leftover from code
# where the model variable was named `tdmc`; confirm the intended receiver.
tdmc.plot_state_distributions([0, 1])