In [4]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

# Read the reshaped dataset from disk.
DATA_PATH = r"C:\Users\VIVEK KUMAR SINGH\Downloads\reshaped_data.csv"
df = pd.read_csv(DATA_PATH)

# Cells that are empty or whitespace-only become 0, then every remaining
# NaN becomes 0 as well, so no missing values are left in the frame.
df = df.replace(r'^\s*$', 0, regex=True).fillna(0)

print(df)



            id        50        53  100  103  150  153  200  203  250  ...  \
0       101000  0.909310  0.456836  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...   
1       101001  0.104083  0.493558  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...   
2       101002  0.089287  0.225000  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...   
3       101003  0.101333  0.272496  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...   
4       101004  0.076823  0.082900  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...   
...        ...       ...       ...  ...  ...  ...  ...  ...  ...  ...  ...   
11115  3402063  0.000000  0.000000  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...   
11116  3402069  0.000000  0.000000  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...   
11117  3402073  0.000000  0.000000  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...   
11118  3402088  0.000000  0.000000  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...   
11119  3402096  0.000000  0.000000  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...   

       3400  3403  3450  3453  3473  3500  3503  3550     3553 

In [None]:
# Forecast each node's next energy reading from its history of readings.
#
# Every training sample is a prefix of one node's series, right-aligned in a
# fixed-width window (zero-padded on the LEFT), so the most recent reading is
# always the last feature. The forecast step feeds the model the most recent
# `max_length` readings of each node in exactly that layout. (Padding on the
# right, as before, made the full-length forecast row one element longer than
# any training row and np.pad was called with a negative pad width.)


def build_training_windows(values):
    """Build right-aligned history windows and next-step targets.

    Parameters
    ----------
    values : np.ndarray, shape (n_nodes, n_steps)
        One row per node, one column per time step, in column order.

    Returns
    -------
    X : np.ndarray, shape (n_nodes * (n_steps - 1), n_steps - 1), float32
        Sample ``i * (n_steps - 1) + t`` holds ``values[i, :t + 1]`` placed at
        the right edge of the window; the unused left part is zero.
    y : np.ndarray, shape (n_nodes * (n_steps - 1),)
        The reading that follows each history, i.e. ``values[i, t + 1]``.
    """
    n_nodes, n_steps = values.shape
    width = n_steps - 1
    # float32 halves the memory of the window array; scikit-learn's forests
    # convert X to float32 internally anyway.
    windows = np.zeros((n_nodes, width, width), dtype=np.float32)
    for t in range(width):
        # History of length t + 1 for every node at once, right-aligned.
        windows[:, t, width - (t + 1):] = values[:, :t + 1]
    return windows.reshape(n_nodes * width, width), values[:, 1:].reshape(-1)


# Convert all values (except ID) to numeric; unparseable entries become 0.
numeric_values = df.iloc[:, 1:].apply(pd.to_numeric, errors='coerce').fillna(0)
# Assign column by column so each column really takes the numeric dtype
# (assigning through df.iloc[:, 1:] may keep an existing object dtype).
df[numeric_values.columns] = numeric_values
energy = numeric_values.to_numpy(dtype=float)

# Timestamp labels in the same order as the feature columns. The labels are
# strings read from the CSV header, so sorting them would be lexicographic
# ('100' before '50') and would no longer match the positional features.
timestamps = list(numeric_values.columns)

n_nodes, n_steps = energy.shape
if n_nodes == 0 or n_steps < 2:
    raise ValueError(
        "Need at least one node and two time-step columns to build "
        f"training samples; got {n_nodes} node(s) and {n_steps} time step(s)."
    )

# Width of every feature window: the longest history that still has a target.
max_length = n_steps - 1

# Prepare features (X) and target (y)
X_padded, y = build_training_windows(energy)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_padded, y, test_size=0.2, random_state=42)

# Train the model (n_jobs=-1 only parallelises the work across cores)
model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Evaluate model performance
mae = mean_absolute_error(y_test, y_pred)
print(f"\nModel Performance: MAE = {mae:.4f}")

# Predict next energy consumption per node: use the most recent `max_length`
# readings of every node (same right-aligned layout as training) and run a
# single batched prediction instead of one model call per row.
latest_windows = energy[:, -max_length:].astype(np.float32)
predictions = model.predict(latest_windows)

# The forecast is for the step after the last column, and the dataset carries
# no label for that step, so the next timestamp is reported as "Unknown"
# (the previous lookup always ran past the end of the list and gave the same).
next_timestamps = ["Unknown"] * n_nodes

# Save predictions
output_df = pd.DataFrame({'Node_ID': df.iloc[:, 0], 'Next_Timestamp': next_timestamps, 'Predicted_Next_Energy': predictions})
output_df.to_csv(r"C:\Users\VIVEK KUMAR SINGH\Downloads\predicted_energy_with_timestamp.csv", index=False)

print("\nPredictions saved successfully!")

