In [None]:
import pandas as pd

In [None]:
# Load the hourly time-series CSV into a DataFrame.
time_series_csv = 'time_series_60min_singleindex.csv'
df_time = pd.read_csv(time_series_csv)

In [None]:
# Preview the first five rows of the raw time-series frame.
df_time.head(n=5)

In [None]:
# Keep only the timestamp and the actual German solar generation column,
# then work on an independent copy so later edits do not touch df_time.
solar_columns = ['utc_timestamp', 'DE_solar_generation_actual']
df = df_time[solar_columns]
df_solar = df.copy()
df_solar.head()

We keep only the data for Germany: the UTC timestamp and the actual solar generation (`DE_solar_generation_actual`).

In [None]:
# Rename to the 'ds' (timestamp) / 'y' (target) column names that the
# Prophet cell below fits on.
prophet_names = {'utc_timestamp': 'ds', 'DE_solar_generation_actual': 'y'}
df_solar = df_solar.rename(columns=prophet_names)
df_solar.head()

In [None]:
# Parse 'ds' as datetimes and drop the timezone information so the column is
# timezone-naive (presumably UTC wall time, given the source column name --
# TODO confirm against the CSV).
parsed_ds = pd.to_datetime(df_solar['ds'])
df_solar['ds'] = parsed_ds.dt.tz_localize(None)
df_solar.head()

In [None]:
# Fill gaps in the target by linear interpolation.
# The result is assigned back to the column instead of calling
# `interpolate(..., inplace=True)` on `df_solar['y']`: that form mutates a
# temporary column selection, which raises a chained-assignment FutureWarning
# on pandas 2.x and leaves `df_solar` unchanged under Copy-on-Write.
df_solar['y'] = df_solar['y'].interpolate(method='linear')
df_solar.head()

In [None]:
from prophet import Prophet

# Fit a Prophet model with default settings on the (ds, y) frame.
model = Prophet()
model.fit(df_solar)

# Predict on the very same rows the model was fitted on (in-sample).
# NOTE(review): these components are later used as model features, so they
# carry information from the whole period, including any later test rows.
forecast = model.predict(df_solar)

# Keep the timestamp, the prediction and the decomposed components.
component_columns = ['ds', 'yhat', 'trend', 'weekly', 'yearly', 'additive_terms']
df_features = forecast[component_columns]


In [None]:
# Attach the Prophet components to the target series, matching on timestamp.
df_timeseries = df_solar.merge(df_features, on='ds', how='inner')


In [None]:
# Columns kept from the weather file: timestamp, temperature and the two
# horizontal radiation series for Germany.
columns = [
    'utc_timestamp',
    'DE_temperature',
    'DE_radiation_direct_horizontal',
    'DE_radiation_diffuse_horizontal',
]

# Read the weather CSV and keep only the selected columns, in that order.
df_weather = pd.read_csv('weather_data.csv').loc[:, columns]
df_weather.head()

In [None]:
# Columns kept from the PV/wind profile file: timestamp and the national
# German PV profile.
columns = ['time', 'DE_pv_national_current']

# Read the profile CSV and keep only the selected columns.
df_pv = pd.read_csv('ninja_pv_wind_profiles_singleindex.csv').loc[:, columns]
df_pv.head()

In [None]:
# Rename the timestamp column to 'ds' and make it timezone-naive so it can be
# used as the merge key with the other frames.
df_weather = (
    df_weather
    .rename(columns={'utc_timestamp': 'ds'})
    .assign(ds=lambda frame: pd.to_datetime(frame['ds']).dt.tz_localize(None))
)

In [None]:
# Rename the timestamp column to 'ds' and make it timezone-naive so it can be
# used as the merge key with the other frames.
df_pv = (
    df_pv
    .rename(columns={'time': 'ds'})
    .assign(ds=lambda frame: pd.to_datetime(frame['ds']).dt.tz_localize(None))
)

In [None]:
# Join the weather columns onto the time-series frame; the inner join keeps
# only timestamps present in both.
df_merged = df_timeseries.merge(df_weather, on='ds', how='inner')
df_merged.head()

In [None]:
# Join the PV profile onto the merged frame; the inner join keeps only
# timestamps present in both.
df = df_merged.merge(df_pv, on='ds', how='inner')
df.head()

In [None]:
# Remove Prophet's own prediction column; the component columns are kept.
df = df.drop(columns=['yhat'])
df.head()

In [None]:
# Derive calendar features from the timestamp column.
# Maps each new column name to the datetime attribute it is read from.
calendar_parts = {
    'Year': 'year',
    'Month': 'month',
    'Day': 'day',
    'Hour': 'hour',
    'DayOfWeek': 'dayofweek',
}
timestamps = pd.to_datetime(df['ds'])
for column_name, attribute in calendar_parts.items():
    df[column_name] = getattr(timestamps.dt, attribute)

In [None]:
# Preview the frame after the calendar columns were added.
df.head(n=5)

In [None]:
# Look at the raw time-series frame again before selecting further columns.
df_time.head(n=5)

In [None]:
# Reduce the time-series frame to the timestamp and the German solar capacity.
# Note that this rebinds df_time, so the full frame is no longer available
# under that name after this cell.
columns = ['utc_timestamp', 'DE_solar_capacity']

df_time = df_time.loc[:, columns]
df_time.head()

In [None]:
# Give the capacity frame the same timezone-naive 'ds' key as the feature
# frame.
df_time = (
    df_time
    .rename(columns={'utc_timestamp': 'ds'})
    .assign(ds=lambda frame: pd.to_datetime(frame['ds']).dt.tz_localize(None))
)

# Left join: every feature row is kept, and rows without a matching capacity
# timestamp get NaN in the capacity column.
df = df.merge(df_time, on='ds', how='left')
df.head()

In [None]:
# Discard every row whose target value 'y' is missing.
target_column = 'y'
df = df.dropna(subset=[target_column])

In [None]:
# Remove the raw timestamp column, leaving only the other columns as features.
df = df.drop(columns=['ds'])

In [None]:
# Preview the final modelling frame.
df.head(n=5)

In [None]:
# Separate the target column from the feature columns.
target_name = 'y'
y = df[target_name]
X = df.drop(columns=[target_name])

In [None]:
from sklearn.model_selection import train_test_split

# Hold out the last 20% of the rows as the test set.
# The rows come from an hourly time series, and the merges and dropna above
# preserve the order of the source rows, so `shuffle=False` gives a
# chronological split. A shuffled split would put neighbouring, highly
# correlated hours in both train and test and make the error look better
# than it is.
# NOTE(review): assumes the source CSV is sorted by timestamp -- confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)


In [None]:
# Report the (rows, columns) shape of each split.
train_shape, test_shape = X_train.shape, X_test.shape
print(f"Training set shape: {train_shape}")
print(f"Test set shape: {test_shape}")

<h2>XGBoost</h2>

In [None]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error

# Wrap the train/test splits in XGBoost's DMatrix container.
train_data = xgb.DMatrix(X_train, label=y_train)
test_data = xgb.DMatrix(X_test, label=y_test)

# Booster hyper-parameters.
params = {
    'objective': 'reg:squarederror',  # Regression with squared-error loss
    'max_depth': 6,
    'eta': 0.1,  # Learning rate
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'seed': 42
}

# Train for a fixed number of boosting rounds.
# Note: this rebinds `model`, which previously held the Prophet model.
num_round = 100
model = xgb.train(params, train_data, num_round)

# Predict on the held-out split.
y_pred = model.predict(test_data)

# RMSE is computed as the square root of the MSE. The `squared=False`
# argument of mean_squared_error was deprecated in scikit-learn 1.4 and
# removed in 1.6, so this form works on both old and new versions.
rmse = mean_squared_error(y_test, y_pred) ** 0.5
print(f"RMSE: {rmse}")

In [None]:
import matplotlib.pyplot as plt

# Draw the feature-importance chart for the trained booster and render it.
importance_ax = xgb.plot_importance(model)
plt.show()

<h2>Random Forest Regressor</h2>

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Hyper-parameters for the random forest.
rf_settings = {
    'n_estimators': 100,  # Number of trees
    'max_depth': 10,      # Maximum depth of each tree
    'random_state': 42,   # Fixed seed for reproducibility
    'n_jobs': -1,         # Use all available cores
}
rf_model = RandomForestRegressor(**rf_settings)

# Fit on the training split.
rf_model.fit(X_train, y_train)


In [None]:
# Predict on the held-out split with the fitted random forest.
y_pred = rf_model.predict(X_test)

# RMSE is computed as the square root of the MSE. The `squared=False`
# argument of mean_squared_error was deprecated in scikit-learn 1.4 and
# removed in 1.6, so this form works on both old and new versions.
rmse = mean_squared_error(y_test, y_pred) ** 0.5
print(f"Random Forest RMSE: {rmse}")
