In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from lightgbm import LGBMRegressor

In [None]:
# Load the sunspot table and derive lag / change features from the target column.
df = pd.read_csv("Sunspots.csv")
sunspots = df['Monthly Mean Total Sunspot Number']

# Value of the series 12 rows and 1 row before the current row.
df['Sunspot_lag12'] = sunspots.shift(12)
df['Sunspot_lag1'] = sunspots.shift(1)

# Change features are built from the series shifted back by one row, so they
# only use values that precede the row being predicted. Differencing the raw
# series would give y[t] - y[t-k]; combined with Sunspot_lag1 that lets a model
# recover y[t] exactly (target leakage).
# The column names say "Year" but the periods count rows (12 and 1); the names
# are kept because the feature list in a later cell selects them.
previous_value = sunspots.shift(1)
df['Diff_12Year'] = previous_value.diff(periods=12)
df['Diff_1Year'] = previous_value.diff(periods=1)


Unnamed: 0.1,Unnamed: 0,Date,Monthly Mean Total Sunspot Number,Sunspot_lag12,Sunspot_lag1,Diff_12Year,Diff_1Year
3250,3250,2019-11-30,0.5,4.9,0.4,-4.4,0.1
3251,3251,2019-12-31,1.5,3.1,0.5,-1.6,1.0
3252,3252,2020-01-31,6.2,7.7,1.5,-1.5,4.7
3253,3253,2020-02-29,0.2,0.8,6.2,-0.6,-6.0
3254,3254,2020-03-31,1.5,9.4,0.2,-7.9,1.3
3255,3255,2020-04-30,5.2,9.1,1.5,-3.9,3.7
3256,3256,2020-05-31,0.2,9.9,5.2,-9.7,-5.0
3257,3257,2020-06-30,5.8,1.2,0.2,4.6,5.6
3258,3258,2020-07-31,6.1,0.9,5.8,5.2,0.3
3259,3259,2020-08-31,7.5,0.5,6.1,7.0,1.4


In [None]:
# Model inputs: only the columns that the feature-engineering cell above creates.
# The previous list also named 'Diff_11Year', 'Month_sin', 'Month_cos',
# 'Year_sin', 'Year_cos', 'Sunspot_lag132' and 'Sunspot_Lag_12' (a misspelling
# of 'Sunspot_lag12'); none of these exist in `df` at this point, so selecting
# them raised KeyError on a fresh run. Engineer them first before adding them back.
features = ['Sunspot_lag1', 'Sunspot_lag12', 'Diff_12Year', 'Diff_1Year']
X = df[features]
# Target: the monthly mean sunspot number.
y = df['Monthly Mean Total Sunspot Number']

In [None]:
# Chronological hold-out: the last 20% of rows form the test set. A shuffled
# split would put months adjacent to every test row into the training set, and
# because the features are lags of the target that leaks test values into training.
# Assumes the rows of `df` are already in date order - TODO confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

In [None]:
# Learn the min-max scaling from the training rows only, then apply that same
# fitted scaler to both splits.
scaler = MinMaxScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# LightGBM regressor with default hyper-parameters and a fixed random_state,
# trained on the scaled training features.
model = LGBMRegressor(random_state=42)
model.fit(X=X_train_scaled, y=y_train)


In [None]:
# Predict the target for the scaled hold-out rows.
y_pred = model.predict(X=X_test_scaled)

In [None]:
# Score the hold-out predictions: R², mean squared error, mean absolute error.
r2, mse, mae = (
    metric(y_test, y_pred)
    for metric in (r2_score, mean_squared_error, mean_absolute_error)
)

In [None]:
# Report the three hold-out metrics, one per line, under a model header.
print("🔹 LightGBM ")
for label, value in (("R² Score", r2), ("MSE", mse), ("MAE", mae)):
    print(f"{label}: {value}")


In [None]:
# Put each test row's (unscaled) features next to its actual value
# ('Gerçek_Sunspot'), its prediction ('Tahmin_Sunspot') and the residual
# ('fark' = actual - predicted), renumber the rows from 0, and write to CSV.
results_df = (
    X_test
    .assign(**{
        'Gerçek_Sunspot': y_test.values,
        'Tahmin_Sunspot': y_pred,
        'fark': y_test.values - y_pred,
    })
    .reset_index(drop=True)
)
results_df.to_csv("lightgbm_tahmin_sonuclari1.csv", index=False)

In [None]:
# Read the exported predictions back under their own name. Rebinding `df` here
# would replace the sunspot table, so re-running any earlier cell that selects
# sunspot columns from `df` would fail. Show a preview rather than the whole frame.
saved_results = pd.read_csv("lightgbm_tahmin_sonuclari1.csv")
saved_results.head()

In [None]:


import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import lightgbm as lgb


# Read the CSV, shorten the target column's name, parse 'Date' into datetimes
# and split it into Year / Month / Day columns.
df = (
    pd.read_csv("Sunspots.csv")
    .rename(columns={'Monthly Mean Total Sunspot Number': 'SunspotNumber'})
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .assign(
        Year=lambda frame: frame['Date'].dt.year,
        Month=lambda frame: frame['Date'].dt.month,
        Day=lambda frame: frame['Date'].dt.day,
    )
)

# Encode month (period 12) and day-of-month (period 31) as sine/cosine pairs.
month_angle = 2 * np.pi * df['Month'] / 12
day_angle = 2 * np.pi * df['Day'] / 31
df = df.assign(
    Month_sin=np.sin(month_angle),
    Month_cos=np.cos(month_angle),
    Day_sin=np.sin(day_angle),
    Day_cos=np.cos(day_angle),
)

# Lag features: the series value 1 row and 12 rows before the current row.
sunspot_series = df['SunspotNumber']
df['Sunspot_lag1'] = sunspot_series.shift(1)
df['Sunspot_lag12'] = sunspot_series.shift(12)

# Approximate 11-year-cycle change (132 rows), built from past values only:
#     Diff_11Year[t] = y[t-1] - y[t-132]
# `.diff(periods=132)` would give y[t] - y[t-132], which contains the value
# being predicted (target leakage). It would also not match the forecasting
# loop further down, which can only compute "latest known value minus the
# value 132 steps back" because y[t] is unknown at prediction time.
df['Diff_11Year'] = sunspot_series.shift(1) - sunspot_series.shift(132)

# Drop every row containing a NaN; this removes the leading rows that lack
# enough history for the lag and difference columns.
df = df.dropna()

# Model inputs and target.
features = ['Month_sin', 'Month_cos', 'Day_sin', 'Day_cos', 'Year',
            'Sunspot_lag1', 'Sunspot_lag12', 'Diff_11Year']
X = df[features]
y = df['SunspotNumber']

# Chronological hold-out: train on the first 80% of rows, test on the last 20%.
# With lag features, a shuffled split puts the months surrounding each test row
# into the training set, which inflates the test scores and says little about
# forecasting unseen future months.
# Assumes the rows of `df` are already in date order - TODO confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Fit a LightGBM regressor (default hyper-parameters, fixed random_state) on
# the unscaled training features.
lgbm = lgb.LGBMRegressor(random_state=42)
lgbm.fit(X_train, y_train)

# Hold-out predictions.
y_pred_lgbm = lgbm.predict(X_test)

# Hold-out scores: R², mean squared error, mean absolute error.
r2_lgbm, mse_lgbm, mae_lgbm = (
    metric(y_test, y_pred_lgbm)
    for metric in (r2_score, mean_squared_error, mean_absolute_error)
)

# Print each score with its own precision.
print("LightGBM Model Evaluation on Test Set:")
for label, value, spec in (
    ("R² Score", r2_lgbm, ".4f"),
    ("MSE", mse_lgbm, ".2f"),
    ("MAE", mae_lgbm, ".2f"),
):
    print(f"{label}: {value:{spec}}")


# --- Forecasting the next 11 years (132 months) ---

# Last date present in the (NaN-dropped) dataset.
last_date = df['Date'].max()

# Build 132 consecutive monthly dates that continue the data's own stamping
# convention. When the observations are stamped at month end, adding one
# calendar month and then snapping to month starts (freq='MS') skips the first
# future month entirely and yields Day == 1, unlike the month-end days the
# model was trained on. Anchoring on the same month boundary as `last_date`
# avoids both problems; month-start-stamped data keeps the previous behaviour.
month_anchor = pd.offsets.MonthEnd() if last_date.is_month_end else pd.offsets.MonthBegin()
future_dates_11_year = pd.date_range(start=last_date + month_anchor, periods=132, freq=month_anchor)

# One row per future month, with the same calendar columns as the training frame.
future_df_11_year = pd.DataFrame({'Date': future_dates_11_year})
future_df_11_year['Year'] = future_df_11_year['Date'].dt.year
future_df_11_year['Month'] = future_df_11_year['Date'].dt.month
future_df_11_year['Day'] = future_df_11_year['Date'].dt.day

# Sine/cosine encodings of month (period 12) and day (period 31) for the future
# rows, plus NaN placeholders for the lag-based columns that the forecasting
# loop fills in step by step.
month_angle = 2 * np.pi * future_df_11_year['Month'] / 12
day_angle = 2 * np.pi * future_df_11_year['Day'] / 31
future_df_11_year = future_df_11_year.assign(
    Month_sin=np.sin(month_angle),
    Month_cos=np.cos(month_angle),
    Day_sin=np.sin(day_angle),
    Day_cos=np.cos(day_angle),
    Sunspot_lag1=np.nan,
    Sunspot_lag12=np.nan,
    Diff_11Year=np.nan,
)

# Independent copy of just the model-input columns (lag columns are still NaN here).
X_future_11_year = future_df_11_year[features].copy()

# Fallback seed values taken from the end of the observed data; each falls back
# to zero(s) when the frame is too short to supply it.
if df.empty:
    last_lag1_orig = 0
else:
    last_lag1_orig = df['SunspotNumber'].iloc[-1]

if len(df) >= 12:
    last_lag12_orig = df['SunspotNumber'].iloc[-12:].values.tolist()
else:
    last_lag12_orig = [0] * 12

if len(df) >= 132:
    last_diff11year_orig = df['Diff_11Year'].iloc[-1]
else:
    last_diff11year_orig = 0


# State for the iterative forecast: the list of predictions made so far and a
# window of the 132 most recent series values, oldest first.
predicted_sunspots_11_year = []
history_tail = df['SunspotNumber'].iloc[-132:].tolist()

# Left-pad with zeros so the window always holds exactly 132 entries.
recent_sunspots = [0] * (132 - len(history_tail)) + history_tail


# Model-input columns for every future month, cast to float so that each
# one-row slice is numeric (slicing a row out of the full frame, which also
# holds the datetime 'Date' column, would produce an object-dtype row).
future_feature_rows = future_df_11_year[features].astype(float)

# Recursive one-step-ahead forecast: each prediction is pushed into the rolling
# window and becomes lag input for the following steps.
# `recent_sunspots` is padded to exactly 132 entries (oldest first) before this
# loop and the append/pop pair below keeps it at that length, so the lags can
# be read by position without any fallback values.
for i in range(len(future_feature_rows)):
    # One-row DataFrame: keeps the column names and order the model was fitted with.
    current_future_features = future_feature_rows.iloc[[i]].copy()

    # Most recent value and the value 12 steps before the month being predicted.
    current_future_features['Sunspot_lag1'] = recent_sunspots[-1]
    current_future_features['Sunspot_lag12'] = recent_sunspots[-12]

    # Latest known value minus the value 132 steps before the month being predicted.
    current_future_features['Diff_11Year'] = recent_sunspots[-1] - recent_sunspots[0]

    # Predict this month and record it.
    predicted_value = lgbm.predict(current_future_features)[0]
    predicted_sunspots_11_year.append(predicted_value)

    # Slide the window forward by one month.
    recent_sunspots.append(predicted_value)
    recent_sunspots.pop(0)


# Store the forecasts next to their dates.
future_df_11_year['Forecast_Sunspot'] = predicted_sunspots_11_year

# Stack observed values and forecasts into one Date / SunspotNumber frame for
# plotting (the forecast column is renamed so both parts share a column).
observed_part = df[['Date', 'SunspotNumber']]
forecast_part = future_df_11_year[['Date', 'Forecast_Sunspot']].rename(
    columns={'Forecast_Sunspot': 'SunspotNumber'}
)
df_plot_11_year = pd.concat([observed_part, forecast_part], ignore_index=True)

# Plot the observed series and the 11-year forecast on one axis.
fig, ax = plt.subplots(figsize=(15, 7))

# Rows up to and including last_date are observations; later rows are forecasts.
is_observed = df_plot_11_year['Date'] <= last_date
is_forecast = df_plot_11_year['Date'] > last_date

ax.plot(df_plot_11_year.loc[is_observed, 'Date'],
        df_plot_11_year.loc[is_observed, 'SunspotNumber'],
        label='Actual Values', color='blue')
ax.plot(df_plot_11_year.loc[is_forecast, 'Date'],
        df_plot_11_year.loc[is_forecast, 'SunspotNumber'],
        label='Next 11 Years Forecast', color='red', linestyle='--')

# Dotted marker at the boundary between observations and forecast.
ax.axvline(x=last_date, color='gray', linestyle=':', label='Forecast Start')

ax.set_title('Sunspot Number - Actual and Next 11 Years Forecast (LightGBM)')
ax.set_xlabel('Date')
ax.set_ylabel('Sunspot Number')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()


# Print a heading followed by the date/forecast table.
print("\nPredicted Sunspot Numbers for the next 11 years:",
      future_df_11_year[['Date', 'Forecast_Sunspot']],
      sep="\n")


In [None]:

# Window start: 50 years before the last observed date. The offset was 49
# years, which disagreed with the variable names and the "Past 50 Years" plot
# labels used for this window.
last_date_original = df['Date'].max()
start_date_past_50_years = last_date_original - pd.DateOffset(years=50)

# Observations that fall inside the window (copied so later edits do not touch `df`).
df_past_50_years = df[df['Date'] >= start_date_past_50_years].copy()

# Windowed observations followed by the forecast, sharing one Date /
# SunspotNumber layout for plotting.
df_plot_50_years = pd.concat([df_past_50_years[['Date', 'SunspotNumber']],
                             future_df_11_year[['Date', 'Forecast_Sunspot']].rename(columns={'Forecast_Sunspot': 'SunspotNumber'})], ignore_index=True)


# Plot the windowed observations and the 11-year forecast on one axis.
fig, ax = plt.subplots(figsize=(15, 7))

# Rows up to and including last_date_original are observations; later rows are forecasts.
is_observed = df_plot_50_years['Date'] <= last_date_original
is_forecast = df_plot_50_years['Date'] > last_date_original

ax.plot(df_plot_50_years.loc[is_observed, 'Date'],
        df_plot_50_years.loc[is_observed, 'SunspotNumber'],
        label='Past 50 Years Actual Values', color='blue')
ax.plot(df_plot_50_years.loc[is_forecast, 'Date'],
        df_plot_50_years.loc[is_forecast, 'SunspotNumber'],
        label='Next 11 Years Forecast', color='red', linestyle='--')

# Dotted marker at the boundary between observations and forecast.
ax.axvline(x=last_date_original, color='gray', linestyle=':', label='Forecast Start')

ax.set_title('Sunspot Number - Past 50 Years Actual and Next 11 Years Forecast (LightGBM)')
ax.set_xlabel('Date')
ax.set_ylabel('Sunspot Number')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()