In [25]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from lightgbm import LGBMRegressor
import matplotlib.pyplot as plt

In [26]:
df = pd.read_csv('Sunspots_new.csv')

In [27]:
y=df['Monthly Mean Total Sunspot Number']
X=df.drop(['Monthly Mean Total Sunspot Number'], axis=1)

In [28]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [29]:
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [30]:
model = LGBMRegressor(random_state=42)
model.fit(X_train_scaled, y_train)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000303 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2308
[LightGBM] [Info] Number of data points in the train set: 1993, number of used features: 14
[LightGBM] [Info] Start training from score 88.373808


0,1,2
,boosting_type,'gbdt'
,num_leaves,31
,max_depth,-1
,learning_rate,0.1
,n_estimators,100
,subsample_for_bin,200000
,objective,
,class_weight,
,min_split_gain,0.0
,min_child_weight,0.001


In [31]:
y_pred = model.predict(X_test_scaled)



In [32]:
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)


In [33]:
print("🔹 LightGBM Sonuçları")
print(f"R² Score: {r2:.4f}")
print(f"MSE: {mse:.2f}")
print(f"MAE: {mae:.2f}")

🔹 LightGBM Sonuçları
R² Score: 0.9920
MSE: 42.97
MAE: 2.84


In [34]:
results_df = X_test.copy()
results_df['Gerçek_Sunspot'] = y_test.values
results_df['Tahmin_Sunspot'] = y_pred
results_df['Hata'] = y_test.values - y_pred
results_df.reset_index(drop=True, inplace=True)
results_df.to_csv("lightgbm_tahmin.csv", index=False)

In [35]:

df1 = pd.read_csv('lightgbm_tahmin.csv')

In [36]:
df1

Unnamed: 0,Year,Month,Day,Diff_11Year,Sunspot_lag1,Sunspot_lag132,Sunspot_lag12,Diff_1Year,Month_sin,Month_cos,Year_sin,Year_cos,RollMean_11year,RollStd_11year,Gerçek_Sunspot,Tahmin_Sunspot,Hata
0,1997,8,31,25.8,12.9,9.9,19.7,22.8,-8.660254e-01,-0.500000,-2.817326e-01,-0.959493,97.415909,79.461234,35.7,32.933328,2.766672
1,1985,12,31,-13.8,17.9,29.6,21.4,-2.1,-2.449294e-16,1.000000,2.817326e-01,-0.959493,107.620455,82.711072,15.8,14.898563,0.901437
2,1929,6,30,20.9,97.0,99.0,152.4,22.9,1.224647e-16,-1.000000,7.557496e-01,-0.654861,74.645455,48.538002,119.9,121.788684,-1.888684
3,1992,7,31,-91.1,98.5,205.3,240.2,15.7,-5.000000e-01,-0.866025,5.406408e-01,0.841254,117.121212,79.579108,114.2,115.738083,-1.538083
4,1900,12,31,-10.7,7.5,11.2,17.6,-7.0,-2.449294e-16,1.000000,-9.898214e-01,-0.142315,69.562121,49.654981,0.5,1.582819,-1.082819
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
494,1841,11,30,-190.4,47.4,223.3,90.4,-14.5,-5.000000e-01,0.866025,7.557496e-01,-0.654861,107.875000,78.278656,32.9,31.797701,1.102299
495,1834,7,31,-47.3,13.1,61.7,11.6,1.3,-5.000000e-01,-0.866025,-9.898214e-01,-0.142315,94.372727,83.544880,14.4,13.554614,0.845386
496,1980,6,30,72.6,254.7,150.1,211.7,-32.0,1.224647e-16,-1.000000,9.312268e-15,1.000000,95.796212,68.821520,222.7,207.994986,14.705014
497,1978,8,31,-69.4,99.7,151.8,43.0,-17.3,-8.660254e-01,-0.500000,-9.096320e-01,0.415415,85.721212,52.543331,82.4,84.269087,-1.869087


In [37]:
plt.figure(figsize=(14, 8))
plt.plot(df['Date'], y, color='lightgray', label='Full Data')
plt.plot(date_train, y_train, label='Train (80%)', color='blue')
plt.plot(date_test, y_test, label='Test (20%)', color='orange')

# Apply each model
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    plt.plot(date_test, y_pred, label=f'Prediction - {name}')

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    print(f"{name} RMSE: {rmse:.2f}")

plt.title("Model Predictions on Sunspot Test Data (20%)")
plt.xlabel("Date")
plt.ylabel("Sunspot Number")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

KeyError: 'Date'

<Figure size 1400x800 with 0 Axes>