In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import seaborn as sns
from lightgbm import LGBMRegressor
# 🔽 Veri yükle
df = pd.read_csv("Sunspots.csv")
df.drop(columns='Unnamed: 0', inplace=True)
df.drop(index=df[df["Monthly Mean Total Sunspot Number"] == 0].index,inplace=True)
df['Date'] = pd.to_datetime(df['Date'])

# 🔁 Feature Engineering
df['Sunspot_lag1'] = df['Monthly Mean Total Sunspot Number'].shift(1)
df['Diff_11Year'] = df['Monthly Mean Total Sunspot Number'].diff(periods=11)
df['Diff_1Year'] = df['Monthly Mean Total Sunspot Number'].diff(periods=1)
df['Month'] = df['Date'].dt.month
df['Month_sin'] = np.sin(2 * np.pi * df['Month'] / 12)
df['Month_cos'] = np.cos(2 * np.pi * df['Month'] / 12)
df['Year'] = df['Date'].dt.year
df['Rolling_11Yr'] = df['Monthly Mean Total Sunspot Number'].rolling(window=132, min_periods=1).mean()

df.dropna(inplace=True)

# 🎯 Feature set (sunspot_lag12 çıkarıldı)
features = ['Sunspot_lag1', 'Diff_11Year', 'Diff_1Year', 'Month_sin', 'Month_cos', 'Year']
# NOT: 'Rolling_11Yr' çıkarıldı

X = df[features]
y = df['Monthly Mean Total Sunspot Number']
dates = df['Date']

# 🔀 Train-test split
split_index = int(len(df) * 0.8)
X_train, X_test = X.iloc[:split_index], X.iloc[split_index:]
y_train, y_test = y.iloc[:split_index], y.iloc[split_index:]
date_test = dates.iloc[split_index:]


In [17]:
import lightgbm as lgb
from lightgbm import early_stopping, log_evaluation

model_lgb = lgb.LGBMRegressor(
    n_estimators=1000,
    learning_rate=0.01,
    max_depth=6,
    num_leaves=31,
    subsample=0.8,
    colsample_bytree=0.8,
    reg_alpha=0.5,
    reg_lambda=0.5,
    random_state=42
)

model_lgb.fit(
    X_train, y_train,
    eval_set=[(X_test, y_test)],
    eval_metric='rmse',
    callbacks=[early_stopping(50), log_evaluation(0)]
)

y_pred_lgb = model_lgb.predict(X_test)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000132 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1004
[LightGBM] [Info] Number of data points in the train set: 2549, number of used features: 6
[LightGBM] [Info] Start training from score 82.574108
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[788]	valid_0's rmse: 5.47186	valid_0's l2: 29.9413


In [18]:
from sklearn.ensemble import RandomForestRegressor

model_rf = RandomForestRegressor(n_estimators=100, random_state=42)
model_rf.fit(X_train, y_train)
y_pred_rf = model_rf.predict(X_test)


In [19]:
from sklearn.neighbors import KNeighborsRegressor

scaler = MinMaxScaler()
X_train_knn = scaler.fit_transform(X_train)
X_test_knn = scaler.transform(X_test)

model_knn = KNeighborsRegressor(n_neighbors=5)
model_knn.fit(X_train_knn, y_train)
y_pred_knn = model_knn.predict(X_test_knn)


In [20]:
from sklearn.linear_model import LinearRegression

model_lr = LinearRegression()
model_lr.fit(X_train, y_train)
y_pred_lr = model_lr.predict(X_test)


In [21]:
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

def evaluate(y_true, y_pred):
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))  # Kare alıp kökünü aldık
    r2 = r2_score(y_true, y_pred)
    return {"RMSE": rmse, "R2": r2}


results = {
    "LightGBM": evaluate(y_test, y_pred_lgb),
    "Random Forest": evaluate(y_test, y_pred_rf),
    "KNN": evaluate(y_test, y_pred_knn),
    "Linear Regression": evaluate(y_test, y_pred_lr),
}

pd.DataFrame(results).T


Unnamed: 0,RMSE,R2
LightGBM,5.471863,0.993844
Random Forest,2.505429,0.998709
KNN,23.10776,0.890207
Linear Regression,1.952751e-14,1.0


In [22]:
corr = df[['Monthly Mean Total Sunspot Number', 'Rolling_11Yr']].corr()
print(corr)


                                   Monthly Mean Total Sunspot Number  \
Monthly Mean Total Sunspot Number                            1.00000   
Rolling_11Yr                                                 0.35662   

                                   Rolling_11Yr  
Monthly Mean Total Sunspot Number       0.35662  
Rolling_11Yr                            1.00000  
