In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor

# --- 1. Load the input files ---
# Student tables for the train and test populations.
train_df = pd.read_csv(
    "/kaggle/input/student-data-2/processed_train_student - processed_train_student.csv"
)
test_df = pd.read_csv(
    "/kaggle/input/student-data-2/processed_test_student - processed_test_student.csv"
)

# Similarity-score tables read from the Kaggle input datasets.
lab_df = pd.read_csv("/kaggle/input/midterm-data/lab_similarity_scores_alluids.csv")
quiz_df = pd.read_csv("/kaggle/input/midterm-data/quiz_similarity_scores_alluids.csv")
lc_df = pd.read_csv("/kaggle/input/lcs-sims/lc_similarity_full.csv")
project_df = pd.read_csv(
    "/kaggle/input/project-sim/project_similarity_scores_alluids (3).csv"
)

# Feature tables read from the working directory
# (presumably written by earlier notebook steps -- confirm).
lab_features_df = pd.read_csv("/kaggle/working/lab_features.csv")
lc_features_df = pd.read_csv("/kaggle/working/lc_features.csv")
quiz_features_df = pd.read_csv("/kaggle/working/quiz_features.csv")
project_features_df = pd.read_csv("/kaggle/working/project_features.csv")

# --- 2. Combine all feature tables (keyed on UID) ---
# Outer joins keep every UID that appears in at least one table. The tables
# are merged in a fixed order, starting from the lab similarity scores.
features_df = lab_df
for extra_df in (
    lc_df,
    quiz_df,
    lab_features_df,
    lc_features_df,
    quiz_features_df,
    project_features_df,
    project_df,
):
    features_df = features_df.merge(extra_df, on="UID", how="outer")

# Keep a single row per UID (the first one encountered).
features_df = features_df.drop_duplicates(subset="UID")

# --- 3. Attach the features to the train and test tables ---
# Left joins keep every row of the student tables; anything still missing
# afterwards (e.g. a UID without a feature row) is replaced with 0.
train_merged = train_df.merge(features_df, on="UID", how="left").fillna(0)
test_merged = test_df.merge(features_df, on="UID", how="left").fillna(0)

# --- 4. Separate the features from the target ---
# "UID" is an identifier and "FinalClass" is the target; neither is a feature.
X = train_merged.drop(columns=["UID", "FinalClass"])
y = train_merged["FinalClass"]
# Give the test matrix exactly the training columns, in the training order:
# scikit-learn estimators fitted on a DataFrame validate feature names at
# predict time, so an extra or reordered column would otherwise break
# prediction. Columns absent from the test table are filled with 0, matching
# how other missing values are handled in this script.
X_test = test_merged.reindex(columns=X.columns, fill_value=0)

# --- 5. Train / validation split ---
# Fixed seed so the validation MSE is reproducible and comparable between the
# candidate models (the estimators in this script are seeded with 42 as well).
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- 6. Model selection ---
# Exactly one assignment to `model` should be active; the commented lines are
# the alternative regressors, kept so they can be swapped in for comparison.
# model = XGBRegressor(random_state=42)
# model = LinearRegression()
# model = Ridge()
# model = Lasso()
# model = DecisionTreeRegressor(random_state=42)
# model = GradientBoostingRegressor(random_state=42)
model = RandomForestRegressor(random_state=42)

# --- 7. Model training ---
# The progress message prints the class name of whichever model is active.
print(f"Model ({type(model).__name__}) eğitiliyor...")
# Fit on the training split only; the validation split stays unseen.
model.fit(X_train, y_train)

# --- 8. Validation predictions ---
y_val_pred = model.predict(X_val)
# Round to whole numbers and clamp to 1..20
# (presumably the valid FinalClass range -- confirm).
rounded_val_pred = y_val_pred.round().clip(1, 20).astype(int)

# Side-by-side table of true values, raw predictions and rounded predictions.
val_results = pd.DataFrame(
    {
        "True": y_val.values,
        "Predicted": y_val_pred,
        "Rounded": rounded_val_pred,
    }
)
# Absolute error of the rounded prediction, per validation row.
val_results["error"] = (val_results["True"] - val_results["Rounded"]).abs()

# MSE of the raw predictions and of the rounded/clamped predictions.
mse_raw = mean_squared_error(y_val, y_val_pred)
mse_rounded = mean_squared_error(y_val, val_results["Rounded"])

print("\n--- İlk 10 Tahmin (Validation Set) ---")
print(val_results.head(10))
print(f"\nHam MSE (Validation): {mse_raw:.4f}")
print(f"Yuvarlanmış MSE (Validation): {mse_rounded:.4f}")

# --- 9. Predict the test set and save the result as CSV ---
y_test_pred = model.predict(X_test)
# Same post-processing as for validation: round, clamp to 1..20, cast to int.
rounded_test_pred = y_test_pred.round().clip(1, 20).astype(int)
test_merged["FinalClass_Predicted"] = rounded_test_pred

# The output file contains only the UID and the predicted score.
test_result = test_merged.loc[:, ["UID", "FinalClass_Predicted"]]
test_result.to_csv("/kaggle/working/test_predicted_scores.csv", index=False)
