In [7]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score
import pandas as pd
import itertools

In [22]:
# Load the modelling table, then separate the 'popularity' target from the
# remaining columns, which serve as features.
data = pd.read_csv('scaled_data.csv')
# data = data[:1000]  # optional: keep only the first 1000 rows for quick runs
y = data['popularity']
X = data.drop(columns='popularity')

In [26]:
# Hyper-parameter search space: each key is a keyword name and each list holds
# the candidate values for it. A full Cartesian sweep over these lists covers
# 6 * 4 * 2 * 6 = 288 combinations.
grid = dict(
    alpha=[0.1, 1.0, 10.0, 100.0, 200.0, 500.0],
    solver=['sag', 'auto', 'lsqr', 'sparse_cg'],
    tol=[1e-4, 1e-3],
    max_iter=[1000, 1500, 2000, 5000, 7500, 10000],
)

In [30]:
def grid_search_ridge(X, y, grid, random_state=42):
    """Exhaustively search Ridge hyper-parameters against one hold-out split.

    The data is split 80/20, the features are standardised using statistics
    learned from the training part only, and one ``Ridge`` model is fitted for
    every combination in the Cartesian product of the value lists in ``grid``.
    Each model is scored with R^2 on the hold-out part and the best-scoring
    combination is returned.

    NOTE(review): the same hold-out part is used both to choose the winner and
    to report its score, so the returned R^2 is an optimistic estimate of
    generalisation. Consider a separate validation split or cross-validation
    if an unbiased figure is needed.

    Parameters
    ----------
    X : array-like or DataFrame
        Feature table, one row per sample.
    y : array-like or Series
        Target values aligned with the rows of ``X``.
    grid : dict
        Maps a ``Ridge`` keyword name to an iterable of candidate values.
    random_state : int or None, default 42
        Seed used for the train/test split and forwarded to ``Ridge`` so the
        stochastic solvers (e.g. ``'sag'``) give repeatable results. A
        ``'random_state'`` entry in ``grid`` takes precedence for the model.

    Returns
    -------
    tuple
        ``(best_params, best_r2)`` where ``best_params`` is the dict of grid
        values with the highest hold-out R^2 (the first one wins on ties) and
        ``best_r2`` is that score.

    Raises
    ------
    ValueError
        If the grid yields no combination (some candidate list is empty) or no
        combination produces a comparable R^2 score.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=random_state
    )

    # Fit the scaler on the training part only so that no hold-out statistics
    # leak into the features the models are trained on.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    keys = list(grid.keys())

    # Bound up front so an empty sweep is reported explicitly instead of
    # failing with UnboundLocalError at the return statement.
    best_params = None
    best_r2 = -float('inf')
    for combo in itertools.product(*grid.values()):
        params = dict(zip(keys, combo))

        # The seed is merged first so an explicit grid entry can override it,
        # and it stays out of `params` so the returned dict only contains the
        # keys the caller asked to search over.
        model = Ridge(**{'random_state': random_state, **params})
        model.fit(X_train_scaled, y_train)

        r2 = r2_score(y_test, model.predict(X_test_scaled))
        # Strict comparison: the earliest combination wins on ties, and a NaN
        # score is never selected.
        if r2 > best_r2:
            best_r2 = r2
            best_params = params

    if best_params is None:
        raise ValueError(
            "grid search evaluated no usable parameter combination; "
            "check that every entry in `grid` has at least one candidate value"
        )

    return best_params, best_r2

In [31]:
# Run the sweep and report the result. The call returns a
# (best_params, best_r2) tuple, which print() renders after the label.
print(
    "\n\nThe best parameters are: ",
    grid_search_ridge(X=X, y=y, grid=grid),
)



The best parameters are:  ({'alpha': 1.0, 'solver': 'sag', 'tol': 0.001, 'max_iter': 5000}, 0.5895833748103139)
