In [29]:
# Generate example data
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from scipy.stats import pearsonr

In [18]:
np.random.seed(0)
X = np.random.rand(100)  # 1D array with 100 random values for X
y = 3 * X + np.random.normal(0, 0.1, 100)  # y = 3*X + some noise

# Reshape X for linear regression (scikit-learn expects 2D input for features)
X = X.reshape(-1, 1)

# Fit linear regression model
model = LinearRegression()
model.fit(X, y)

# Predict y values
y_pred = model.predict(X)

# Calculate R^2 score
r2 = r2_score(y, y_pred)

print(r2)

0.9868523821370109


In [26]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Generate example data with 5 variables
np.random.seed(0)
X = np.random.rand(100, 5)  # 100 samples, 5 features
true_coefficients = np.array([2, -1, 3, 0.5, -2])  # Coefficients for each feature

# Generate y with a linear relationship to X plus some noise
y = X.dot(true_coefficients) + np.random.normal(0, 0.5, 100)

# Fit linear regression model
model = LinearRegression()
model.fit(X, y)

# Predict y values
y_pred = model.predict(X)

# Calculate R^2 score
r2 = r2_score(y, y_pred)

# Calculate R^2 score
r, _ = pearsonr(y, y_pred)
print(r, r2, r**2)

0.9355189329041746 0.8751956738221653 0.8751956738221657


In [44]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

# Generate example data with 5 variables
np.random.seed(0)
feats = 300
X = np.random.rand(100, feats)  # 100 samples, 5 features
true_coefficients = np.random.randint(-20, 20, feats)

# Generate y with a linear relationship to X plus some noise
y = X.dot(true_coefficients) + np.random.normal(0, 40, 100)

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Fit linear regression model on the training set
model = LinearRegression()
model.fit(X_train, y_train)

# Predict y values for the test set
y_test_pred = model.predict(X_test)

# Calculate R^2 score on the test set
r2_test = r2_score(y_test, y_test_pred)

r_test, _ = pearsonr(y_test, y_test_pred)

print(r2_test, r_test, r_test**2)


-0.10958864279182445 0.07851768970036267 0.006165027595882439


In [56]:
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from scipy.stats import pearsonr

# Generate example data with 300 features
np.random.seed(0)
feats = 300
X = np.random.rand(100, feats)  # 100 samples, 300 features
true_coefficients = np.random.randint(-20, 20, feats)

# Generate y with a linear relationship to X plus some noise
y = X.dot(true_coefficients) + np.random.normal(0, 40, 100)

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Define a range of alpha values to test
alphas = np.linspace(1e-5,1e10,100)

# Track the best alpha and best R^2 score on the test set
best_alpha = None
best_r2_test = -np.inf
best_r_test = None

for alpha in alphas:
    # Fit Ridge regression model with current alpha
    model = Ridge(alpha=alpha)
    model.fit(X_train, y_train)
    
    # Predict y values for the test set
    y_test_pred = model.predict(X_test)
    
    # Calculate R^2 score and Pearson correlation on the test set
    r2_test = r2_score(y_test, y_test_pred)
    r_test, _ = pearsonr(y_test, y_test_pred)
    
    # Check if this is the best R^2 score we've found so far
    if r2_test > best_r2_test:
        best_r2_test = r2_test
        best_alpha = alpha
        best_r_test = r_test

# Print the best results
print("Best alpha:", best_alpha)
print("Best R^2 score on the test set:", best_r2_test)
print("Best Pearson correlation coefficient:", best_r_test)
print("Best Pearson r^2:", best_r_test**2)


Best alpha: 10000000000.0
Best R^2 score on the test set: -0.0309398388213582
Best Pearson correlation coefficient: -0.13554758988685145
Best Pearson r^2: 0.018373149124134073
