# In [None]:
import pandas as pd
import statsmodels.api as sm
from sklearn.linear_model import Ridge
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import LeaveOneOut, KFold
from sklearn.preprocessing import StandardScaler


import pandas as pd
# Read the Excel workbook at `file_path` into the module-level DataFrame `df`.
# NOTE(review): the path is absolute from the filesystem root — confirm it matches the run environment.
file_path = '/data.xlsx'
df = pd.read_excel(file_path)
def lag_and_square(ser, n):
    """Assemble a series together with its first ``n`` lags and their squares.

    Args:
        ser: pandas Series to lag (anything exposing ``.shift``).
        n: Number of lags to generate; lags ``1..n`` are produced.

    Returns:
        DataFrame whose first column is 'Return' (the original series),
        followed by 'Lag_i' and 'Squared_Lag_i' for each ``i`` in order.
        Rows containing any NaN (the leading rows created by shifting)
        are removed.
    """
    # Collect columns in insertion order so the layout is
    # Return, Lag_1, Squared_Lag_1, Lag_2, Squared_Lag_2, ...
    columns = {'Return': ser}
    for lag in range(1, n + 1):
        shifted = ser.shift(lag)
        columns[f'Lag_{lag}'] = shifted
        columns[f'Squared_Lag_{lag}'] = shifted ** 2

    return pd.DataFrame(columns).dropna()

# Example usage: build two lags (and their squares) of the 'RET' column
# and preview the first five complete rows.
lagged_df = lag_and_square(df['RET'], 2)
print(lagged_df.head(5))



import pandas as pd
import statsmodels.api as sm

# Function for OLS regression
def OLS_regression(df, n):
    """Regress a series on its own lags and squared lags by OLS.

    Args:
        df: DataFrame whose first column holds the series to model; only
            that first column is read.
        n: Number of lags passed to ``lag_and_square``.

    Returns:
        The fitted statsmodels OLS results object.
    """
    # Only the first column is used; the lag features are rebuilt from it here.
    design = lag_and_square(df.iloc[:, 0], n)

    response = design['Return']
    # Everything after 'Return' is a regressor; add the intercept column.
    regressors = sm.add_constant(design.iloc[:, 1:])

    return sm.OLS(response, regressors).fit()

# Example usage: fit the 2-lag model on the raw return series.
# OLS_regression builds the lag features itself from the first column it is
# given, so pass the original 'RET' column rather than the already-lagged
# frame; re-lagging that frame would drop a further n observations.
n = 2
result_model = OLS_regression(df[['RET']], n)
print(result_model.summary())


import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge

def plot_ridge_coeffs(df, s, n):
    """Plot ridge coefficient paths for each regressor over a grid of penalties.

    Args:
        df: DataFrame whose first column is the dependent variable and whose
            remaining columns are the regressors.
        s: Upper end of the penalty grid; the grid starts at 0.
        n: Number of evenly spaced penalty values between 0 and ``s``.

    Returns:
        None. A matplotlib figure is shown with one line per regressor.
    """
    y = df.iloc[:, 0]
    X = df.iloc[:, 1:]

    # No explicit constant column: Ridge fits its own intercept by default.
    # A column of ones would be centred to all zeros inside Ridge, which makes
    # the λ = 0 fit singular while contributing nothing to the plot.
    alphas = np.linspace(0, s, n)

    # One coefficient vector per penalty value, in grid order.
    coefs = [Ridge(alpha=alpha).fit(X, y).coef_ for alpha in alphas]

    # Plot the Ridge coefficients against λ
    plt.figure(figsize=(10, 6))
    for i, column in enumerate(X.columns):
        plt.plot(alphas, [coef[i] for coef in coefs], label=f'Coef for {column}')

    plt.xlabel('Regularization Parameter λ')
    plt.ylabel('Coefficient Value')
    plt.title('Ridge Regression Coefficients vs. Regularization Parameter')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.show()

# Example usage: plot coefficient paths for the lagged 'RET' features.
n_lags = 2  # Number of lags (each lag also contributes its square)
df_lagged = lag_and_square(df['RET'], n_lags)  # Columns: 'Return', then Lag_i / Squared_Lag_i for i = 1..n_lags


# Penalty grid passed to plot_ridge_coeffs: n_value evenly spaced values
# from 0 to s_value.
s_value = 1  # Upper end of the grid; should be a positive real scalar
n_value = 100  # Number of grid points; should be a positive integer

# Draw the coefficient paths for the lagged DataFrame
plot_ridge_coeffs(df_lagged, s_value, n_value)



import numpy as np
from sklearn.linear_model import RidgeCV
from sklearn.preprocessing import StandardScaler
import statsmodels.api as sm

def ridge_regression_cv(df, s, n, LOOCV):
    """Pick a ridge penalty by cross-validation over a linear grid.

    Args:
        df: DataFrame whose first column is the dependent variable and whose
            remaining columns are the regressors.
        s: Upper end of the penalty grid; the grid starts at 1e-8.
        n: Number of evenly spaced penalty values in the grid.
        LOOCV: If truthy, use RidgeCV's default (``cv=None``) leave-one-out
            scheme; otherwise use 10-fold cross-validation.

    Returns:
        Tuple ``(best_lambda, best_coefs)``: the selected penalty and the
        fitted coefficient array. The coefficients include an entry for the
        constant column that ``sm.add_constant`` adds to the design matrix.
    """
    response = df.iloc[:, 0]

    # Standardise the regressors, then add the constant column.
    features = StandardScaler().fit_transform(df.iloc[:, 1:])
    design = sm.add_constant(features)

    # Candidate penalties; the grid starts just above zero.
    grid = np.linspace(1e-8, s, n)

    folds = None if LOOCV else 10
    model = RidgeCV(alphas=grid, cv=folds)
    model.fit(design, response)

    return model.alpha_, model.coef_

# Example usage: choose the penalty by leave-one-out CV (LOOCV=True) over
# 100 candidate values with upper bound 10, then report the result.
n_lags = 2  # Number of lags (each lag also contributes its square)
df_lagged1 = lag_and_square(df['RET'], n_lags)  # Columns: 'Return', then Lag_i / Squared_Lag_i for i = 1..n_lags

best_lambda, best_coefs = ridge_regression_cv(df_lagged1, 10, 100, LOOCV=True)
print("Optimal shrinkage parameter (lambda):", best_lambda)
print("Coefficients at optimal shrinkage parameter:", best_coefs)


import numpy as np
from sklearn.linear_model import RidgeCV
from sklearn.preprocessing import StandardScaler
import statsmodels.api as sm

def ridge_regression_cv(df, s, n, LOOCV):
    """Pick a ridge penalty by cross-validation over a logarithmic grid.

    Args:
        df: DataFrame whose first column is the dependent variable and whose
            remaining columns are the regressors.
        s: Largest penalty in the grid; must be positive because its log10
            is taken. The grid starts at 1e-8.
        n: Number of log-spaced penalty values in the grid.
        LOOCV: If truthy, use leave-one-out cross-validation; otherwise use
            10-fold cross-validation.

    Returns:
        Tuple ``(best_lambda, best_coefs)``: the selected penalty and the
        fitted coefficient array. The coefficients include an entry for the
        constant column that ``sm.add_constant`` adds to the design matrix.
    """
    # Extract the dependent variable (first column) and independent variables
    y = df.iloc[:, 0]
    X = df.iloc[:, 1:]

    # Standardize X, then add the constant column
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    X_scaled_with_constant = sm.add_constant(X_scaled)

    # Candidate penalties on a logarithmic scale from 1e-8 up to s
    alphas = np.logspace(-8, np.log10(s), n)

    # cv=None selects RidgeCV's built-in efficient leave-one-out scheme.
    # Passing cv=len(y) instead would create single-observation test folds
    # scored with the default R^2, which is undefined for one sample, so no
    # candidate penalty could be ranked meaningfully.
    ridge_cv = RidgeCV(alphas=alphas, cv=None if LOOCV else 10)
    ridge_cv.fit(X_scaled_with_constant, y)

    # Extract the optimal shrinkage parameter and coefficients
    best_lambda = ridge_cv.alpha_
    best_coefs = ridge_cv.coef_

    return best_lambda, best_coefs

# Example usage: choose the penalty by 10-fold CV (LOOCV=False) from a
# two-point grid with upper bound 1, then report the result.
n_lags = 2  # Number of lags (each lag also contributes its square)
df_lagged1 = lag_and_square(df['RET'], n_lags)  # Columns: 'Return', then Lag_i / Squared_Lag_i for i = 1..n_lags

best_lambda, best_coefs = ridge_regression_cv(df_lagged1, 1, 2, LOOCV=False)
print("Optimal shrinkage parameter (lambda):", best_lambda)
print("Coefficients at optimal shrinkage parameter:", best_coefs)

import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler

def plot_ridge_coeffs(df, s, n):
    """Plot ridge coefficient paths for standardised regressors.

    Args:
        df: DataFrame whose first column is the dependent variable and whose
            remaining columns are the regressors.
        s: Upper end of the penalty grid; the grid starts at 0.
        n: Number of evenly spaced penalty values between 0 and ``s``.

    Returns:
        None. A matplotlib figure is shown with one line per regressor,
        labelled 'Coef 1', 'Coef 2', ... in column order.
    """
    y = df.iloc[:, 0]

    # Standardise the regressors so the penalty treats them on a common scale.
    # No constant column is added: Ridge fits its own intercept by default,
    # and a column of ones would be centred to zeros, making the λ = 0 fit
    # singular without affecting the plotted coefficients.
    X_scaled = StandardScaler().fit_transform(df.iloc[:, 1:])

    # Generate n values for the regularization parameter λ
    alphas = np.linspace(0, s, n)

    # One coefficient vector per penalty value, in grid order.
    coefs = [Ridge(alpha=alpha).fit(X_scaled, y).coef_ for alpha in alphas]

    # Plotting the coefficients
    plt.figure(figsize=(10, 6))
    for i in range(X_scaled.shape[1]):
        plt.plot(alphas, [coef[i] for coef in coefs], label=f'Coef {i+1}')

    plt.xlabel('Regularization Parameter (λ)')
    plt.ylabel('Coefficient Value')
    plt.title('Ridge Regression Coefficients vs. Regularization Parameter')
    plt.legend(loc='best')
    plt.show()
# Example usage with the lagged_df DataFrame: 10 penalty values evenly
# spaced from 0 to 1.
plot_ridge_coeffs(lagged_df, s=1, n=10)


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge

def plot_ridge_coefficients(df, s, n):
    """Plot ridge coefficient paths, one line per design-matrix column.

    Args:
        df: DataFrame whose first column is the dependent variable and whose
            remaining columns are the regressors.
        s: Upper end of the penalty grid; the grid starts at 0.
        n: Number of evenly spaced penalty values between 0 and ``s``.

    Returns:
        None. A matplotlib figure is shown, then the design matrix and the
        dependent variable are printed.
    """
    y = df.iloc[:, 0]
    # Design matrix: the regressors plus the constant column from statsmodels.
    X = sm.add_constant(df.iloc[:, 1:])

    alphas = np.linspace(0, s, n)

    # Rows are penalty values, columns are design-matrix columns.
    coefficients_df = pd.DataFrame(
        [Ridge(alpha=penalty).fit(X, y).coef_ for penalty in alphas],
        index=alphas,
        columns=X.columns,
    )

    plt.figure(figsize=(10, 6))
    penalties = coefficients_df.index
    for column in coefficients_df.columns:
        plt.plot(penalties, coefficients_df[column], label=column)
    plt.xlabel('Regularization Parameter λ')
    plt.ylabel('Coefficients')
    plt.title('Ridge Regression Coefficients vs. Shrinkage')
    plt.legend()
    plt.show()

    # Echo the inputs that were fitted.
    print(X, y)

# Example usage with the lagged DataFrame: 10 penalty values evenly spaced
# from 0 to 1.
plot_ridge_coefficients(lagged_df, s=1, n=10)



def ridge_plot(df, s, n):
    """Plot ridge coefficient paths for the lags and squared lags of a series.

    Args:
        df: Series to model; it is passed straight to ``lag_and_square``.
        s: Upper end of the penalty grid; the grid starts at 0.
        n: Used twice: as the number of lags to build and as the number of
            evenly spaced penalty values between 0 and ``s``.

    Returns:
        None. A matplotlib figure is shown with one line per lag feature.
    """
    df_lagged = lag_and_square(df, n)
    y = df_lagged['Return']
    features = df_lagged.drop(columns=['Return'])

    # Standardise the regressors and fit on the standardised values, so the
    # scaling actually reaches the model. No constant column is added: Ridge
    # fits its own intercept, which also keeps coefficient i aligned with
    # feature column i when the curves are labelled below.
    X_scaled = StandardScaler().fit_transform(features)

    alphas = np.linspace(0, s, n)
    # One coefficient vector per penalty value, in grid order.
    coefs = [Ridge(alpha=alpha).fit(X_scaled, y).coef_ for alpha in alphas]

    plt.figure(figsize=(10, 6))
    for i, col_name in enumerate(features.columns):
        plt.plot(alphas, [coef[i] for coef in coefs], label=col_name)
    plt.xlabel('Shrinkage Parameter (λ)')
    plt.ylabel('Coefficients')
    plt.title('Ridge Regression Coefficients vs. Shrinkage Parameter')
    plt.legend()
    plt.show()
# Example usage on the raw 'RET' series with s=1 and n=10; ridge_plot uses n
# both as the lag count and as the number of penalty values.
ridge_plot(df['RET'], 1, 10)

def ridge_plot(df, s, n):
    """Plot ridge coefficient paths for the lags and squared lags of a series.

    Args:
        df: Series to model; it is passed straight to ``lag_and_square``.
        s: Upper end of the penalty grid; the grid starts at 0.
        n: Used twice: as the number of lags to build and as the number of
            evenly spaced penalty values between 0 and ``s``.

    Returns:
        None. A matplotlib figure is shown with one line per lag feature,
        then the unscaled design matrix and the dependent variable are
        printed.
    """
    lagged = lag_and_square(df, n)
    y = lagged['Return']
    predictors = lagged.drop(columns=['Return'])

    # Design matrix with the statsmodels constant column, then standardised.
    X = sm.add_constant(predictors)
    X_scaled = StandardScaler().fit_transform(X)

    alphas = np.linspace(0, s, n)
    # One coefficient vector per penalty value, in grid order.
    coefs = [Ridge(alpha=penalty).fit(X_scaled, y).coef_ for penalty in alphas]

    plt.figure(figsize=(10, 6))
    # Positions start at 1 so the constant's coefficient is left out of the plot.
    for position, col_name in enumerate(predictors.columns, start=1):
        plt.plot(alphas, [vector[position] for vector in coefs], label=col_name)
    plt.xlabel('Shrinkage Parameter (λ)')
    plt.ylabel('Coefficients')
    plt.title('Ridge Regression Coefficients vs. Shrinkage Parameter')
    plt.legend()
    plt.show()

    # Echo the inputs that were fitted.
    print(X, y)

# Example usage on the raw 'RET' series with s=1 and n=5; ridge_plot uses n
# both as the lag count and as the number of penalty values.
ridge_plot(df['RET'], 1, 5)
