# In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import wrds
import matplotlib.pyplot as plt
import seaborn as sns

# Open the WRDS session that every query in this script goes through
db = wrds.Connection()

# Research window, kept as ISO date strings
start_date, end_date = '2000-01-01', '2022-12-31'

# Pull the distinct tickers that make up the stock universe
query = """
    SELECT DISTINCT ticker
    FROM your_stock_info_table
"""
ticker_frame = db.raw_sql(query)
ticker_list = ticker_frame['ticker'].tolist()

# Regressors used both in the per-ticker OLS fit and in the time-series plots
financial_metrics_to_plot = ['SUE', 'SURGE', 'other_financial_metric_1', 'other_financial_metric_2']

# Per-ticker accumulators, combined once after the loop. DataFrame.append was
# removed in pandas 2.0, and calling pd.concat inside the loop re-copies
# everything gathered so far on every iteration.
result_rows = []
returns_by_ticker = {}
financials_by_ticker = {}

try:
    for ticker in ticker_list:
        # NOTE: the values from ticker_list are interpolated directly into the SQL text.
        # Financial data: earnings/revenue actuals, expectations and dispersion
        query = f"""
            SELECT date, actual_eps, expected_eps, eps_std_dev,
                   actual_revenue, expected_revenue, revenue_std_dev,
                   other_financial_metric_1, other_financial_metric_2
            FROM your_financial_table
            WHERE date BETWEEN '{start_date}' AND '{end_date}' AND ticker = '{ticker}'
        """
        financial_data = db.raw_sql(query)

        # Market data: adjusted close prices
        query = f"""
            SELECT date, adj_close_price
            FROM your_stock_price_table
            WHERE date BETWEEN '{start_date}' AND '{end_date}' AND ticker = '{ticker}'
        """
        stock_data = db.raw_sql(query)

        # A ticker without rows in either table cannot be merged or regressed
        if financial_data.empty or stock_data.empty:
            print(f'Skipping {ticker}: no financial or price data in the sample window')
            continue

        # Index both frames by a sorted DatetimeIndex: pct_change needs
        # chronological order, and the plots below filter on index.year.
        financial_data['date'] = pd.to_datetime(financial_data['date'])
        financial_data = financial_data.set_index('date').sort_index()
        stock_data['date'] = pd.to_datetime(stock_data['date'])
        stock_data = stock_data.set_index('date').sort_index()

        # SUE (Standardized Unexpected Earnings) and
        # SURGE (Standardized Unexpected Revenue Growth)
        financial_data['SUE'] = (
            (financial_data['actual_eps'] - financial_data['expected_eps'])
            / financial_data['eps_std_dev']
        )
        financial_data['SURGE'] = (
            (financial_data['actual_revenue'] - financial_data['expected_revenue'])
            / financial_data['revenue_std_dev']
        )

        # Outer-align prices and financial metrics on date
        merged_data = pd.concat([stock_data, financial_data], axis=1)

        # Returns come from the price series itself, so dates that exist only
        # in the financial table do not introduce padded prices.
        merged_data['Returns'] = stock_data['adj_close_price'].pct_change(fill_method=None)
        returns_by_ticker[ticker] = merged_data['Returns']
        financials_by_ticker[ticker] = financial_data

        # OLS needs finite values everywhere: a zero standard deviation yields
        # inf, and the outer alignment yields NaN on non-matching dates.
        sample = (
            merged_data[['adj_close_price', *financial_metrics_to_plot]]
            .replace([np.inf, -np.inf], np.nan)
            .dropna()
        )
        # Require more observations than parameters (regressors + constant)
        if len(sample) <= len(financial_metrics_to_plot) + 1:
            print(f'Skipping regression for {ticker}: only {len(sample)} complete observations')
            continue

        X = sm.add_constant(sample[financial_metrics_to_plot])
        y = sample['adj_close_price']
        model = sm.OLS(y, X).fit()

        result_rows.append({
            'Ticker': ticker,
            'Regression Coefficients': model.params,
            'R-squared': model.rsquared,
            'P-value': model.pvalues,
        })
finally:
    # Release the WRDS connection even if a query or fit raised
    db.close()

# One row per successfully regressed ticker
results_df = pd.DataFrame(
    result_rows,
    columns=['Ticker', 'Regression Coefficients', 'R-squared', 'P-value'],
)

# One returns column per ticker, named after the ticker
returns_data = pd.concat(returns_by_ticker, axis=1) if returns_by_ticker else pd.DataFrame()

# Stacked financial data for all tickers, tagged with the ticker it belongs to
financial_data_all = (
    pd.concat(
        [frame.assign(ticker=name) for name, frame in financials_by_ticker.items()],
        axis=0,
    )
    if financials_by_ticker
    else pd.DataFrame()
)

# Print the results
print(results_df)

# Plot the distribution of stock returns. seaborn labels each column (ticker)
# in its own legend; overriding it with plt.legend(ticker_list) would attach
# ticker names to arbitrary histogram artists.
plt.figure(figsize=(12, 6))
if not returns_data.empty:
    sns.histplot(returns_data, bins=100, kde=True, alpha=0.5)
plt.title('Distribution of Stock Returns')
plt.xlabel('Returns')
plt.ylabel('Frequency')
plt.show()

# Plot the time series of each financial metric, one line per ticker
for metric in financial_metrics_to_plot:
    plt.figure(figsize=(12, 6))
    for ticker, ticker_financials in financials_by_ticker.items():
        recent = ticker_financials.loc[ticker_financials.index.year >= 2000, metric]
        plt.plot(recent, label=ticker)

    plt.title(f'{metric} Time Series')
    plt.xlabel('Year')
    plt.ylabel(metric)
    if financials_by_ticker:
        plt.legend(loc='upper right')
    plt.show()
