In [5]:

import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

# Read the CSV data into a pandas DataFrame
df = pd.read_csv('data.csv')
df

Unnamed: 0,ticker,date,last,volume
0,1332 JT,2013-01-04,169.0987,1464100
1,1332 JT,2013-01-07,166.3266,1783500
2,1332 JT,2013-01-08,166.3266,1759800
3,1332 JT,2013-01-09,165.4026,767800
4,1332 JT,2013-01-10,167.2507,1503100
...,...,...,...,...
451108,9984 JT,2021-03-15,10370.0000,12041200
451109,9984 JT,2021-03-16,10620.0000,11346000
451110,9984 JT,2021-03-17,10400.0000,9771000
451111,9984 JT,2021-03-18,10220.0000,13941600


In [6]:
# Step 1: Calculate daily returns for each ticker
df['returns'] = df.groupby('ticker')['last'].pct_change()
df

Unnamed: 0,ticker,date,last,volume,returns
0,1332 JT,2013-01-04,169.0987,1464100,
1,1332 JT,2013-01-07,166.3266,1783500,-0.016393
2,1332 JT,2013-01-08,166.3266,1759800,0.000000
3,1332 JT,2013-01-09,165.4026,767800,-0.005555
4,1332 JT,2013-01-10,167.2507,1503100,0.011173
...,...,...,...,...,...
451108,9984 JT,2021-03-15,10370.0000,12041200,-0.024918
451109,9984 JT,2021-03-16,10620.0000,11346000,0.024108
451110,9984 JT,2021-03-17,10400.0000,9771000,-0.020716
451111,9984 JT,2021-03-18,10220.0000,13941600,-0.017308


In [7]:
# Step 2: ts_delay function
def ts_delay(series, d):
    return series.shift(d)

In [21]:
# Step 3: ts_regression function

def ts_regression(y, x, d):
    # Create empty list to store errors
    errors = []
    # For each row in the range of the dataframe
    for i in range(len(y)):
        # If there's insufficient data, append NaN
        if i < d - 1:
            errors.append(np.nan)
            continue
        
        # Slice the data
        y_slice = y.iloc[i-d+1:i+1].dropna().values.reshape(-1, 1)
        x_slice = x.iloc[i-d+1:i+1].dropna().values.reshape(-1, 1)
        
        # Check if there are enough data points after dropping NaN values
        if len(y_slice) < d or len(x_slice) < d:
            errors.append(np.nan)
            continue
        
        # Regression model
        reg = LinearRegression().fit(x_slice, y_slice)
        pred = reg.predict(x_slice)
        # Calculate the error term (residuals)
        error = y_slice[-1] - pred[-1]
        errors.append(error[0])
    
    return pd.Series(errors, index=y.index)




In [22]:
# Apply the functions and compute the alpha
df['delayed_returns'] = df.groupby('ticker')['returns'].transform(lambda x: ts_delay(x, 1))

reg_errors = df.groupby('ticker').apply(lambda x: ts_regression(x['returns'], x['delayed_returns'], 120))


In [24]:
reg_errors = reg_errors.reset_index(level=0, drop=True)
df['reg_error'] = reg_errors
df['alpha'] = df['reg_error'].rank(pct=True)

In [31]:
# Save the results
df.to_csv('result_file_path.csv', index=False)
df

Unnamed: 0,ticker,date,last,volume,returns,delayed_returns,reg_error,alpha,daily_return
0,1332 JT,2013-01-04,169.0987,1464100,,,,,
1,1332 JT,2013-01-07,166.3266,1783500,-0.016393,,,,
2,1332 JT,2013-01-08,166.3266,1759800,0.000000,-0.016393,,,
3,1332 JT,2013-01-09,165.4026,767800,-0.005555,0.000000,,,
4,1332 JT,2013-01-10,167.2507,1503100,0.011173,-0.005555,,,
...,...,...,...,...,...,...,...,...,...
451108,9984 JT,2021-03-15,10370.0000,12041200,-0.024918,0.033528,-0.026489,0.069883,-0.023440
451109,9984 JT,2021-03-16,10620.0000,11346000,0.024108,-0.024918,0.016154,0.841490,0.001685
451110,9984 JT,2021-03-17,10400.0000,9771000,-0.020716,0.024108,-0.022404,0.096288,-0.017432
451111,9984 JT,2021-03-18,10220.0000,13941600,-0.017308,-0.020716,-0.023920,0.085441,-0.001667


In [29]:
import pandas as pd
import numpy as np

def backtest_strategy(df, risk_free_rate=0.03):
    """
    Backtest a trading strategy.
    
    Parameters:
        - df: DataFrame with daily 'alpha' and 'close' values for the stock tickers.
        - risk_free_rate: Annual risk-free rate. Default is 0.03 or 3%.
    
    Returns:
        - sharpe_ratio, annual_return_rate, max_drawdown
    """
    # Calculate daily portfolio returns
    df['daily_return'] = df.groupby('ticker')['last'].pct_change() * df['alpha'].shift(1)
    daily_returns = df.groupby('date')['daily_return'].sum()
    
    # Sharpe Ratio
    excess_daily_returns = daily_returns - risk_free_rate/252
    sharpe_ratio = np.sqrt(252) * excess_daily_returns.mean() / excess_daily_returns.std()
    
    # Annual Return Rate (CAGR)
    total_return = (daily_returns + 1).cumprod().iloc[-1]
    years = len(daily_returns) / 252
    annual_return_rate = (total_return)**(1/years) - 1
    
    # Maximum Drawdown
    cumulative_returns = (daily_returns + 1).cumprod()
    running_max = cumulative_returns.cummax()
    drawdowns = (cumulative_returns / running_max) - 1
    max_drawdown = drawdowns.min()
    
    return sharpe_ratio, annual_return_rate, max_drawdown




In [30]:
# Example usage:
df = pd.read_csv('result_file_path.csv')
sharpe_ratio, annual_return, max_drawdown = backtest_strategy(df)
print(f"Sharpe Ratio: {sharpe_ratio}")
print(f"Annual Return Rate: {annual_return*100}%")
print(f"Max Drawdown: {max_drawdown*100}%")

Sharpe Ratio: 0.6878725507684998
Annual Return Rate: -98.64918794488891%
Max Drawdown: -394388.60237659403%


In [34]:
import pandas as pd

def moving_average_crossover_strategy(df, short_window=40, long_window=100):
    """
    Implements the Moving Average Crossover Strategy.
    
    Parameters:
    - df: DataFrame with columns ['ticker', 'date', 'close']
    - short_window: Length for short-term moving average. Default is 40.
    - long_window: Length for long-term moving average. Default is 100.
    
    Returns:
    DataFrame with additional columns for short-term and long-term MAs, signals, and positions.
    """
    # Compute short and long moving averages
    df['short_mavg'] = df.groupby('ticker')['last'].transform(lambda x: x.rolling(window=short_window).mean())
    df['long_mavg'] = df.groupby('ticker')['last'].transform(lambda x: x.rolling(window=long_window).mean())

    # Create signals
    df['signal'] = 0.0
    df.loc[df['short_mavg'] > df['long_mavg'], 'signal'] = 1.0
    df.loc[df['short_mavg'] <= df['long_mavg'], 'signal'] = -1.0

    # Generate trading orders
    df['positions'] = df.groupby('ticker')['signal'].diff()

    return df

def backtest(df):
    """
    Backtest the Moving Average Crossover Strategy.

    Parameters:
    - df: DataFrame after applying the moving_average_crossover_strategy function.
    
    Returns:
    Annual return for each ticker.
    """
    # Calculate daily returns
    df['daily_returns'] = df.groupby('ticker')['last'].pct_change() * df['positions'].shift(1)
    daily_returns = df.groupby(['ticker', 'date'])['daily_returns'].sum().reset_index()

    # Calculate annual return for each ticker
    annual_returns = daily_returns.groupby('ticker')['daily_returns'].apply(lambda x: (x + 1).prod() ** (252.0 / len(x)) - 1)
    
    return annual_returns



In [35]:
# Load your data
# df = pd.read_csv('your_data.csv')

# Implement the strategy
strategy_df = moving_average_crossover_strategy(df)

# Backtest the strategy
returns = backtest(strategy_df)
print(returns)

ticker
1332 JT   -0.013177
1333 JT    0.020751
1334 JT   -0.045109
1605 JT    0.003537
1721 JT   -0.004199
             ...   
9681 JT   -0.013831
9735 JT   -0.002081
9766 JT    0.016374
9983 JT   -0.012141
9984 JT   -0.023650
Name: daily_returns, Length: 248, dtype: float64


In [36]:
def mean_reversion_strategy(df, window=100, z_entry_threshold=1, z_exit_threshold=0):
    df['mean'] = df['last'].rolling(window=window).mean()
    df['std'] = df['last'].rolling(window=window).std()
    df['z_score'] = (df['last'] - df['mean']) / df['std']
    
    df['signal'] = 0
    df.loc[df['z_score'] > z_entry_threshold, 'signal'] = -1
    df.loc[df['z_score'] < -z_entry_threshold, 'signal'] = 1
    df.loc[abs(df['z_score']) <= z_exit_threshold, 'signal'] = 0
    
    df['position'] = df['signal'].shift(1)
    return df


In [37]:
def bollinger_bands_strategy(df, window=20, num_std=2):
    df['SMA'] = df['last'].rolling(window=window).mean()
    df['rolling_std'] = df['last'].rolling(window=window).std()
    df['upper_band'] = df['SMA'] + (df['rolling_std'] * num_std)
    df['lower_band'] = df['SMA'] - (df['rolling_std'] * num_std)
    
    df['signal'] = 0
    df.loc[df['last'] > df['upper_band'], 'signal'] = -1
    df.loc[df['last'] < df['lower_band'], 'signal'] = 1
    
    df['position'] = df['signal'].shift(1)
    return df


In [38]:
def backtest_strategy(df):
    df['daily_returns'] = df['last'].pct_change() * df['position']
    df['cumulative_returns'] = (1 + df['daily_returns']).cumprod() - 1
    
    annual_return = df['daily_returns'].mean() * 252
    return annual_return


In [39]:
# Applying Mean Reversion Strategy
mean_reversion_df = mean_reversion_strategy(df, window=100, z_entry_threshold=1, z_exit_threshold=0)
annual_return_mean_reversion = backtest_strategy(mean_reversion_df)
print(f"Annual Return from Mean Reversion Strategy: {annual_return_mean_reversion:.2%}")

Annual Return from Mean Reversion Strategy: 3.65%


In [40]:
# Applying Bollinger Bands Strategy
bollinger_bands_df = bollinger_bands_strategy(df, window=20, num_std=2)
annual_return_bollinger = backtest_strategy(bollinger_bands_df)
print(f"Annual Return from Bollinger Bands Strategy: {annual_return_bollinger:.2%}")

Annual Return from Bollinger Bands Strategy: -0.65%
