## Imports

In [47]:
import pandas as pd
import numpy as np
import hvplot.pandas
import yfinance as yf
import datetime


## Data Collection

In [48]:
# Define the stock symbol and create a Yahoo Finance ticker object
symbol = "CORN"
ticker = yf.Ticker(symbol)

# Get historical stock data for the last 5 years
historical_data = ticker.history(period="5y")

# Print the first few rows of the historical data
print(historical_data.head())

                            Open   High    Low  Close  Volume  Dividends  \
Date                                                                       
2018-09-17 00:00:00-04:00  15.68  15.68  15.58  15.59   55600        0.0   
2018-09-18 00:00:00-04:00  15.56  15.56  15.39  15.40   98700        0.0   
2018-09-19 00:00:00-04:00  15.41  15.54  15.41  15.50   49900        0.0   
2018-09-20 00:00:00-04:00  15.57  15.86  15.50  15.75   91900        0.0   
2018-09-21 00:00:00-04:00  15.79  15.94  15.77  15.93   69300        0.0   

                           Stock Splits  Capital Gains  
Date                                                    
2018-09-17 00:00:00-04:00           0.0            0.0  
2018-09-18 00:00:00-04:00           0.0            0.0  
2018-09-19 00:00:00-04:00           0.0            0.0  
2018-09-20 00:00:00-04:00           0.0            0.0  
2018-09-21 00:00:00-04:00           0.0            0.0  


## Daily Returns

In [49]:
# Calculate daily returns
historical_data['Daily_Return'] = historical_data['Close'].pct_change()

# Print the first few rows of the data with daily returns
print(historical_data.head())

                            Open   High    Low  Close  Volume  Dividends  \
Date                                                                       
2018-09-17 00:00:00-04:00  15.68  15.68  15.58  15.59   55600        0.0   
2018-09-18 00:00:00-04:00  15.56  15.56  15.39  15.40   98700        0.0   
2018-09-19 00:00:00-04:00  15.41  15.54  15.41  15.50   49900        0.0   
2018-09-20 00:00:00-04:00  15.57  15.86  15.50  15.75   91900        0.0   
2018-09-21 00:00:00-04:00  15.79  15.94  15.77  15.93   69300        0.0   

                           Stock Splits  Capital Gains  Daily_Return  
Date                                                                  
2018-09-17 00:00:00-04:00           0.0            0.0           NaN  
2018-09-18 00:00:00-04:00           0.0            0.0     -0.012187  
2018-09-19 00:00:00-04:00           0.0            0.0      0.006494  
2018-09-20 00:00:00-04:00           0.0            0.0      0.016129  
2018-09-21 00:00:00-04:00           0.0  

In [50]:
# Convert to DataFrame and Reset Index
df = pd.DataFrame(historical_data)
df = df.reset_index()
df = df.drop(columns = ["Open", "High", "Low", "Volume", "Dividends", "Stock Splits", "Capital Gains"])
df.head()

Unnamed: 0,Date,Close,Daily_Return
0,2018-09-17 00:00:00-04:00,15.59,
1,2018-09-18 00:00:00-04:00,15.4,-0.012187
2,2018-09-19 00:00:00-04:00,15.5,0.006494
3,2018-09-20 00:00:00-04:00,15.75,0.016129
4,2018-09-21 00:00:00-04:00,15.93,0.011429


## Cumulative Returns

In [51]:
# Calculate Cumulative Return and add as a Column
df['Cumulative_Return'] = (1 + df['Daily_Return']).cumprod() - 1

df.head()

Unnamed: 0,Date,Close,Daily_Return,Cumulative_Return
0,2018-09-17 00:00:00-04:00,15.59,,
1,2018-09-18 00:00:00-04:00,15.4,-0.012187,-0.012187
2,2018-09-19 00:00:00-04:00,15.5,0.006494,-0.005773
3,2018-09-20 00:00:00-04:00,15.75,0.016129,0.010263
4,2018-09-21 00:00:00-04:00,15.93,0.011429,0.021809


# Trading Algorithm

In [52]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

## Create Short and Long Window 

In [53]:
df = df.set_index(["Date"])

df['SMA_50'] = df['Close'].rolling(window=45).mean()
df['SMA_200'] = df['Close'].rolling(window=90).mean()
df.head()

Unnamed: 0_level_0,Close,Daily_Return,Cumulative_Return,SMA_50,SMA_200
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-09-17 00:00:00-04:00,15.59,,,,
2018-09-18 00:00:00-04:00,15.4,-0.012187,-0.012187,,
2018-09-19 00:00:00-04:00,15.5,0.006494,-0.005773,,
2018-09-20 00:00:00-04:00,15.75,0.016129,0.010263,,
2018-09-21 00:00:00-04:00,15.93,0.011429,0.021809,,


In [54]:
# Create a binary target variable: 1 for buy signal, 0 for sell signal

df['Signal'] = np.where(df['SMA_50'] > df['SMA_200'], 1, -1)
df.head()

Unnamed: 0_level_0,Close,Daily_Return,Cumulative_Return,SMA_50,SMA_200,Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-09-17 00:00:00-04:00,15.59,,,,,-1
2018-09-18 00:00:00-04:00,15.4,-0.012187,-0.012187,,,-1
2018-09-19 00:00:00-04:00,15.5,0.006494,-0.005773,,,-1
2018-09-20 00:00:00-04:00,15.75,0.016129,0.010263,,,-1
2018-09-21 00:00:00-04:00,15.93,0.011429,0.021809,,,-1


In [55]:
# Drop NaN values and shift target variable by one day
df['Signal'] = df['Signal'].shift(-1)
df = df.dropna()
df.head()

Unnamed: 0_level_0,Close,Daily_Return,Cumulative_Return,SMA_50,SMA_200,Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-01-25 00:00:00-05:00,16.309999,0.00431,0.046183,16.217778,16.206333,1.0
2019-01-28 00:00:00-05:00,16.299999,-0.000613,0.045542,16.224,16.214222,1.0
2019-01-29 00:00:00-05:00,16.26,-0.002454,0.042976,16.230222,16.223778,1.0
2019-01-30 00:00:00-05:00,16.35,0.005535,0.048749,16.237778,16.233222,1.0
2019-01-31 00:00:00-05:00,16.190001,-0.009786,0.038486,16.243333,16.238111,1.0


In [56]:
# Prepare data for machine learning
X = df.drop(columns = (["Close", "Daily_Return", "Cumulative_Return", "Signal"]))
y = df["Signal"]
display(X)
display(y)


Unnamed: 0_level_0,SMA_50,SMA_200
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-01-25 00:00:00-05:00,16.217778,16.206333
2019-01-28 00:00:00-05:00,16.224000,16.214222
2019-01-29 00:00:00-05:00,16.230222,16.223778
2019-01-30 00:00:00-05:00,16.237778,16.233222
2019-01-31 00:00:00-05:00,16.243333,16.238111
...,...,...
2023-09-07 00:00:00-04:00,22.822000,23.237111
2023-09-08 00:00:00-04:00,22.809333,23.225444
2023-09-11 00:00:00-04:00,22.808889,23.217333
2023-09-12 00:00:00-04:00,22.798444,23.200667


Date
2019-01-25 00:00:00-05:00    1.0
2019-01-28 00:00:00-05:00    1.0
2019-01-29 00:00:00-05:00    1.0
2019-01-30 00:00:00-05:00    1.0
2019-01-31 00:00:00-05:00    1.0
                            ... 
2023-09-07 00:00:00-04:00   -1.0
2023-09-08 00:00:00-04:00   -1.0
2023-09-11 00:00:00-04:00   -1.0
2023-09-12 00:00:00-04:00   -1.0
2023-09-13 00:00:00-04:00   -1.0
Name: Signal, Length: 1167, dtype: float64

In [57]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [58]:
# Scale the data and transform
scaler = StandardScaler()

X_scaler = scaler.fit(X_train)

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [59]:
# Build and train a machine learning model
model = LogisticRegression(random_state=1)
model.fit(X_train_scaled, y_train)

In [60]:
# Evaluate the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
testing_report = classification_report(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print(testing_report)

Accuracy: 0.9444444444444444
              precision    recall  f1-score   support

        -1.0       1.00      0.88      0.94       113
         1.0       0.90      1.00      0.95       121

    accuracy                           0.94       234
   macro avg       0.95      0.94      0.94       234
weighted avg       0.95      0.94      0.94       234





In [61]:
# Create a new empty predictions DataFrame using code provided below.
predictions_df = pd.DataFrame(index=X_test.index)
predictions_df["Predicted Signal"] = y_pred
predictions_df["Actual Returns"] = df["Daily_Return"]
predictions_df["Trading Algorithm Returns"] = predictions_df["Actual Returns"] * predictions_df["Predicted Signal"]
predictions_df

Unnamed: 0_level_0,Predicted Signal,Actual Returns,Trading Algorithm Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-11-16 00:00:00-05:00,1.0,0.009312,0.009312
2020-04-17 00:00:00-04:00,-1.0,0.019721,-0.019721
2019-12-03 00:00:00-05:00,1.0,-0.001375,-0.001375
2022-11-28 00:00:00-05:00,1.0,-0.000375,-0.000375
2021-02-25 00:00:00-05:00,1.0,-0.011455,-0.011455
...,...,...,...
2021-03-10 00:00:00-05:00,1.0,-0.017898,-0.017898
2022-10-24 00:00:00-04:00,1.0,-0.002200,-0.002200
2019-06-03 00:00:00-04:00,-1.0,-0.004189,0.004189
2019-06-25 00:00:00-04:00,1.0,-0.001168,-0.001168


In [62]:
#Calcualte the cumulative returns for the actual returns and trading algorithm returns
cumulative_df = pd.DataFrame(index=X_test.index)
cumulative_df['Actual Cumulative Returns'] = (1 + predictions_df[['Actual Returns']]).cumprod()
cumulative_df['Algo Cumulative Returns'] = (1 + predictions_df[['Trading Algorithm Returns']]).cumprod()

#Review the cumulative returns dataframe
cumulative_df.head()

Unnamed: 0_level_0,Actual Cumulative Returns,Algo Cumulative Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-11-16 00:00:00-05:00,1.009312,1.009312
2020-04-17 00:00:00-04:00,1.029217,0.989408
2019-12-03 00:00:00-05:00,1.027802,0.988048
2022-11-28 00:00:00-05:00,1.027416,0.987677
2021-02-25 00:00:00-05:00,1.015647,0.976364


## Back Test the Actual Trading Strategy

In [64]:
#Create the dataframe for the Actual Cumualtive Returns
algo_backtest = pd.concat([df, cumulative_df], axis='columns', join='inner')

#Drop the Algo Cumulative Returns Column
algo_backtest.drop(columns='Algo Cumulative Returns', inplace=True)

# Calculate the points in time at which a position should be taken, 1 or -1
algo_backtest['Entry/Exit'] = algo_backtest['Signal'].diff()
pd.set_option('display.max_rows', None)

# Set the intial capital 
initial_capital = float(100000)

# Set the share size
share_size = 500

#Take the share position where the dual moving average crossover is 1
algo_backtest['Position'] = share_size * algo_backtest['Signal']

# Find the points in time where a 500 share position is bought or sold
algo_backtest["Entry/Exit Position"] = algo_backtest["Position"].diff()

# Multiply share price by entry/exit positions and get the cumulatively sum
algo_backtest["Portfolio Holdings"] = (
    algo_backtest["Close"] * algo_backtest["Entry/Exit Position"].cumsum()
)

# Subtract the initial capital by the portfolio holdings to get the amount of liquid cash in the portfolio
algo_backtest["Portfolio Cash"] = (
    initial_capital - (algo_backtest["Close"] * algo_backtest["Entry/Exit Position"]).cumsum()
)

# Get the total portfolio value by adding the cash amount by the portfolio holdings (or investments)
algo_backtest["Portfolio Total"] = (
   algo_backtest["Portfolio Cash"] + algo_backtest["Portfolio Holdings"]
)

# Calculate the portfolio daily returns
algo_backtest["Portfolio Daily Returns"] = algo_backtest["Portfolio Total"].pct_change()

# Calculate the cumulative returns
algo_backtest["Portfolio Cumulative Returns"] = (
    1 + algo_backtest["Portfolio Daily Returns"]
).cumprod() - 1

# Create the list of the metric names
metrics1 = [
    'Annualized Return',
    'Cumulative Returns',
    'Annual Volatility',
    'Sharpe Ratio',
    'Sortino Ratio'
]

# Create a list that holds the column name
columns1 = ['Backtest']

# Initialize the DataFrame with index set to evaluation metrics and columns 
algo_evaluation_df = pd.DataFrame(index=metrics1, columns=columns1)

algo_evaluation_df.loc['Annualized Return'] = (
    algo_backtest['Portfolio Daily Returns'].mean() * 252
)

# Calculate the Cumulative returns metric
algo_evaluation_df.loc['Cumulative Returns'] = algo_backtest['Portfolio Cumulative Returns'][-1]

# Calculate the Annual volatility metric
algo_evaluation_df.loc['Annual Volatility'] = (
    algo_backtest['Portfolio Daily Returns'].std() * np.sqrt(252)
)
# Calculate the Sharpe ratio
algo_evaluation_df.loc['Sharpe Ratio'] = (
    algo_backtest['Portfolio Daily Returns'].mean() * 252) / (
    algo_backtest['Portfolio Daily Returns'].std() * np.sqrt(252)
)

# Calculate the Sortino ratio
# Start by calculating the downside return values

# Create a DataFrame that contains the Portfolio Daily Returns column
sortino_ratio_df = algo_backtest[['Portfolio Daily Returns']].copy()

# Create a column to hold downside return values
sortino_ratio_df.loc[:,'Downside Returns'] = 0

# Find Portfolio Daily Returns values less than 0, 
# square those values, and add them to the Downside Returns column
sortino_ratio_df.loc[sortino_ratio_df['Portfolio Daily Returns'] < 0, 
                     'Downside Returns'] = sortino_ratio_df['Portfolio Daily Returns']**2

# Calculate the annualized return value
annualized_return = sortino_ratio_df['Portfolio Daily Returns'].mean() * 252

# Calculate the annualized downside standard deviation value
downside_standard_deviation = np.sqrt(sortino_ratio_df['Downside Returns'].mean()) * np.sqrt(252)

# Divide the annualized return value by the downside standard deviation value
sortino_ratio = annualized_return/downside_standard_deviation

# Add the Sortino ratio to the evaluation DataFrame
algo_evaluation_df.loc['Sortino Ratio'] = sortino_ratio

# Review the portfolio evaluation DataFrame
algo_evaluation_df

Unnamed: 0,Backtest
Annualized Return,0.010384
Cumulative Returns,0.0075
Annual Volatility,0.067445
Sharpe Ratio,0.15396
Sortino Ratio,0.217461
