## Imports

In [138]:
import pandas as pd
import numpy as np
import hvplot.pandas
import yfinance as yf
import datetime


## Data Collection

In [139]:
# Define the stock symbol and create a Yahoo Finance ticker object
symbol = "CORN"
ticker = yf.Ticker(symbol)

# Get historical stock data for the last 5 years
historical_data = ticker.history(period="5y")

# Print the first few rows of the historical data
print(historical_data.head())

                                Open       High    Low  Close  Volume  \
Date                                                                    
2018-09-12 00:00:00-04:00  16.190001  16.200001  15.68  15.78  325800   
2018-09-13 00:00:00-04:00  15.760000  15.800000  15.63  15.70   55800   
2018-09-14 00:00:00-04:00  15.650000  15.730000  15.65  15.71   31600   
2018-09-17 00:00:00-04:00  15.680000  15.680000  15.58  15.59   55600   
2018-09-18 00:00:00-04:00  15.560000  15.560000  15.39  15.40   98700   

                           Dividends  Stock Splits  Capital Gains  
Date                                                               
2018-09-12 00:00:00-04:00        0.0           0.0            0.0  
2018-09-13 00:00:00-04:00        0.0           0.0            0.0  
2018-09-14 00:00:00-04:00        0.0           0.0            0.0  
2018-09-17 00:00:00-04:00        0.0           0.0            0.0  
2018-09-18 00:00:00-04:00        0.0           0.0            0.0  


## Daily Returns

In [140]:
# Calculate daily returns
historical_data['Daily_Return'] = historical_data['Close'].pct_change()

# Print the first few rows of the data with daily returns
print(historical_data.head())

                                Open       High    Low  Close  Volume  \
Date                                                                    
2018-09-12 00:00:00-04:00  16.190001  16.200001  15.68  15.78  325800   
2018-09-13 00:00:00-04:00  15.760000  15.800000  15.63  15.70   55800   
2018-09-14 00:00:00-04:00  15.650000  15.730000  15.65  15.71   31600   
2018-09-17 00:00:00-04:00  15.680000  15.680000  15.58  15.59   55600   
2018-09-18 00:00:00-04:00  15.560000  15.560000  15.39  15.40   98700   

                           Dividends  Stock Splits  Capital Gains  \
Date                                                                
2018-09-12 00:00:00-04:00        0.0           0.0            0.0   
2018-09-13 00:00:00-04:00        0.0           0.0            0.0   
2018-09-14 00:00:00-04:00        0.0           0.0            0.0   
2018-09-17 00:00:00-04:00        0.0           0.0            0.0   
2018-09-18 00:00:00-04:00        0.0           0.0            0.0   

    

In [141]:
# Convert to DataFrame and Reset Index
df = pd.DataFrame(historical_data)
df = df.reset_index()
df = df.drop(columns = ["Open", "High", "Low", "Volume", "Dividends", "Stock Splits", "Capital Gains"])
df.head()

Unnamed: 0,Date,Close,Daily_Return
0,2018-09-12 00:00:00-04:00,15.78,
1,2018-09-13 00:00:00-04:00,15.7,-0.00507
2,2018-09-14 00:00:00-04:00,15.71,0.000637
3,2018-09-17 00:00:00-04:00,15.59,-0.007638
4,2018-09-18 00:00:00-04:00,15.4,-0.012187


## Annualized Returns

In [142]:

# Calculate the total return over the period
total_return = (1 + df['Daily_Return']).prod() - 1

# Determine the number of years in the investment horizon
years = (df['Date'].max() - df['Date'].min()).days / 365

# Calculate annualized return using the formula: ((1 + Total Return)^(1 / Years) - 1)
annualized_return = (pow(1 + total_return, 1 / years) - 1) * 100 

print(years)
print(annualized_return)

5.0
7.190512162825113


## Cumulative Returns

In [143]:
# Calculate Cumulative Return and add as a Column
df['Cumulative_Return'] = (1 + df['Daily_Return']).cumprod() - 1

df.head()

Unnamed: 0,Date,Close,Daily_Return,Cumulative_Return
0,2018-09-12 00:00:00-04:00,15.78,,
1,2018-09-13 00:00:00-04:00,15.7,-0.00507,-0.00507
2,2018-09-14 00:00:00-04:00,15.71,0.000637,-0.004436
3,2018-09-17 00:00:00-04:00,15.59,-0.007638,-0.012041
4,2018-09-18 00:00:00-04:00,15.4,-0.012187,-0.024081


## Annual Volatility 

In [144]:
# Calculate the standard deviation of daily returns
daily_volatility = df['Daily_Return'].std()

# Annualize the volatility
annual_volatility = daily_volatility * np.sqrt(252)

print(daily_volatility)
print(annual_volatility)

0.014002668772951124
0.22228547558663184


## Sharpe Ratio

In [145]:
# Calculate the average daily return
average_daily_return = df['Daily_Return'].mean()

# Assume a daily risk free rate
daily_risk_free_rate = 0.001

# Calculate the Sharpe ratio
sharpe_ratio = (average_daily_return - daily_risk_free_rate) / daily_volatility

print(average_daily_return)
print(sharpe_ratio)

0.0003743550672460483
-0.044680406492404255


## Sortino Ratio

In [146]:
# Calculate downside risk (standard deviation of negative returns)
negative_returns = df[df['Daily_Return'] < 0]['Daily_Return']
downside_risk = negative_returns.std()

# Calculate the Sortino ratio
sortino_ratio = (average_daily_return - daily_risk_free_rate) / downside_risk

print(sortino_ratio)

-0.06559586892568836


# Trading Algorithm

In [147]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

## Create Short and Long Window 

In [148]:
df = df.set_index(["Date"])

df['SMA_50'] = df['Close'].rolling(window=50).mean()
df['SMA_200'] = df['Close'].rolling(window=200).mean()
df.head()

Unnamed: 0_level_0,Close,Daily_Return,Cumulative_Return,SMA_50,SMA_200
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-09-12 00:00:00-04:00,15.78,,,,
2018-09-13 00:00:00-04:00,15.7,-0.00507,-0.00507,,
2018-09-14 00:00:00-04:00,15.71,0.000637,-0.004436,,
2018-09-17 00:00:00-04:00,15.59,-0.007638,-0.012041,,
2018-09-18 00:00:00-04:00,15.4,-0.012187,-0.024081,,


In [149]:
# Create a binary target variable: 1 for buy signal, 0 for sell signal

df['Signal'] = np.where(df['SMA_50'] > df['SMA_200'], 1, -1)
df.head()

Unnamed: 0_level_0,Close,Daily_Return,Cumulative_Return,SMA_50,SMA_200,Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-09-12 00:00:00-04:00,15.78,,,,,-1
2018-09-13 00:00:00-04:00,15.7,-0.00507,-0.00507,,,-1
2018-09-14 00:00:00-04:00,15.71,0.000637,-0.004436,,,-1
2018-09-17 00:00:00-04:00,15.59,-0.007638,-0.012041,,,-1
2018-09-18 00:00:00-04:00,15.4,-0.012187,-0.024081,,,-1


In [150]:
# Drop NaN values and shift target variable by one day
df['Signal'] = df['Signal'].shift(-1)
df = df.dropna()
df.head()

Unnamed: 0_level_0,Close,Daily_Return,Cumulative_Return,SMA_50,SMA_200,Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-06-28 00:00:00-04:00,16.440001,-0.027219,0.041825,16.0092,16.0373,-1.0
2019-07-01 00:00:00-04:00,16.1,-0.020681,0.020279,16.0264,16.0389,1.0
2019-07-02 00:00:00-04:00,16.32,0.013665,0.034221,16.0514,16.042,1.0
2019-07-03 00:00:00-04:00,16.76,0.026961,0.062104,16.087,16.04725,1.0
2019-07-05 00:00:00-04:00,16.77,0.000597,0.062738,16.127,16.05315,1.0


In [151]:
# Prepare data for machine learning
X = df.drop(columns = (["Close", "Daily_Return", "Cumulative_Return", "Signal"]))
y = df["Signal"]
display(X)
display(y)


Unnamed: 0_level_0,SMA_50,SMA_200
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-06-28 00:00:00-04:00,16.0092,16.03730
2019-07-01 00:00:00-04:00,16.0264,16.03890
2019-07-02 00:00:00-04:00,16.0514,16.04200
2019-07-03 00:00:00-04:00,16.0870,16.04725
2019-07-05 00:00:00-04:00,16.1270,16.05315
...,...,...
2023-09-01 00:00:00-04:00,22.9912,24.67485
2023-09-05 00:00:00-04:00,22.9288,24.65355
2023-09-06 00:00:00-04:00,22.8624,24.63220
2023-09-07 00:00:00-04:00,22.8178,24.61090


Date
2019-06-28 00:00:00-04:00   -1.0
2019-07-01 00:00:00-04:00    1.0
2019-07-02 00:00:00-04:00    1.0
2019-07-03 00:00:00-04:00    1.0
2019-07-05 00:00:00-04:00    1.0
                            ... 
2023-09-01 00:00:00-04:00   -1.0
2023-09-05 00:00:00-04:00   -1.0
2023-09-06 00:00:00-04:00   -1.0
2023-09-07 00:00:00-04:00   -1.0
2023-09-08 00:00:00-04:00   -1.0
Name: Signal, Length: 1057, dtype: float64

In [152]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [153]:
# Scale the data and transform
scaler = StandardScaler()

X_scaler = scaler.fit(X_train)

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [154]:
# Build and train a machine learning model
model = RandomForestClassifier(n_estimators=100, random_state=1)
model.fit(X_train_scaled, y_train)

In [155]:
# Evaluate the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
testing_report = classification_report(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print(testing_report)

Accuracy: 0.5330188679245284
              precision    recall  f1-score   support

        -1.0       0.00      0.00      0.00        99
         1.0       0.53      1.00      0.70       113

    accuracy                           0.53       212
   macro avg       0.27      0.50      0.35       212
weighted avg       0.28      0.53      0.37       212



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
