In [41]:
import pandas as pd
import hvplot.pandas
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset
from sklearn.metrics import classification_report
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
import requests
import json
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

In [42]:
# Using the read_csv function and Path module, create a DataFrame 
# by importing the file from the Resources folder
Copper_df = pd.read_csv(
    Path('Resources/Copper.csv'),
    index_col='DATE',
    infer_datetime_format=True, 
    parse_dates=True
)
Nickel_df = pd.read_csv(
    Path('Resources/Nickel.csv'),
    index_col='DATE',
    infer_datetime_format=True, 
    parse_dates=True
)
Zinc_df = pd.read_csv(
    Path('Resources/Zinc.csv'),
    index_col='DATE',
    infer_datetime_format=True, 
    parse_dates=True
)


# Review the first and last five rows of the DataFrame

Copper_df.info()


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 383 entries, 1990-01-01 to 2021-11-01
Data columns (total 1 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   PCOPPUSDM  383 non-null    float64
dtypes: float64(1)
memory usage: 6.0 KB


In [43]:
Nickel_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 383 entries, 1990-01-01 to 2021-11-01
Data columns (total 1 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   PNICKUSDM  383 non-null    float64
dtypes: float64(1)
memory usage: 6.0 KB


In [44]:
Zinc_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 383 entries, 1990-01-01 to 2021-11-01
Data columns (total 1 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   PZINCUSDM  383 non-null    float64
dtypes: float64(1)
memory usage: 6.0 KB


In [45]:
Commodities_list = [Copper_df, Nickel_df, Zinc_df]
Commodities_df =pd.concat([Copper_df, Nickel_df, Zinc_df],axis=1).loc['2018-01-01':'2021-11-01']
display(Commodities_df)
Commodities_df.info()

Unnamed: 0_level_0,PCOPPUSDM,PNICKUSDM,PZINCUSDM
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-01,7065.852273,12864.875,3441.522727
2018-02-01,7006.525,13595.875,3532.9
2018-03-01,6799.178571,13392.5,3269.178571
2018-04-01,6851.5125,13938.1,3188.05
2018-05-01,6825.27381,14366.4881,3059.869048
2018-06-01,6965.857143,15105.65476,3088.571429
2018-07-01,6250.75,13793.86364,2656.125
2018-08-01,6051.045455,13411.35227,2512.0
2018-09-01,6050.7625,12510.35,2434.675
2018-10-01,6219.586957,12314.91304,2673.673913


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 47 entries, 2018-01-01 to 2021-11-01
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   PCOPPUSDM  47 non-null     float64
 1   PNICKUSDM  47 non-null     float64
 2   PZINCUSDM  47 non-null     float64
dtypes: float64(3)
memory usage: 1.5 KB


In [46]:
Commodities_df.rename(columns={'PCOPPUSDM': 'Copper', 'PNICKUSDM': 'Nickel', 'PZINCUSDM': 'Zinc'}, inplace=True)
display(Commodities_df)

Unnamed: 0_level_0,Copper,Nickel,Zinc
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-01,7065.852273,12864.875,3441.522727
2018-02-01,7006.525,13595.875,3532.9
2018-03-01,6799.178571,13392.5,3269.178571
2018-04-01,6851.5125,13938.1,3188.05
2018-05-01,6825.27381,14366.4881,3059.869048
2018-06-01,6965.857143,15105.65476,3088.571429
2018-07-01,6250.75,13793.86364,2656.125
2018-08-01,6051.045455,13411.35227,2512.0
2018-09-01,6050.7625,12510.35,2434.675
2018-10-01,6219.586957,12314.91304,2673.673913


In [47]:
# Compute the `monthly_returns_df` DataFrame by chaining
# the pct_change function with the dropna function.
Monthly_Returns_df = Commodities_df.pct_change().dropna()
Monthly_Returns_df

Unnamed: 0_level_0,Copper,Nickel,Zinc
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-02-01,-0.008396,0.056821,0.026551
2018-03-01,-0.029593,-0.014959,-0.074647
2018-04-01,0.007697,0.040739,-0.024816
2018-05-01,-0.00383,0.030735,-0.040207
2018-06-01,0.020597,0.051451,0.00938
2018-07-01,-0.102659,-0.086841,-0.140015
2018-08-01,-0.031949,-0.027731,-0.054261
2018-09-01,-4.7e-05,-0.067182,-0.030782
2018-10-01,0.027901,-0.015622,0.098165
2018-11-01,-0.003805,-0.087309,-0.029166


In [48]:
Monthly_Returns_df.describe()

Unnamed: 0,Copper,Nickel,Zinc
count,46.0,46.0,46.0
mean,0.008018,0.011507,0.0007
std,0.046302,0.063222,0.055789
min,-0.102659,-0.11718,-0.140015
25%,-0.019289,-0.026629,-0.030378
50%,0.000641,0.020386,0.008658
75%,0.034514,0.050253,0.034182
max,0.107369,0.162579,0.108726


In [49]:
# Load the environment variables by calling the load_dotenv function
load_dotenv()

True

In [50]:
# Set Alpaca API key and secret by calling the os.getenv function and referencing the environment variable names
# Set each environment variable to a notebook variable of the same name
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

# Check the values were imported correctly by evaluating the type of each
display(type(alpaca_api_key))
display(type(alpaca_secret_key))

str

str

In [51]:
# Create your Alpaca API REST object by calling Alpaca's tradeapi.REST function
# Set the parameters to your alpaca_api_key, alpaca_secret_key and api_version="v2" 
alpaca = tradeapi.REST(
    alpaca_api_key,
    alpaca_secret_key,
    api_version="v2")

In [52]:
# Create the list for the required tickers
tickers = ["COST", "XLP","VDC","PG"]

In [53]:
# Set the values for start_date and end_date using the pd.Timestamp function
# The start and end data should be 
# Set the parameter tz to "America/New_York", 
# Set this all to the ISO format by calling the isoformat function 
start_date = pd.Timestamp("2018-01-01", tz="America/New_York").isoformat()
end_date = pd.Timestamp("2021-11-01", tz="America/New_York").isoformat()

In [54]:
# Set timeframe to one day (1D)
timeframe = "1D"

# Set number of rows to 1000 to retrieve the maximum amount of rows
limit_rows = 1000

In [55]:
# Use the Alpaca get_barset function to gather the price information for each ticker
# Include the function parameters: tickers, timeframe, start, end, and limit
# Be sure to call the df property to ensure that the returned information is set as a DataFrame
prices_df = alpaca.get_barset(
    tickers,
    timeframe,
    start=start_date,
    end=end_date,
    limit=limit_rows,
).df

# Review the first five rows of the resulting DataFrame 
prices_df.head()

Unnamed: 0_level_0,COST,COST,COST,COST,COST,PG,PG,PG,PG,PG,VDC,VDC,VDC,VDC,VDC,XLP,XLP,XLP,XLP,XLP
Unnamed: 0_level_1,open,high,low,close,volume,open,high,low,close,volume,open,high,low,close,volume,open,high,low,close,volume
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
2018-01-02 00:00:00-05:00,187.19,190.68,186.75,188.36,1746783,91.92,91.93,90.55,90.66,5257189,146.16,146.32,145.2684,145.29,171020.0,56.95,57.02,56.53,56.57,11288883
2018-01-03 00:00:00-05:00,188.52,192.1,188.38,190.61,2258752,90.98,91.09,90.515,90.57,3976843,145.42,145.7,145.1111,145.2,100558.0,56.63,56.72,56.47,56.52,9795436
2018-01-04 00:00:00-05:00,192.0,192.3,187.561,189.12,2566125,90.83,91.77,90.61,91.18,4316526,145.42,146.2818,145.3002,145.6,316349.0,56.67,56.9453,56.53,56.69,7205552
2018-01-05 00:00:00-05:00,189.43,189.68,186.72,187.75,2000923,91.15,91.3,90.57,91.19,3848619,145.98,146.2242,145.49,146.18,102662.0,56.77,56.9667,56.635,56.94,7524295
2018-01-08 00:00:00-05:00,187.32,188.979,187.25,188.47,1126558,90.93,91.77,90.93,91.71,3298886,146.16,146.61,146.001,146.59,99092.0,56.85,57.12,56.82,57.07,6560047


In [56]:
stocks_monthly=prices_df.resample('MS').last()
stocks_monthly.head()


Unnamed: 0_level_0,COST,COST,COST,COST,COST,PG,PG,PG,PG,PG,VDC,VDC,VDC,VDC,VDC,XLP,XLP,XLP,XLP,XLP
Unnamed: 0_level_1,open,high,low,close,volume,open,high,low,close,volume,open,high,low,close,volume,open,high,low,close,volume
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
2018-01-01 00:00:00-05:00,197.87,198.4747,194.1,194.88,1723240,86.81,87.41,86.075,86.38,7246937,149.38,149.6499,147.95,148.55,189536.0,58.14,58.29,57.585,57.83,12284388
2018-02-01 00:00:00-05:00,191.6,193.76,190.89,190.9,1228694,80.68,80.79,78.5,78.52,11284842,139.42,139.6,137.569,137.569,64497.0,54.29,54.32,53.41,53.41,10943106
2018-03-01 00:00:00-05:00,184.74,188.64,184.41,187.86,1438633,79.43,80.49,79.1799,79.28,7289155,135.72,136.8699,135.59,136.15,134563.0,52.48,52.9501,52.48,52.63,12805795
2018-04-01 00:00:00-04:00,197.1,199.04,196.3852,197.19,1117964,72.91,72.915,72.16,72.33,7806891,132.13,132.737,0.0,131.08,92090.0,50.86,51.0852,50.36,50.45,12479239
2018-05-01 00:00:00-04:00,200.21,200.6972,197.7,198.25,2802111,74.67,74.69,72.8,73.15,7705467,131.24,131.24,129.1,129.6,216627.0,50.38,50.43,49.47,49.67,17843563


In [57]:
stocks_close_df=stocks_monthly.xs("close",level=1,axis="columns")
stocks_close_df.head()

Unnamed: 0_level_0,COST,PG,VDC,XLP
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-01-01 00:00:00-05:00,194.88,86.38,148.55,57.83
2018-02-01 00:00:00-05:00,190.9,78.52,137.569,53.41
2018-03-01 00:00:00-05:00,187.86,79.28,136.15,52.63
2018-04-01 00:00:00-04:00,197.19,72.33,131.08,50.45
2018-05-01 00:00:00-04:00,198.25,73.15,129.6,49.67


In [58]:
stocks_close_df.index = stocks_close_df.index.date



In [59]:
assets_df=pd.concat([Commodities_df,stocks_close_df],axis=1)

In [60]:
assets_df.dropna()

Unnamed: 0,Copper,Nickel,Zinc,COST,PG,VDC,XLP
2018-01-01,7065.852273,12864.875,3441.522727,194.88,86.38,148.55,57.83
2018-02-01,7006.525,13595.875,3532.9,190.9,78.52,137.569,53.41
2018-03-01,6799.178571,13392.5,3269.178571,187.86,79.28,136.15,52.63
2018-04-01,6851.5125,13938.1,3188.05,197.19,72.33,131.08,50.45
2018-05-01,6825.27381,14366.4881,3059.869048,198.25,73.15,129.6,49.67
2018-06-01,6965.857143,15105.65476,3088.571429,209.03,78.07,134.25,51.51
2018-07-01,6250.75,13793.86364,2656.125,218.72,80.88,138.92,53.56
2018-08-01,6051.045455,13411.35227,2512.0,233.14,82.93,140.0896,53.81
2018-09-01,6050.7625,12510.35,2434.675,234.89,83.22,140.15,53.92
2018-10-01,6219.586957,12314.91304,2673.673913,228.63,88.66,142.48,55.04


In [61]:
assets_df.corr()

Unnamed: 0,Copper,Nickel,Zinc,COST,PG,VDC,XLP
Copper,1.0,0.807352,0.649963,0.695988,0.48373,0.779058,0.728068
Nickel,0.807352,1.0,0.400987,0.794215,0.649627,0.814884,0.789761
Zinc,0.649963,0.400987,1.0,0.061106,-0.181601,0.170074,0.110052
COST,0.695988,0.794215,0.061106,1.0,0.910465,0.939177,0.941463
PG,0.48373,0.649627,-0.181601,0.910465,1.0,0.897285,0.923665
VDC,0.779058,0.814884,0.170074,0.939177,0.897285,1.0,0.996209
XLP,0.728068,0.789761,0.110052,0.941463,0.923665,0.996209,1.0


In [62]:
assets_df.hvplot()

In [63]:
# Define features set X 
X = Commodities_df

# Display the features DataFrame
X.head()

Unnamed: 0_level_0,Copper,Nickel,Zinc
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-01,7065.852273,12864.875,3441.522727
2018-02-01,7006.525,13595.875,3532.9
2018-03-01,6799.178571,13392.5,3269.178571
2018-04-01,6851.5125,13938.1,3188.05
2018-05-01,6825.27381,14366.4881,3059.869048


In [64]:
# Define target set by selecting the DEFAULT column
y = stocks_monthly

# Display a sample of y
y[:5]

Unnamed: 0_level_0,COST,COST,COST,COST,COST,PG,PG,PG,PG,PG,VDC,VDC,VDC,VDC,VDC,XLP,XLP,XLP,XLP,XLP
Unnamed: 0_level_1,open,high,low,close,volume,open,high,low,close,volume,open,high,low,close,volume,open,high,low,close,volume
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
2018-01-01 00:00:00-05:00,197.87,198.4747,194.1,194.88,1723240,86.81,87.41,86.075,86.38,7246937,149.38,149.6499,147.95,148.55,189536.0,58.14,58.29,57.585,57.83,12284388
2018-02-01 00:00:00-05:00,191.6,193.76,190.89,190.9,1228694,80.68,80.79,78.5,78.52,11284842,139.42,139.6,137.569,137.569,64497.0,54.29,54.32,53.41,53.41,10943106
2018-03-01 00:00:00-05:00,184.74,188.64,184.41,187.86,1438633,79.43,80.49,79.1799,79.28,7289155,135.72,136.8699,135.59,136.15,134563.0,52.48,52.9501,52.48,52.63,12805795
2018-04-01 00:00:00-04:00,197.1,199.04,196.3852,197.19,1117964,72.91,72.915,72.16,72.33,7806891,132.13,132.737,0.0,131.08,92090.0,50.86,51.0852,50.36,50.45,12479239
2018-05-01 00:00:00-04:00,200.21,200.6972,197.7,198.25,2802111,74.67,74.69,72.8,73.15,7705467,131.24,131.24,129.1,129.6,216627.0,50.38,50.43,49.47,49.67,17843563


In [65]:
df=

SyntaxError: invalid syntax (3220236241.py, line 1)

In [None]:
# Create training and testing datasets using train_test_split
# Assign the function a random_state equal to 1
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [None]:
# Create the StandardScaler instance
X_scaler = StandardScaler()

In [None]:
# Fit the scaler to the features training dataset
X_scaler.fit(X_train)

StandardScaler()

In [None]:
# Scale both the training and testing data from the features dataset
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [None]:
# Define the the number of inputs (features) to the model
number_input_features = len(X_train.iloc[0])

# Define the number of hidden nodes for the first hidden layer
# Use the mean of the number of input features plus the number of output nurons
# Use the Python floor division (//) to return the quotent
hidden_nodes_layer1 =  (number_input_features + 1) // 2 

# Define the number of hidden nodes for the second hidden layer
# Use the mean of the number of hidden nodes in the first hidden layer plus the number of output nurons
# Use the Python floor division (//) to return the quotent
hidden_nodes_layer2 = (hidden_nodes_layer1 + 1) // 2

# Create the Sequential model instance
nn = Sequential()

# Add the first hidden layer specifying the number of inputs, the number of hidden nodes, and the activation function
nn.add(Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"))

# Add the second hidden layer specifying the number of hidden nodes and the activation function
nn.add(Dense(units=hidden_nodes_layer2, activation="relu"))

# Add the output layer to the model specifying the number of output neurons and activation function
nn.add(Dense(units=1, activation="sigmoid"))

In [66]:
 #Display the Sequential model summary
nn.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 2)                 8         
                                                                 
 dense_5 (Dense)             (None, 1)                 3         
                                                                 
 dense_6 (Dense)             (None, 1)                 2         
                                                                 
Total params: 13
Trainable params: 13
Non-trainable params: 0
_________________________________________________________________


In [67]:
# Compile the Sequential model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [68]:
# Fit the model using 100 epochs and the training data
model = nn.fit(X_train_scaled, y_train, epochs=100)

Epoch 1/100


ValueError: in user code:

    File "/Applications/anaconda3/envs/dev/lib/python3.9/site-packages/keras/engine/training.py", line 878, in train_function  *
        return step_function(self, iterator)
    File "/Applications/anaconda3/envs/dev/lib/python3.9/site-packages/keras/engine/training.py", line 867, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Applications/anaconda3/envs/dev/lib/python3.9/site-packages/keras/engine/training.py", line 860, in run_step  **
        outputs = model.train_step(data)
    File "/Applications/anaconda3/envs/dev/lib/python3.9/site-packages/keras/engine/training.py", line 809, in train_step
        loss = self.compiled_loss(
    File "/Applications/anaconda3/envs/dev/lib/python3.9/site-packages/keras/engine/compile_utils.py", line 201, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "/Applications/anaconda3/envs/dev/lib/python3.9/site-packages/keras/losses.py", line 141, in __call__
        losses = call_fn(y_true, y_pred)
    File "/Applications/anaconda3/envs/dev/lib/python3.9/site-packages/keras/losses.py", line 245, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/Applications/anaconda3/envs/dev/lib/python3.9/site-packages/keras/losses.py", line 1807, in binary_crossentropy
        backend.binary_crossentropy(y_true, y_pred, from_logits=from_logits),
    File "/Applications/anaconda3/envs/dev/lib/python3.9/site-packages/keras/backend.py", line 5158, in binary_crossentropy
        return tf.nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output)

    ValueError: `logits` and `labels` must have the same shape, received ((None, 1) vs (None, 20)).
