# Stock Trading Bot Ayyyyy
- api reference: https://www.alphavantage.co/documentation/ and potentially https://site.financialmodelingprep.com/developer/docs

In [23]:
!pip install graphviz

Collecting graphviz
  Downloading graphviz-0.20.1-py3-none-any.whl (47 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.0/47.0 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: graphviz
Successfully installed graphviz-0.20.1


In [1]:
# This is just importing certain libraries we will use consistently throughouit the porject
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import random
import requests

#### In order to access the data from the API, we need the following:
- api key: It's generated by the platform as a hash code that is unique to each user so it knows that they are authenticated to use the service
- Base URL: For all calls to the api, we use the same base url(`https://www.alphavantage.co/query?`). However, in order to get more specified data, such as weekly data or data from a certain stock ticker, we need to add that to the url as parameters. Since all will use the `base_url`, we instantiate a global variable that be used wherever we need in any function and so we don't have to keep re-pasting it.
- tickers: this is just an array of the s&p 500 tickers. We will obviously need an array of them because we want to examine multiple stocks, not just one

In [2]:
api_key = "FI0W1W7TPDCRLO18"
base_url = "https://www.alphavantage.co/query?"
tickers = ['IBM', 'AAPL'] # This is just a test for now. Will add many more later, possibly thru another api

In [3]:
def weekly_req(ticker_arr):
    """This function takes in 1 parameter, the ticker array, and makes a request to the api with every single stock ticker symbol
    that is inside of the tickers array we creatred in the above cell. It then appends the data into a newly instantiated dfs array
    and concatenates it into a pandas dataframe for manipulation, modeling, and readability purposes."""
    function = "TIME_SERIES_DAILY_ADJUSTED"
    dfs = []
    for ticker in tickers:
        url = f"{base_url}function={function}&symbol={ticker}&apikey={api_key}"
        response = requests.get(url)
        data = response.json()
        # Extract the desired data from the json response and convert it to a DataFrame
        time_series_data = data["Time Series (Daily)"]
        df = pd.DataFrame.from_dict(time_series_data, orient='index')
        df['symbol'] = ticker  # Add a 'symbol' column with the ticker symbol

        # Append the DataFrame to the list
        dfs.append(df)
    result_df = pd.concat(dfs)

    return result_df

df=weekly_req(tickers)
df.reset_index(inplace=True)

In [15]:
df.rename(columns={'index': 'date'}, inplace=True)
df['1. open'] = df['1. open'].astype(float)
df['2. high'] = df['2. high'].astype(float)
df['3. low'] = df['3. low'].astype(float)
df['4. close'] = df['4. close'].astype(float)
df['5. adjusted close'] = df['5. adjusted close'].astype(float)
df['6. volume'] = df['6. volume'].astype(int)
df['7. dividend amount'] = df['7. dividend amount'].astype(float)
df['8. split coefficient'] = df['8. split coefficient'].astype(float)
df.head()

Unnamed: 0,date,1. open,2. high,3. low,4. close,5. adjusted close,6. volume,7. dividend amount,8. split coefficient,symbol
0,2023-06-20,136.36,137.23,135.89,135.96,135.96,4226993,0.0,1.0,IBM
1,2023-06-16,139.23,139.469,137.47,137.48,137.48,7473676,0.0,1.0,IBM
2,2023-06-15,137.27,138.8,137.175,138.4,138.4,3812582,0.0,1.0,IBM
3,2023-06-14,137.8,138.93,136.94,137.2,137.2,4514888,0.0,1.0,IBM
4,2023-06-13,136.51,138.17,136.0,137.6,137.6,3927331,0.0,1.0,IBM


In [16]:
df.dtypes

date                    datetime64[ns]
1. open                        float64
2. high                        float64
3. low                         float64
4. close                       float64
5. adjusted close              float64
6. volume                        int64
7. dividend amount             float64
8. split coefficient           float64
symbol                          object
dtype: object

In [17]:
# Convert date column to date value instead of str in order to compare and filter for only 1 week prior
df['date'] = pd.to_datetime(df['date'])

# Calculate the start and end dates for the previous week
end_date = df['date'].max()
start_date = end_date - pd.DateOffset(weeks=1)

# Filter the DataFrame for the previous week
filtered_df = df[(df['date'] >= start_date) & (df['date'] <= end_date)]
filtered_df

Unnamed: 0,date,1. open,2. high,3. low,4. close,5. adjusted close,6. volume,7. dividend amount,8. split coefficient,symbol
0,2023-06-20,136.36,137.23,135.89,135.96,135.96,4226993,0.0,1.0,IBM
1,2023-06-16,139.23,139.469,137.47,137.48,137.48,7473676,0.0,1.0,IBM
2,2023-06-15,137.27,138.8,137.175,138.4,138.4,3812582,0.0,1.0,IBM
3,2023-06-14,137.8,138.93,136.94,137.2,137.2,4514888,0.0,1.0,IBM
4,2023-06-13,136.51,138.17,136.0,137.6,137.6,3927331,0.0,1.0,IBM
100,2023-06-20,184.41,186.1,184.41,185.01,185.01,48964426,0.0,1.0,AAPL
101,2023-06-16,186.73,186.99,184.27,184.92,184.92,101256225,0.0,1.0,AAPL
102,2023-06-15,183.96,186.52,183.78,186.01,186.01,65433166,0.0,1.0,AAPL
103,2023-06-14,183.37,184.39,182.02,183.95,183.95,57462882,0.0,1.0,AAPL
104,2023-06-13,182.8,184.15,182.44,183.31,183.31,54929129,0.0,1.0,AAPL


In [27]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
import graphviz

unique_values = filtered_df['symbol'].unique()
for value in unique_values:
    # Filter the DataFrame for the current unique value
    subset = filtered_df[filtered_df['symbol'] == value]

    # Prepare the feature matrix X and target variable y
    X = subset[['1. open', '2. high', '3. low', '6. volume']]  # Adjust column names as per your data
    y = subset['5. adjusted close']  # Adjust column names as per your data

    # Create a decision tree classifier
    clf = DecisionTreeClassifier()
    clf.fit(X, y)

    # Visualize the decision tree
    dot_data = tree.export_graphviz(clf, out_file=None,
                                   feature_names=X.columns,
                                   class_names=y.unique().astype(str),
                                   filled=True, rounded=True,
                                   special_characters=True)
    graph = graphviz.Source(dot_data)
    graph.render(f"decision_tree_{value}")  # Save the visualization to a file (optional)
    graph.view()  # Display the decision tree


ValueError: Unknown label type: 'continuous'

In [26]:
for value in unique_values:
    # Filter the DataFrame for the current unique value
    subset = df[df['column_name'] == value]

    # Prepare the feature matrix X and target variable y
    X = subset[['1. open', '2. high', '3. low', '6. volume']]  # Adjust column names as per your data
    y = subset['5. adjusted close']  # Adjust column names as per your data

    # Create a decision tree classifier
    clf = DecisionTreeClassifier()
    clf.fit(X, y)

    # Visualize the decision tree
    dot_data = tree.export_graphviz(clf, out_file=None,
                                   feature_names=X.columns,
                                   class_names=y.unique().astype(str),
                                   filled=True, rounded=True,
                                   special_characters=True)
    graph = graphviz.Source(dot_data)
    graph.render(f"decision_tree_{value}")  # Save the visualization to a file (optional)
    graph.view()  # Display the decision tree


KeyError: 'column_name'