<a href="https://colab.research.google.com/github/benchov/Machine_Learning_for_Trading_Knowledge/blob/main/Market_Analysis_I.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
# !pip install kneed
# !pip install alpaca-trade-api
# !pip install --upgrade pandas
# !pip install --upgrade pandas-datareader
# !pip install yfinance

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


### Market Analysis for Paper Trading




0.   Connect to the market data provider api
1.   Get Data
2.   Filter available assets in Alpaca Market
3.   Find Cointegrated pairs for Statistical Arbitrage
4.   Pick cointegrated pairs and detect trend with HMM Clustering
5.   Detect entry point, target price and stop price by the chart
6.   Create trading report.



#### Imports

In [14]:
from datetime import datetime
# remove unwanted warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# data extraction and management
import numpy as np
import pandas as pd
from pandas_datareader import DataReader
from pandas_datareader.nasdaq_trader  import get_nasdaq_symbols

# feature engineering
from sklearn.preprocessing import StandardScaler

# machine learing related imports
from sklearn.cluster import KMeans
from sklearn import metrics
from kneed import KneeLocator

# cointegration and statistic
from statsmodels.tsa.stattools import coint
import statsmodels.api as simplefilter

# reporting and visualisation
import matplotlib.pyplot as plt
import matplotlib.cm as cm
%matplotlib inline

# market data
import alpaca_trade_api as alpaca
import yfinance as yf

#### 0.0 Connect to the market data provider

In [11]:
api = alpaca.REST(key_id, secret_key, 'https://paper-api.alpaca.markets')
api.get_account()

Account({   'account_blocked': False,
    'account_number': 'PA3OJP423MLE',
    'accrued_fees': '0',
    'balance_asof': '2022-12-23',
    'bod_dtbp': '0',
    'buying_power': '200000',
    'cash': '100000',
    'created_at': '2022-12-22T19:56:32.073526Z',
    'crypto_status': 'ACTIVE',
    'crypto_tier': 0,
    'currency': 'USD',
    'daytrade_count': 0,
    'daytrading_buying_power': '0',
    'effective_buying_power': '200000',
    'equity': '100000',
    'id': 'd38ec6c2-a23b-4708-a064-cda26744e218',
    'initial_margin': '0',
    'last_equity': '100000',
    'last_maintenance_margin': '0',
    'long_market_value': '0',
    'maintenance_margin': '0',
    'multiplier': '2',
    'non_marginable_buying_power': '100000',
    'pattern_day_trader': False,
    'pending_transfer_in': '0',
    'portfolio_value': '100000',
    'position_market_value': '0',
    'regt_buying_power': '200000',
    'short_market_value': '0',
    'shorting_enabled': True,
    'sma': '100000',
    'status': 'ACTIVE'

#### 1.0 Get Data

In [12]:
# get list of available stocks 
asset_list =api.list_assets()
asset_list = [asset.symbol for asset in asset_list if (asset.status == 'active') & (asset.exchange == 'OTC')]
len(asset_list), asset_list[:10]

(403,
 ['TSLVF',
  'SHIIY',
  'URRND',
  'RVIC',
  'SQBGQ',
  'SRRRF',
  'STWRY',
  'SMTSF',
  'TRIRF',
  'WBEVQ'])

In [15]:
start_date = '2017-01-01'
end_date = '2022-12-27'
data = yf.download(asset_list, start_date, end_date)
data

[*********************100%***********************]  403 of 403 completed

20 Failed downloads:
- NMNZD: No timezone found, symbol may be delisted
- SRRRF: No timezone found, symbol may be delisted
- OGZPY: No timezone found, symbol may be delisted
- AKRBY: No timezone found, symbol may be delisted
- BNCTF: No timezone found, symbol may be delisted
- ELNBF: No timezone found, symbol may be delisted
- SQBGQ: No timezone found, symbol may be delisted
- TMPLS: No timezone found, symbol may be delisted
- SBDCD: No timezone found, symbol may be delisted
- SPNNF: No timezone found, symbol may be delisted
- LAIXY: No timezone found, symbol may be delisted
- JBINF: No timezone found, symbol may be delisted
- AVISF: No timezone found, symbol may be delisted
- MONGD: No timezone found, symbol may be delisted
- NWFFF: No timezone found, symbol may be delisted
- ACMSY: No timezone found, symbol may be delisted
- BCHTF: No timezone found, symbol may be delisted
- ORPHY: No timezone found, symbol may

Unnamed: 0_level_0,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,...,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Unnamed: 0_level_1,AAGIY,ACGBY,ACHHY,ACKAY,ACLLY,ACMSY,ACPRD,ACUT,ADDYY,ADRNY,...,WMMVY,WXXWY,XIACY,YARIY,YZCAY,ZLNDY,ZMENY,ZSANQ,ZURVY,ZVOI
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2017-01-03,20.554806,6.982244,10.201818,23.594398,,,,,72.392563,17.548058,...,187000,,,19800,5871,943,,2766.0,79000,
2017-01-04,20.554806,6.995684,10.399336,23.594398,,,,,71.328384,17.480627,...,142000,,,8000,16181,624,,9743.0,77700,
2017-01-05,21.083021,6.941923,10.646234,21.984615,,,,,71.365715,17.505915,...,273800,,,9800,21955,0,,8763.0,55600,
2017-01-06,21.192307,7.042726,10.438839,21.392782,,,,,71.291046,17.337343,...,96100,,,2400,8846,1591,,28827.0,76900,
2017-01-09,21.474627,7.056166,10.241322,21.455908,,,,,71.151016,17.480627,...,188900,,,14400,5470,0,,8197.0,58600,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-19,42.810001,8.220000,10.700000,27.340000,20.000000,,0.259,1.30,64.089996,28.820000,...,144300,44100.0,4786.0,12500,3011,23524,1700.0,,52000,249300.0
2022-12-20,43.549999,8.320000,10.750000,27.340000,20.500000,,0.259,1.30,63.009998,29.150000,...,59600,47500.0,32053.0,20700,993,28232,10400.0,,44900,219200.0
2022-12-21,43.880001,8.310000,10.500000,27.340000,20.400000,,0.259,1.15,66.699997,29.350000,...,32600,25300.0,43277.0,19000,1691,18247,48200.0,,53000,61500.0
2022-12-22,42.669998,8.260000,10.050000,27.340000,19.700001,,0.259,1.30,66.400002,29.440001,...,52000,62800.0,23471.0,20600,1982,26244,1000.0,,111100,132800.0


#### 1.1 Clear Data

In [17]:
data.dropna(inplace=True, axis=1)
data.head()

Unnamed: 0_level_0,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,...,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Unnamed: 0_level_1,AAGIY,ACGBY,ACHHY,ACKAY,ADDYY,ADRNY,ADXS,AFIIQ,AGESY,AHKSY,...,VYBED,WEGRY,WEICY,WFAFY,WHGLY,WMMVY,YARIY,YZCAY,ZLNDY,ZURVY
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2017-01-03,20.554806,6.982244,10.201818,23.594398,72.392563,17.548058,8987.955078,20.299999,27.206774,17.440001,...,0,15814,91600,4200,40900,187000,19800,5871,943,79000
2017-01-04,20.554806,6.995684,10.399336,23.594398,71.328384,17.480627,9527.952148,20.48,27.574615,17.65,...,0,2358,27200,500,14000,142000,8000,16181,624,77700
2017-01-05,21.083021,6.941923,10.646234,21.984615,71.365715,17.505915,9455.953125,19.950001,27.370258,17.58,...,0,5111,15600,800,18100,273800,9800,21955,0,55600
2017-01-06,21.192307,7.042726,10.438839,21.392782,71.291046,17.337343,9875.950195,20.139999,27.608673,17.4,...,0,2015,24800,1300,23400,96100,2400,8846,1591,76900
2017-01-09,21.474627,7.056166,10.241322,21.455908,71.151016,17.480627,10103.949219,19.75,27.588236,17.4,...,0,2392,66000,300,11300,188900,14400,5470,0,58600


In [19]:
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1506 entries, 2017-01-03 to 2022-12-23
Columns: 1782 entries, ('Adj Close', 'AAGIY') to ('Volume', 'ZURVY')
dtypes: float64(1485), int64(297)
memory usage: 20.5 MB


#### 2.0 Feature Engineering

In [20]:
# create dataframe which cointains return and volatility information
df_extended = pd.DataFrame(data.pct_change().mean() * 255, columns=['returns'])
df_extended['volatility'] = data.pct_change().std() * np.sqrt(255)
df_extended.head()

Unnamed: 0,Unnamed: 1,returns,volatility
Adj Close,AAGIY,0.173316,0.292892
Adj Close,ACGBY,0.0546,0.230815
Adj Close,ACHHY,0.164291,0.548021
Adj Close,ACKAY,0.137869,0.473616
Adj Close,ADDYY,0.048171,0.352911
