In [1]:
import warnings
warnings.filterwarnings('ignore')

In [13]:
from pathlib import Path
import requests
from io import BytesIO
from zipfile import ZipFile, BadZipFile

import numpy as np
import pandas as pd
import pandas_datareader.data as web
from sklearn.datasets import fetch_openml
from openbb_terminal.sdk import openbb
import os
path='/Users/Massimiliano'
os.chdir(path)

pd.set_option('display.expand_frame_repr', False)

In [8]:
DATA_STORE = 'assets.h5'

In [14]:
df = (pd.read_csv('/Users/Massimiliano/wiki_prices.csv',
        parse_dates=['date'],
        index_col=['date', 'ticker'],
        infer_datetime_format=True).sort_index())

print(df.info(show_counts=True))
with pd.HDFStore(DATA_STORE) as store:
        store.put('quandl/wiki/prices', df)

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 15389314 entries, (Timestamp('1962-01-02 00:00:00'), 'ARNC') to (Timestamp('2018-03-27 00:00:00'), 'ZUMZ')
Data columns (total 12 columns):
 #   Column       Non-Null Count     Dtype  
---  ------       --------------     -----  
 0   open         15388776 non-null  float64
 1   high         15389259 non-null  float64
 2   low          15389259 non-null  float64
 3   close        15389313 non-null  float64
 4   volume       15389314 non-null  float64
 5   ex-dividend  15389314 non-null  float64
 6   split_ratio  15389313 non-null  float64
 7   adj_open     15388776 non-null  float64
 8   adj_high     15389259 non-null  float64
 9   adj_low      15389259 non-null  float64
 10  adj_close    15389313 non-null  float64
 11  adj_volume   15389314 non-null  float64
dtypes: float64(12)
memory usage: 1.4+ GB
None


In [15]:
df = web.DataReader(name='SP500', data_source='fred', start=2009).squeeze().to_frame('close')
print(df.info())
with pd.HDFStore(DATA_STORE) as store:
    store.put('sp500/fred', df)

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2610 entries, 2013-09-30 to 2023-09-29
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   close   2518 non-null   float64
dtypes: float64(1)
memory usage: 40.8 KB
None


In [16]:
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
df = pd.read_html(url, header=0)[0]

In [17]:
df.head()

Unnamed: 0,Symbol,Security,GICS Sector,GICS Sub-Industry,Headquarters Location,Date added,CIK,Founded
0,MMM,3M,Industrials,Industrial Conglomerates,"Saint Paul, Minnesota",1957-03-04,66740,1902
1,AOS,A. O. Smith,Industrials,Building Products,"Milwaukee, Wisconsin",2017-07-26,91142,1916
2,ABT,Abbott,Health Care,Health Care Equipment,"North Chicago, Illinois",1957-03-04,1800,1888
3,ABBV,AbbVie,Health Care,Pharmaceuticals,"North Chicago, Illinois",2012-12-31,1551152,2013 (1888)
4,ACN,Accenture,Information Technology,IT Consulting & Other Services,"Dublin, Ireland",2011-07-06,1467373,1989


In [18]:
df.columns = ['ticker', 'name', 'gics_sector', 'gics_sub_industry',
                'location', 'first_added', 'cik', 'founded']

In [19]:
print(df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 503 entries, 0 to 502
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   ticker             503 non-null    object
 1   name               503 non-null    object
 2   gics_sector        503 non-null    object
 3   gics_sub_industry  503 non-null    object
 4   location           503 non-null    object
 5   first_added        493 non-null    object
 6   cik                503 non-null    int64 
 7   founded            503 non-null    object
dtypes: int64(1), object(7)
memory usage: 31.6+ KB
None


In [20]:
with pd.HDFStore(DATA_STORE) as store:
    store.put('sp500/stocks', df)

In [21]:

exchanges = ['NASDAQ.csv', 'AMEX.csv', 'NYSE.csv']
df = pd.concat([pd.read_csv(format(ex)) for ex in exchanges]).dropna(how='all', axis=1)

df = df[~df.index.duplicated()]
df=df.rename(columns={"Symbol":"ticker"})
print(df.info()) 

<class 'pandas.core.frame.DataFrame'>
Index: 4314 entries, 0 to 4313
Data columns (total 11 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   ticker      4313 non-null   object 
 1   Name        4314 non-null   object 
 2   Last Sale   4314 non-null   object 
 3   Net Change  4314 non-null   float64
 4   % Change    4313 non-null   object 
 5   Market Cap  4302 non-null   float64
 6   Country     4313 non-null   object 
 7   IPO Year    2622 non-null   float64
 8   Volume      4314 non-null   int64  
 9   Sector      4184 non-null   object 
 10  Industry    4184 non-null   object 
dtypes: float64(3), int64(1), object(7)
memory usage: 404.4+ KB
None


In [26]:
mcap=df.rename(columns = {'Market Cap':'mktcap'}, inplace = True)

with pd.HDFStore(DATA_STORE) as store:
    store.put('us_equities/stocks', df.set_index('ticker'))

df.info()


<class 'pandas.core.frame.DataFrame'>
Index: 4314 entries, 0 to 4313
Data columns (total 11 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   ticker      4313 non-null   object 
 1   Name        4314 non-null   object 
 2   Last Sale   4314 non-null   object 
 3   Net Change  4314 non-null   float64
 4   % Change    4313 non-null   object 
 5   mktcap      4302 non-null   float64
 6   Country     4313 non-null   object 
 7   IPO Year    2622 non-null   float64
 8   Volume      4314 non-null   int64  
 9   Sector      4184 non-null   object 
 10  Industry    4184 non-null   object 
dtypes: float64(3), int64(1), object(7)
memory usage: 404.4+ KB


In [29]:
sp500_overview = openbb.stocks.screener.screener_data(preset_loaded='sp500_filter.ini', data_type = 'overview')
sp500_ownership = openbb.stocks.screener.screener_data(preset_loaded='sp500_filter', data_type = 'ownership')
sp500_performance = openbb.stocks.screener.screener_data(preset_loaded='sp500_filter', data_type = 'performance')
sp500_technical = openbb.stocks.screener.screener_data(preset_loaded='sp500_filter', data_type = 'technical')
sp500_valuation = openbb.stocks.screener.screener_data(preset_loaded='sp500_filter', data_type = 'valuation')

sp500_overview = sp500_overview.convert_dtypes()
sp500_ownership = sp500_ownership.convert_dtypes()
sp500_performance = sp500_performance.convert_dtypes()
sp500_technical = sp500_technical.convert_dtypes()
sp500_valuation = sp500_valuation.convert_dtypes()

sp500_overview.drop(columns = ['P/E'], inplace = True)
sp500_overview.set_index(keys = ['Ticker', 'Price', 'Change', 'Volume'], inplace = True)
sp500_performance.drop(columns = ['Avg Volume', 'Price', 'Change', 'Volume'], inplace = True)
sp500_performance.set_index(keys = ['Ticker'], inplace = True)
sp500_ownership.drop(columns = ['Price', 'Change', 'Volume', 'Market Cap'], inplace = True)
sp500_ownership.set_index(keys = ['Ticker'], inplace = True)
sp500_technical.drop(columns = ['Price', 'Change', 'Volume'], inplace = True)
sp500_technical.set_index(keys = ['Ticker'], inplace = True)
sp500_valuation.drop(columns = ['Price', 'Change', 'Volume', 'Market Cap'], inplace = True)
sp500_valuation.set_index(keys = ['Ticker'], inplace = True)


sp500_df = sp500_overview.join(sp500_valuation)
sp500_df = sp500_df.join(sp500_ownership)
sp500_df = sp500_df.join(sp500_performance)
sp500_df = sp500_df.join(sp500_technical)



INFO:openbb_terminal.stocks.screener.finviz_model:START
INFO:openbb_terminal.stocks.screener.finviz_model:{"INPUT": {"preset_loaded": "sp500_filter.ini", "data_type": "overview", "limit": "-1", "ascend": "False", "chart": "False"}, "VIRTUAL_PATH": "stocks.screener.screener_data", "CHART": false}
INFO:openbb_terminal.stocks.screener.finviz_model:START


ERROR:openbb_terminal.stocks.screener.finviz_model:Exception: 'sp500_filter.ini'
Traceback (most recent call last):
  File "/Users/Massimiliano/anaconda3/envs/python39/lib/python3.9/site-packages/openbb_terminal/decorators.py", line 71, in wrapper
    value = func(*args, **kwargs)
  File "/Users/Massimiliano/anaconda3/envs/python39/lib/python3.9/site-packages/openbb_terminal/stocks/screener/finviz_model.py", line 144, in get_screener_data
    preset_filter.read(preset_choices[preset_loaded])
KeyError: 'sp500_filter.ini'
INFO:openbb_terminal.stocks.screener.finviz_model:END
INFO:openbb_terminal.stocks.screener.finviz_model:START
INFO:openbb_terminal.stocks.screener.finviz_model:{"INPUT": {"preset_loaded": "sp500_filter", "data_type": "ownership", "limit": "-1", "ascend": "False", "chart": "False"}, "VIRTUAL_PATH": "stocks.screener.screener_data", "CHART": false}
INFO:openbb_terminal.stocks.screener.finviz_model:START


[Info] loading page [#-----------------------------] 1/26 

ERROR:openbb_terminal.stocks.screener.finviz_model:Exception: 'NoneType' object has no attribute 'findAll'
Traceback (most recent call last):
  File "/Users/Massimiliano/anaconda3/envs/python39/lib/python3.9/site-packages/openbb_terminal/decorators.py", line 71, in wrapper
    value = func(*args, **kwargs)
  File "/Users/Massimiliano/anaconda3/envs/python39/lib/python3.9/site-packages/openbb_terminal/stocks/screener/finviz_model.py", line 180, in get_screener_data
    df_screen = screen.screener_view(
  File "/Users/Massimiliano/anaconda3/envs/python39/lib/python3.9/site-packages/finvizfinance/screener/overview.py", line 278, in screener_view
    rows = table.findAll("tr")
AttributeError: 'NoneType' object has no attribute 'findAll'
INFO:openbb_terminal.stocks.screener.finviz_model:END
INFO:openbb_terminal.stocks.screener.finviz_model:START
INFO:openbb_terminal.stocks.screener.finviz_model:{"INPUT": {"preset_loaded": "sp500_filter", "data_type": "performance", "limit": "-1", "ascend": 

[Info] loading page [#-----------------------------] 1/26 

ERROR:openbb_terminal.stocks.screener.finviz_model:Exception: 'NoneType' object has no attribute 'findAll'
Traceback (most recent call last):
  File "/Users/Massimiliano/anaconda3/envs/python39/lib/python3.9/site-packages/openbb_terminal/decorators.py", line 71, in wrapper
    value = func(*args, **kwargs)
  File "/Users/Massimiliano/anaconda3/envs/python39/lib/python3.9/site-packages/openbb_terminal/stocks/screener/finviz_model.py", line 180, in get_screener_data
    df_screen = screen.screener_view(
  File "/Users/Massimiliano/anaconda3/envs/python39/lib/python3.9/site-packages/finvizfinance/screener/overview.py", line 278, in screener_view
    rows = table.findAll("tr")
AttributeError: 'NoneType' object has no attribute 'findAll'
INFO:openbb_terminal.stocks.screener.finviz_model:END
INFO:openbb_terminal.stocks.screener.finviz_model:START
INFO:openbb_terminal.stocks.screener.finviz_model:{"INPUT": {"preset_loaded": "sp500_filter", "data_type": "technical", "limit": "-1", "ascend": "F

[Info] loading page [#-----------------------------] 1/26 

ERROR:openbb_terminal.stocks.screener.finviz_model:Exception: 'NoneType' object has no attribute 'findAll'
Traceback (most recent call last):
  File "/Users/Massimiliano/anaconda3/envs/python39/lib/python3.9/site-packages/openbb_terminal/decorators.py", line 71, in wrapper
    value = func(*args, **kwargs)
  File "/Users/Massimiliano/anaconda3/envs/python39/lib/python3.9/site-packages/openbb_terminal/stocks/screener/finviz_model.py", line 180, in get_screener_data
    df_screen = screen.screener_view(
  File "/Users/Massimiliano/anaconda3/envs/python39/lib/python3.9/site-packages/finvizfinance/screener/overview.py", line 278, in screener_view
    rows = table.findAll("tr")
AttributeError: 'NoneType' object has no attribute 'findAll'
INFO:openbb_terminal.stocks.screener.finviz_model:END
INFO:openbb_terminal.stocks.screener.finviz_model:START
INFO:openbb_terminal.stocks.screener.finviz_model:{"INPUT": {"preset_loaded": "sp500_filter", "data_type": "valuation", "limit": "-1", "ascend": "F

[Info] loading page [#-----------------------------] 1/26 

ERROR:openbb_terminal.stocks.screener.finviz_model:Exception: 'NoneType' object has no attribute 'findAll'
Traceback (most recent call last):
  File "/Users/Massimiliano/anaconda3/envs/python39/lib/python3.9/site-packages/openbb_terminal/decorators.py", line 71, in wrapper
    value = func(*args, **kwargs)
  File "/Users/Massimiliano/anaconda3/envs/python39/lib/python3.9/site-packages/openbb_terminal/stocks/screener/finviz_model.py", line 180, in get_screener_data
    df_screen = screen.screener_view(
  File "/Users/Massimiliano/anaconda3/envs/python39/lib/python3.9/site-packages/finvizfinance/screener/overview.py", line 278, in screener_view
    rows = table.findAll("tr")
AttributeError: 'NoneType' object has no attribute 'findAll'
INFO:openbb_terminal.stocks.screener.finviz_model:END


AttributeError: 'list' object has no attribute 'convert_dtypes'

In [49]:
sp500_df.to_csv('SP500_df.csv')

sp500_df = (pd.read_csv('SP500_df.csv'))
print(sp500_df.info(null_counts=True))
with pd.HDFStore(DATA_STORE) as store:
        store.put('SP500_df', sp500_df)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 503 entries, 0 to 502
Data columns (total 50 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Ticker         503 non-null    object 
 1   Price          503 non-null    float64
 2   Change         503 non-null    float64
 3   Volume         503 non-null    int64  
 4   Company        503 non-null    object 
 5   Sector         503 non-null    object 
 6   Industry       503 non-null    object 
 7   Country        503 non-null    object 
 8   Market Cap     503 non-null    float64
 9   P/E            463 non-null    float64
 10  Fwd P/E        499 non-null    float64
 11  PEG            382 non-null    float64
 12  P/S            503 non-null    float64
 13  P/B            468 non-null    float64
 14  P/C            499 non-null    float64
 15  P/FCF          433 non-null    float64
 16  EPS this Y     501 non-null    float64
 17  EPS next Y     501 non-null    float64
 18  EPS past 5

## MNIST Data

In [50]:
mnist = fetch_openml('mnist_784', version=1)

In [51]:
print(mnist.DESCR)

**Author**: Yann LeCun, Corinna Cortes, Christopher J.C. Burges  
**Source**: [MNIST Website](http://yann.lecun.com/exdb/mnist/) - Date unknown  
**Please cite**:  

The MNIST database of handwritten digits with 784 features, raw data available at: http://yann.lecun.com/exdb/mnist/. It can be split in a training set of the first 60,000 examples, and a test set of 10,000 examples  

It is a subset of a larger set available from NIST. The digits have been size-normalized and centered in a fixed-size image. It is a good database for people who want to try learning techniques and pattern recognition methods on real-world data while spending minimal efforts on preprocessing and formatting. The original black and white (bilevel) images from NIST were size normalized to fit in a 20x20 pixel box while preserving their aspect ratio. The resulting images contain grey levels as a result of the anti-aliasing technique used by the normalization algorithm. the images were centered in a 28x28 image b

In [52]:
mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

In [53]:
mnist_path = Path('mnist')
if not mnist_path.exists():
    mnist_path.mkdir()

In [54]:
np.save(mnist_path / 'data', mnist.data.astype(np.uint8))
np.save(mnist_path / 'labels', mnist.target.astype(np.uint8))