In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
from bs4 import BeautifulSoup

In [2]:
# Get all the tickers of S&P 100 from Wikipedia

url = 'https://en.wikipedia.org/wiki/S%26P_100'
response = requests.get(url)

if response.status_code == 200:
    parsed_data = BeautifulSoup(response.content, 'html.parser')
    
    table = parsed_data.find('table', {'id': 'constituents'})
    
    if table:
        sn_p100 = [row.find_all('td')[0].get_text(strip=True) for row in table.find_all('tr') if row.find_all('td')]
    else:
        print("Table not found.")
else:
    print(f"Failed to fetch. Status code: {response.status_code}")

sn_p100

['AAPL',
 'ABBV',
 'ABT',
 'ACN',
 'ADBE',
 'AIG',
 'AMD',
 'AMGN',
 'AMT',
 'AMZN',
 'AVGO',
 'AXP',
 'BA',
 'BAC',
 'BK',
 'BKNG',
 'BLK',
 'BMY',
 'BRK.B',
 'C',
 'CAT',
 'CHTR',
 'CL',
 'CMCSA',
 'COF',
 'COP',
 'COST',
 'CRM',
 'CSCO',
 'CVS',
 'CVX',
 'DE',
 'DHR',
 'DIS',
 'DOW',
 'DUK',
 'EMR',
 'EXC',
 'F',
 'FDX',
 'GD',
 'GE',
 'GILD',
 'GM',
 'GOOG',
 'GOOGL',
 'GS',
 'HD',
 'HON',
 'IBM',
 'INTC',
 'JNJ',
 'JPM',
 'KHC',
 'KO',
 'LIN',
 'LLY',
 'LMT',
 'LOW',
 'MA',
 'MCD',
 'MDLZ',
 'MDT',
 'MET',
 'META',
 'MMM',
 'MO',
 'MRK',
 'MS',
 'MSFT',
 'NEE',
 'NFLX',
 'NKE',
 'NVDA',
 'ORCL',
 'PEP',
 'PFE',
 'PG',
 'PM',
 'PYPL',
 'QCOM',
 'RTX',
 'SBUX',
 'SCHW',
 'SO',
 'SPG',
 'T',
 'TGT',
 'TMO',
 'TMUS',
 'TSLA',
 'TXN',
 'UNH',
 'UNP',
 'UPS',
 'USB',
 'V',
 'VZ',
 'WFC',
 'WMT',
 'XOM']

In [3]:
# Correct ticker name for Berkshire Hathaway

i = sn_p100.index('BRK.B')
sn_p100[i] = 'BRK-B'
sn_p100

['AAPL',
 'ABBV',
 'ABT',
 'ACN',
 'ADBE',
 'AIG',
 'AMD',
 'AMGN',
 'AMT',
 'AMZN',
 'AVGO',
 'AXP',
 'BA',
 'BAC',
 'BK',
 'BKNG',
 'BLK',
 'BMY',
 'BRK-B',
 'C',
 'CAT',
 'CHTR',
 'CL',
 'CMCSA',
 'COF',
 'COP',
 'COST',
 'CRM',
 'CSCO',
 'CVS',
 'CVX',
 'DE',
 'DHR',
 'DIS',
 'DOW',
 'DUK',
 'EMR',
 'EXC',
 'F',
 'FDX',
 'GD',
 'GE',
 'GILD',
 'GM',
 'GOOG',
 'GOOGL',
 'GS',
 'HD',
 'HON',
 'IBM',
 'INTC',
 'JNJ',
 'JPM',
 'KHC',
 'KO',
 'LIN',
 'LLY',
 'LMT',
 'LOW',
 'MA',
 'MCD',
 'MDLZ',
 'MDT',
 'MET',
 'META',
 'MMM',
 'MO',
 'MRK',
 'MS',
 'MSFT',
 'NEE',
 'NFLX',
 'NKE',
 'NVDA',
 'ORCL',
 'PEP',
 'PFE',
 'PG',
 'PM',
 'PYPL',
 'QCOM',
 'RTX',
 'SBUX',
 'SCHW',
 'SO',
 'SPG',
 'T',
 'TGT',
 'TMO',
 'TMUS',
 'TSLA',
 'TXN',
 'UNH',
 'UNP',
 'UPS',
 'USB',
 'V',
 'VZ',
 'WFC',
 'WMT',
 'XOM']

In [4]:
# Function for preprocessing data of a single ticker to desired format of Zipline

def preprocess(data):
    data.reset_index(inplace=True)
    data.drop(columns=['Adj Close'], inplace=True)
    data.rename(columns={'Date': 'timestamp', 'Open': 'open', 'High': 'high', 'Low': 'low', 'Close': 'close', 'Volume': 'volume', }, inplace=True)
    data['timestamp'] = pd.to_datetime(data['timestamp'])
    data['volume'] = data['volume'].astype('float64')
    data.sort_values(by='timestamp', inplace=True)
    data.reset_index(drop=True, inplace=True)

    return data

In [5]:
# # Fetching historical data for each ticker in S&P100

# start_date = '2010-01-01'
# end_date = '2021-07-31'

# snp100_data_dict = {}

# for ticker in sn_p100:
#     data = yf.download(ticker, start=start_date, end=end_date, interval='1d')
#     preprocess(data).to_csv(f'/home/yakub/Documents/Python/SnP100/Daily/{ticker}.csv', index=False)
#     snp100_data_dict[ticker] = data

## Register and Ingest S&P100 Bundle

In [18]:
# from zipline.data import bundles

# bundle_name = 'snp100'

# bundles.register(
#     bundle_name,
#     bundles.csvdir.csvdir_equities(
#         ['daily'],
#         '/home/yakub/Documents/Python/SnP100'
#     ),
#     calendar_name='NYSE',

# )

# bundles.bundles

  bundles.register(


mappingproxy({'quandl': RegisteredBundle(calendar_name='NYSE', start_session=None, end_session=None, minutes_per_day=390, ingest=<function quandl_bundle at 0x7f9c139941f0>, create_writers=True),
              'quantopian-quandl': RegisteredBundle(calendar_name='NYSE', start_session=None, end_session=None, minutes_per_day=390, ingest=<function quantopian_quandl_bundle at 0x7f9c139943a0>, create_writers=False),
              'csvdir': RegisteredBundle(calendar_name='NYSE', start_session=None, end_session=None, minutes_per_day=390, ingest=<function csvdir_bundle at 0x7f9c139945e0>, create_writers=True),
              'snp100': RegisteredBundle(calendar_name='NYSE', start_session=None, end_session=None, minutes_per_day=390, ingest=<bound method CSVDIRBundle.ingest of <zipline.data.bundles.csvdir.CSVDIRBundle object at 0x7f9c11166830>>, create_writers=True)})

In [19]:
# bundles.ingest(bundle_name)

In [20]:
# bundle_data = bundles.load(bundle_name)
# sids = bundle_data.asset_finder.sids
# assets = bundle_data.asset_finder.retrieve_all(sids)

# print(assets)

[Equity(0 [AAPL]), Equity(1 [ABBV]), Equity(2 [ABT]), Equity(3 [ACN]), Equity(4 [ADBE]), Equity(5 [AIG]), Equity(6 [AMD]), Equity(7 [AMGN]), Equity(8 [AMT]), Equity(9 [AMZN]), Equity(10 [AVGO]), Equity(11 [AXP]), Equity(12 [BA]), Equity(13 [BAC]), Equity(14 [BK]), Equity(15 [BKNG]), Equity(16 [BLK]), Equity(17 [BMY]), Equity(18 [BRK-B]), Equity(19 [C]), Equity(20 [CAT]), Equity(21 [CHTR]), Equity(22 [CL]), Equity(23 [CMCSA]), Equity(24 [COF]), Equity(25 [COP]), Equity(26 [COST]), Equity(27 [CRM]), Equity(28 [CSCO]), Equity(29 [CVS]), Equity(30 [CVX]), Equity(31 [DE]), Equity(32 [DHR]), Equity(33 [DIS]), Equity(34 [DOW]), Equity(35 [DUK]), Equity(36 [EMR]), Equity(37 [EXC]), Equity(38 [F]), Equity(39 [FDX]), Equity(40 [GD]), Equity(41 [GE]), Equity(42 [GILD]), Equity(43 [GM]), Equity(44 [GOOG]), Equity(45 [GOOGL]), Equity(46 [GS]), Equity(47 [HD]), Equity(48 [HON]), Equity(49 [IBM]), Equity(50 [INTC]), Equity(51 [JNJ]), Equity(52 [JPM]), Equity(53 [KHC]), Equity(54 [KO]), Equity(55 [LIN

In [47]:
# !zipline bundles

In [22]:
# !zipline ingest -b snp100

Error: No bundle registered with the name 'snp100'


In [26]:
# !zipline bundles

csvdir <no ingestions>
quandl 2023-11-29 14:09:18.799451
quandl 2023-11-29 14:07:32.423012
quandl 2023-11-29 12:39:15.374615
quandl 2023-11-29 12:05:56.774662
quandl 2023-11-29 10:27:35.030664
quantopian-quandl 2023-11-30 02:02:00.355768
quantopian-quandl 2023-11-30 01:12:56.278980
quantopian-quandl 2023-11-30 01:11:49.450274
quantopian-quandl 2023-11-29 14:06:08.862893
quantopian-quandl 2023-11-29 13:55:59.391084
quantopian-quandl 2023-11-29 12:17:04.159981
quantopian-quandl 2023-11-29 10:27:07.529834
quantopian-quandl 2023-11-29 10:14:48.887188
snp100 2023-12-23 17:26:36.160004
snp100 2023-12-23 17:22:12.808944
snp100 2023-12-23 17:19:25.530514
snp100 2023-12-23 15:00:22.357639
snp100 2023-12-23 14:40:14.203424
snp100 2023-12-20 10:15:06.308878
snp100 2023-12-20 10:13:03.416763
snp100 2023-12-16 17:38:48.604072
snp100 2023-12-16 17:38:23.569546
snp100 2023-12-16 17:37:59.246961


In [6]:
import yfinance as yf

start = "2020-01-01" 
end = "2020-12-31"

data = yf.download("TSLA", start=start, end=end)

print(data.head())

[*********************100%%**********************]  1 of 1 completed


1 Failed download:
['TSLA']: Exception('%ticker%: No price data found, symbol may be delisted (1d 2020-01-01 -> 2020-12-31)')



Empty DataFrame
Columns: [Open, High, Low, Close, Adj Close, Volume]
Index: []


In [25]:
ticker = yf.Ticker("TSLA")
data = ticker.history(start=start, end=end)  # Use ticker.history instead of yf.download
print(data.head())

TSLA: No price data found, symbol may be delisted (1d 2020-01-01 -> 2020-12-31)


Empty DataFrame
Columns: [Open, High, Low, Close, Adj Close, Volume]
Index: []


In [10]:
%matplotlib inline
import quantstats as qs

# extend pandas functionality with metrics, etc.
qs.extend_pandas()

# fetch the daily returns for a stock
stock = qs.utils.download_returns('META')

# show sharpe ratio
qs.stats.sharpe(stock)

# or using extend_pandas() :)
stock.sharpe()

Failed to get ticker 'META' reason: unsupported operand type(s) for -: 'datetime.datetime' and 'str'


[*********************100%%**********************]  1 of 1 completed


1 Failed download:
['META']: Exception('%ticker%: No timezone found, symbol may be delisted')





ValueError: attempt to get argmax of an empty sequence

In [28]:
data_tesla = pd.read_csv("/home/yakub/Documents/Python/SnP100/daily/TSLA.csv")
data_tesla.head()

Unnamed: 0,timestamp,open,high,low,close,volume
0,2010-06-29,1.266667,1.666667,1.169333,1.592667,281494500.0
1,2010-06-30,1.719333,2.028,1.553333,1.588667,257806500.0
2,2010-07-01,1.666667,1.728,1.351333,1.464,123282000.0
3,2010-07-02,1.533333,1.54,1.247333,1.28,77097000.0
4,2010-07-06,1.333333,1.333333,1.055333,1.074,103003500.0


In [31]:
type(data_tesla['timestamp'])

pandas.core.series.Series

In [32]:
data_tesla['timestamp'] = pd.to_datetime(data_tesla['timestamp'])

In [33]:
type(data_tesla['timestamp'])

pandas.core.series.Series

In [37]:
%matplotlib inline
import quantstats as qs

# extend pandas functionality with metrics, etc.
qs.extend_pandas()

data_tesla['returns'] = data_tesla['close'].pct_change()
sharpe_ratio = qs.stats.sharpe(data_tesla['returns'])
print("Sharpe Ratio:", sharpe_ratio)


Sharpe Ratio: 1.0757635663063325


In [41]:
data_tesla = data_tesla[1:]


In [43]:
data_tesla.head()

Unnamed: 0,timestamp,open,high,low,close,volume,returns
1,2010-06-30,1.719333,2.028,1.553333,1.588667,257806500.0,-0.002511
2,2010-07-01,1.666667,1.728,1.351333,1.464,123282000.0,-0.078473
3,2010-07-02,1.533333,1.54,1.247333,1.28,77097000.0,-0.125683
4,2010-07-06,1.333333,1.333333,1.055333,1.074,103003500.0,-0.160937
5,2010-07-07,1.093333,1.108667,0.998667,1.053333,103825500.0,-0.019243


In [42]:
data_tesla['timestamp'] = pd.to_datetime(data_tesla['timestamp'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_tesla['timestamp'] = pd.to_datetime(data_tesla['timestamp'])


In [24]:
qs.plots.snapshot(data_tesla['returns'], title='Tesla Performance', show=True)

TypeError: Concatenation operation is not implemented for NumPy arrays, use np.concatenate() instead. Please do not rely on this error; it may not be given on all Python implementations.

In [44]:
qs.reports.full(data_tesla['returns'])

AttributeError: 'int' object has no attribute 'days'

In [45]:
type(data_tesla['returns'])

pandas.core.series.Series