In [1]:
import requests
import pandas as pd
import lxml
import re
import json
from bs4 import BeautifulSoup as bs
from lxml import html

# Lift pandas' display limits so rendered frames show every column and row.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Header set built for the macrotrends.net requests made in this notebook.
# NOTE(review): the Access-Control-* entries are normally CORS *response*
# headers; only the User-Agent looks meaningful on a request -- confirm.
request_headers = dict([
    ('Access-Control-Allow-Origin', '*'),
    ('Access-Control-Allow-Methods', 'GET'),
    ('Access-Control-Allow-Headers', 'Content-Type'),
    ('Access-Control-Max-Age', '3600'),
    ('User-Agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0'),
])

# Common URL prefix of every statement page that gets scraped.
base_url = 'https://www.macrotrends.net/stocks/charts/'

# Ticker symbol -> company slug used in the page path after the ticker.
url_ticker_map = dict(AMD='amd', NVDA='nvidia', INTC='intel')

# Smoke test: fetch one quarterly income-statement page and dump its parsed
# HTML to see how the page is laid out.
test_url = 'https://www.macrotrends.net/stocks/charts/AAPL/aapl/income-statement?freq=Q'
# The headers must be passed by keyword: the second positional argument of
# requests.get is `params`, so passing the dict positionally appends these
# values to the query string instead of sending them as HTTP headers.
test_request = requests.get(test_url, headers=request_headers)

soup = bs(test_request.content, 'html.parser')
print(soup.prettify())

<!DOCTYPE html>
<!--[if lt IE 7]>      <html class="no-js lt-ie9 lt-ie8 lt-ie7"> <![endif]-->
<!--[if IE 7]>         <html class="no-js lt-ie9 lt-ie8"> <![endif]-->
<!--[if IE 8]>         <html class="no-js lt-ie9"> <![endif]-->
<!--[if gt IE 8]><!-->
<html class="no-js">
 <!--<![endif]-->
 <head>
  <meta charset="utf-8"/>
  <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/>
  <link href="https://www.macrotrends.net/stocks/charts/AAPL/apple/income-statement" rel="canonical"/>
  <title>
   Apple Income Statement 2005-2021 | AAPL | MacroTrends
  </title>
  <meta content="Ten years of annual and quarterly income statements for Apple (AAPL). The income statement summarizes the revenues, expenses and profit generated by a business over an annual or quarterly period." name="description">
   <meta content="" name="robots">
    <link href="/assets/images/icons/FAVICON/macro-trends_favicon.ico" rel="shortcut icon" type="image/x-icon"/>
    <meta content="1228954C688F5907894001CD8E5

In [2]:
# List every <script> element of the parsed test page.
# NOTE(review): presumably used to find the inline script that carries the
# `originalData` payload parsed by fetch_financials -- confirm.
soup.find_all(name='script')

[<script src="//ajax.googleapis.com/ajax/libs/jquery/1.12.0/jquery.min.js"></script>,
 <script src="//maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js"></script>,
 <script language="javascript" src="/assets/javascript/jquery.iframetracker.js" type="text/javascript"></script>,
 <script language="javascript" src="/assets/javascript/jquery.sticky-kit.min.js" type="text/javascript"></script>,
 <script src="/assets/javascript/modernizr-2.6.2-respond-1.1.0.min.js" type="text/javascript"></script>,
 <script src="//www.fuelcdn.com/fuelux/3.13.0/js/fuelux.min.js"></script>,
 <script>InvestingChannelQueue = window.InvestingChannelQueue || [];</script>,
 <script async="" src="https://u5.investingchannel.com/static/uat.js"></script>,
 <script type="text/javascript">
     
         //Push Run command with the API-Key, so that UAT will start processing publishers request.
         InvestingChannelQueue.push(function() {
             ic_page = InvestingChannel.UAT.Run("df17ac1e-cc7f-11e8-8

In [3]:
def fetch_financials(ticker = 'AAPL', kind = 'bs'):
    """Scrape one quarterly financial statement into a DataFrame.

    The statement page embeds its table as a JavaScript assignment
    (`var originalData = [...]`). That JSON is extracted with a regular
    expression, each row's HTML label is reduced to plain text, and the
    table is transposed so that every reporting period becomes a row.

    Parameters
    ----------
    ticker : str
        Ticker symbol used in the URL. The company slug is looked up in
        `url_ticker_map`; tickers missing from the map fall back to the
        lower-cased ticker (the same form as this notebook's AAPL test URL).
    kind : str
        'bs' (balance sheet), 'is' (income statement) or
        'cfs' (cash-flow statement).

    Returns
    -------
    pandas.DataFrame
        One row per period with a `date` column, one column per statement
        line item, and a `ticker` column.

    Raises
    ------
    ValueError
        If `kind` is not one of the supported statement codes.
    requests.HTTPError
        If the page request returns an error status.
    IndexError
        If the page does not contain an `originalData` assignment.
    """
    statement_paths = {
        'bs': 'balance-sheet',
        'is': 'income-statement',
        'cfs': 'cash-flow-statement',
    }
    if kind not in statement_paths:
        # Previously an unknown kind silently requested the bare base URL.
        raise ValueError("kind must be one of %s, got %r"
                         % (sorted(statement_paths), kind))

    # Fall back to the lower-cased ticker so the default 'AAPL' (absent from
    # url_ticker_map) no longer fails with a KeyError.
    # NOTE(review): relies on the site resolving a non-canonical slug, as the
    # AAPL/aapl test request in this notebook suggests -- confirm for others.
    slug = url_ticker_map.get(ticker, ticker.lower())
    request_url = (base_url + ticker + '/' + slug + '/'
                   + statement_paths[kind] + '?freq=Q')

    # `headers=` must be a keyword: the second positional argument of
    # requests.get is `params`, which would send the dict as a query string.
    response = requests.get(request_url, headers=request_headers)
    response.raise_for_status()

    pattern = re.compile(r' var originalData = (.*?);\r?\n', re.DOTALL)
    matches = pattern.findall(response.text)
    if not matches:
        # Same exception type as the former `[0]` lookup, with a usable message.
        raise IndexError("no 'originalData' table found at " + request_url)
    table = json.loads(matches[0])

    # Select period columns by key rather than by position so the result does
    # not depend on 'field_name' and 'popup_icon' being the first two keys.
    period_columns = [key for key in table[0]
                      if key not in ('field_name', 'popup_icon')]
    headers = ['field_name'] + period_columns
    result = []

    for row in table:
        # Explicit parser: avoids bs4's "no parser specified" warning and
        # matches the parser used elsewhere in this notebook.
        label_soup = bs(row['field_name'], 'html.parser')
        label_tag = label_soup.select_one('a, span')
        # Labels without an <a>/<span> wrapper fall back to their plain text.
        field_name = label_tag.text if label_tag is not None else label_soup.get_text()
        result.append([field_name] + [row.get(column) for column in period_columns])

    df = pd.DataFrame(result, columns = headers)

    # Transpose so periods become rows, promote the line-item names (first
    # row after transposing) to column labels, then drop that label row.
    df = df.T
    df = df.rename(columns= df.iloc[0])
    df = df.drop(['field_name'])
    df = df.reset_index(level = 0)
    df = df.rename(columns={"index": "date"})
    df['ticker'] = ticker

    return df

# Tickers to scrape, in the order their rows are stacked in each frame.
chip_tickers = ('AMD', 'NVDA', 'INTC')

# One combined frame per statement type, each built by concatenating the
# per-ticker results of fetch_financials.
balance_sheets_df = pd.concat(
    [fetch_financials(symbol, 'bs') for symbol in chip_tickers])
income_statements_df = pd.concat(
    [fetch_financials(symbol, 'is') for symbol in chip_tickers])
cashflow_statements_df = pd.concat(
    [fetch_financials(symbol, 'cfs') for symbol in chip_tickers])

In [4]:
# balance_sheets_df.to_csv('balance-sheets.csv', index=False)
# income_statements_df.to_csv('income-statements.csv', index=False)
# cashflow_statements_df.to_csv('cashflow-statements.csv', index=False)

In [17]:
# Standard library
import os
import datetime
from datetime import timedelta

# Third-party plotting stack
import plotly
import cufflinks as cf

# Put cufflinks into offline mode.
# NOTE(review): presumably required before any `.iplot` call -- confirm.
cf.go_offline()

print(os.listdir('../'))
# Read the API key from the first line of the key file. The context manager
# closes the file even if the read fails, and strip() removes the trailing
# newline that readline() keeps, which would otherwise end up in request URLs.
with open('../team-rgb/apikey.txt', "r") as keyfile:
    apikey = keyfile.readline().strip()

# Endpoint prefix for daily historical prices; the ticker is appended to it.
prices_url = 'https://financialmodelingprep.com/api/v3/historical-price-full/'

def fetch_stock_list(ticker, days = 253):
    """Download recent daily prices for one ticker.

    Parameters
    ----------
    ticker : str
        Symbol appended to `prices_url`.
    days : int, optional
        Value sent as the `timeseries` query parameter. Defaults to 253,
        the value this notebook previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        The `historical` records of the response in reversed order,
        indexed by `date`, with an added `ticker` column.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    KeyError
        If the response body has no `historical` entry.
    """
    # Let requests build and encode the query string; strip() guards against
    # a key that was read from a file with its trailing newline.
    response = requests.get(
        url = prices_url + ticker,
        params = {'timeseries': days, 'apikey': apikey.strip()})
    response.raise_for_status()
    data = response.json()

    if 'historical' not in data:
        # Same exception type as the former direct lookup, but it now reports
        # what the API actually returned.
        raise KeyError("no 'historical' data for %s; response was: %r"
                       % (ticker, data))

    # Build the result as one chain of new frames instead of mutating a
    # reversed slice in place, which risks pandas' chained-assignment warning.
    return (pd.DataFrame(data['historical'])
            .iloc[::-1]
            .set_index('date')
            .assign(ticker = ticker))

# Fetch the price history of each ticker (same order as before: AMD, NVDA,
# INTC) and bind the results to their per-ticker names.
amd_prices, nvda_prices, intc_prices = [
    fetch_stock_list(symbol) for symbol in ('AMD', 'NVDA', 'INTC')
]

# Preview the first rows of the AMD frame.
amd_prices.head()

['aapl', 'The Titans of Tech.ipynb', 'Image Detection.ipynb', '.DS_Store', 'apikey.txt', '.gitignore', 'Processing.ipynb', '.ipynb_checkpoints', '.git', 'team-rgb']


Unnamed: 0_level_0,open,high,low,close,adjClose,volume,unadjustedVolume,change,changePercent,vwap,label,changeOverTime,ticker
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2020-05-29,52.07,53.799999,51.849998,53.799999,53.799999,64948300.0,64948300.0,1.73,3.322,53.15,"May 29, 20",0.03322,AMD
2020-06-01,53.310001,54.110001,52.889999,53.630001,53.630001,36864400.0,36864400.0,0.32,0.6,53.54333,"June 01, 20",0.006,AMD
2020-06-02,53.450001,53.57,52.009998,53.540001,53.540001,42855200.0,42855200.0,0.09,0.168,53.04,"June 02, 20",0.00168,AMD
2020-06-03,53.599998,53.650002,52.330002,52.73,52.73,46765300.0,46765300.0,-0.87,-1.623,52.90333,"June 03, 20",-0.01623,AMD
2020-06-04,52.59,53.740002,52.200001,52.630001,52.630001,44869100.0,44869100.0,0.04,0.076,52.85667,"June 04, 20",0.00076,AMD


In [20]:
# Stack the three per-ticker price frames; the `ticker` column tells them apart.
stock_prices_df = pd.concat([amd_prices, nvda_prices, intc_prices])
# The dates live in the index, so the export has to keep it
# (index=False would write the prices without their dates).
# stock_prices_df.to_csv('stock-prices.csv')

# Preview only: display.max_rows is set to None in this notebook, so a bare
# `stock_prices_df` would render every row of all three tickers.
stock_prices_df.head(10)

Unnamed: 0_level_0,open,high,low,close,adjClose,volume,unadjustedVolume,change,changePercent,vwap,label,changeOverTime,ticker
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2020-05-29,52.07,53.799999,51.849998,53.799999,53.799999,64948300.0,64948300.0,1.73,3.322,53.15,"May 29, 20",0.03322,AMD
2020-06-01,53.310001,54.110001,52.889999,53.630001,53.630001,36864400.0,36864400.0,0.32,0.6,53.54333,"June 01, 20",0.006,AMD
2020-06-02,53.450001,53.57,52.009998,53.540001,53.540001,42855200.0,42855200.0,0.09,0.168,53.04,"June 02, 20",0.00168,AMD
2020-06-03,53.599998,53.650002,52.330002,52.73,52.73,46765300.0,46765300.0,-0.87,-1.623,52.90333,"June 03, 20",-0.01623,AMD
2020-06-04,52.59,53.740002,52.200001,52.630001,52.630001,44869100.0,44869100.0,0.04,0.076,52.85667,"June 04, 20",0.00076,AMD
2020-06-05,52.990002,53.57,52.080002,53.099998,53.099998,53908300.0,53908300.0,0.11,0.208,52.91667,"June 05, 20",0.00208,AMD
2020-06-08,53.299999,53.349998,52.259998,52.970001,52.970001,39681700.0,39681700.0,-0.33,-0.619,52.86,"June 08, 20",-0.00619,AMD
2020-06-09,52.950001,56.459999,52.720001,56.389999,56.389999,79919100.0,79919100.0,3.44,6.497,55.19,"June 09, 20",0.06497,AMD
2020-06-10,57.200001,59.0,57.029999,57.439999,57.439999,78719400.0,78719400.0,0.24,0.42,57.82333,"June 10, 20",0.0042,AMD
2020-06-11,55.939999,56.68,52.799999,52.830002,52.830002,78715400.0,78715400.0,-3.11,-5.56,54.10333,"June 11, 20",-0.0556,AMD


In [23]:
# start = datetime.date.today()
# end = start - timedelta(days = 365) 
# print(start,end)

# amd_prices.iplot(kind='candlestick', title = 'AMD Stock Price (1 YR)', xTitle = 'Date')