In [1]:
from alpha_vantage.timeseries import TimeSeries
import matplotlib.pyplot as plt
import sys
import pandas as pd
import requests
from api_keys import AV_api

In [2]:
# Industry -> tickers table. Every industry lists the same number of companies,
# so the mapping forms a rectangular frame with one column per industry.
sectors = pd.DataFrame(
    {
        'Semiconductors': ['NVDA', 'TSM', 'AVGO', 'AMD', 'INTC'],
        'Software': ['CRM', 'SAP', 'INTU', 'NOW', 'ZI'],
        'Consumer': ['AAPL', 'MSFT', 'SONY', 'LPL', 'SONO'],
        'Infotech': ['ACN', 'IBM', 'FI', 'INFY', 'CTSH'],
    }
)


In [26]:
# Download the daily price history for every ticker in `sectors` and collect one
# date-filtered frame per company in `all_data`.
all_data = []                                   # per-ticker frames, combined in a later cell

start_date = '2018-01-01'                       # bounds of the date window (both inclusive)
end_date = '2023-12-04'

# Build the client once instead of once per ticker. The key is taken from the
# local `api_keys` module imported at the top of the notebook rather than being
# hard-coded in the cell -- assumes AV_api holds the Alpha Vantage key string.
ts = TimeSeries(key=AV_api, output_format='pandas')

for industry, companies in sectors.items():     # one DataFrame column per industry
    for ticker in companies:                    # the tickers listed under that industry
        print(f'Gathering data for {industry}: {ticker}')
        try:
            data, meta_data = ts.get_daily(symbol=ticker, outputsize='full')
        except Exception as err:
            # Best effort: report and move on to the next ticker. `Exception`
            # (not a bare `except`) lets a kernel interrupt still stop the loop,
            # and the reason is printed because a failed call is not necessarily
            # an unknown symbol.
            print(f'Skipping {ticker}: {type(err).__name__}: {err}')
            continue

        # Sort chronologically so the label slice below selects a contiguous window.
        data_df = data.sort_index()
        # .copy() so the columns added below do not write into the frame
        # returned by the API client.
        filtered_data = data_df.loc[start_date:end_date].copy()

        # Tag each row so the frames can be told apart after they are combined.
        filtered_data["Ticker"] = ticker
        filtered_data["Industry"] = industry
        filtered_data["Sector"] = 'Technology'
        all_data.append(filtered_data)

Gathering data for Semiconductors: NVDA
Gathering data for Semiconductors: TSM
Gathering data for Semiconductors: AVGO
Gathering data for Semiconductors: AMD
Gathering data for Semiconductors: INTC
Gathering data for Software: CRM
Gathering data for Software: SAP
Gathering data for Software: INTU
Gathering data for Software: NOW
Gathering data for Software: ZI
Gathering data for Consumer: AAPL
Gathering data for Consumer: MSFT
Gathering data for Consumer: SONY
Gathering data for Consumer: LPL
Gathering data for Consumer: SONO
Gathering data for Infotech: ACN
Gathering data for Infotech: IBM
Gathering data for Infotech: FI
Gathering data for Infotech: INFY
Gathering data for Infotech: CTSH


In [29]:
# Stack the collected per-ticker frames row-wise into a single long table.
final_data = pd.concat(objs=all_data, axis=0)


In [31]:
# Show the raw list of collected frames (one entry per downloaded ticker).
all_data

[            1. open  2. high  3. low  4. close   5. volume Ticker  \
 date                                                                
 2018-01-02   195.78  199.500  194.50    199.35   8303752.0   NVDA   
 2018-01-03   204.10  213.700  203.75    212.47  22499518.0   NVDA   
 2018-01-04   215.76  218.050  212.69    213.59  14346675.0   NVDA   
 2018-01-05   214.19  216.910  211.08    215.40  14008768.0   NVDA   
 2018-01-08   220.40  225.000  218.58    222.00  21190932.0   NVDA   
 ...             ...      ...     ...       ...         ...    ...   
 2023-11-28   482.36  483.229  474.73    478.21  40149085.0   NVDA   
 2023-11-29   483.79  487.620  478.60    481.40  38200465.0   NVDA   
 2023-11-30   480.24  481.100  464.22    467.70  52624663.0   NVDA   
 2023-12-01   465.25  472.000  461.87    467.65  36931701.0   NVDA   
 2023-12-04   460.77  460.770  450.10    455.10  43754344.0   NVDA   
 
                   Industry      Sector  
 date                                    
 201

In [32]:
# Move the dates out of the index into a regular column before saving.
# The reset only runs while the index still carries a name (the combined frame's
# index is shown as 'date' in the output above), so re-running this cell does not
# pile up an extra 'index' column.
if final_data.index.name is not None:
    final_data = final_data.reset_index()

# index=False: after the reset the index is only a row counter, and writing it
# would add an unnamed first column to the CSV.
final_data.to_csv('raw_data.csv', index=False)

In [42]:
# Reload the raw download, give the price columns readable names and keep only
# the columns of interest, in a fixed order.
tech_data = (
    pd.read_csv('raw_data.csv')
    .rename(columns={'date': 'Date',
                     '1. open': 'Open',
                     '2. high': 'High',
                     '3. low': 'Low',
                     '4. close': 'Close',
                     '5. volume': 'Volume'})
    .assign(
        # Every ticker in this notebook belongs to the Technology sector. The
        # column is spelled 'Sector' so it is the one kept by the selection below.
        Sector='Technology',
        # Parse the date strings read from the CSV into datetimes.
        Date=lambda frame: pd.to_datetime(frame['Date']),
    )
    [['Date', 'Ticker', 'Sector', 'Industry', 'Open', 'High', 'Low', 'Close', 'Volume']]
)

# The row index is written as well (pandas default); the next cell drops the
# resulting 'Unnamed: 0' column when it reads the file back.
tech_data.to_csv('tech_data.csv')

In [44]:
# Read the saved file back and hide the unnamed index column that to_csv wrote,
# leaving only the data columns on display.
(
    pd.read_csv('tech_data.csv')
    .drop(columns=['Unnamed: 0'])
)

Unnamed: 0,Date,Ticker,Sector,Industry,Open,High,Low,Close,Volume
0,2018-01-02,NVDA,Technology,Semiconductors,195.78,199.50,194.500,199.35,8303752.0
1,2018-01-03,NVDA,Technology,Semiconductors,204.10,213.70,203.750,212.47,22499518.0
2,2018-01-04,NVDA,Technology,Semiconductors,215.76,218.05,212.690,213.59,14346675.0
3,2018-01-05,NVDA,Technology,Semiconductors,214.19,216.91,211.080,215.40,14008768.0
4,2018-01-08,NVDA,Technology,Semiconductors,220.40,225.00,218.580,222.00,21190932.0
...,...,...,...,...,...,...,...,...,...
29059,2023-11-28,CTSH,Technology,Infotech,70.70,71.52,70.700,71.30,2607377.0
29060,2023-11-29,CTSH,Technology,Infotech,71.60,71.80,69.895,70.12,3521683.0
29061,2023-11-30,CTSH,Technology,Infotech,70.39,70.52,69.800,70.38,8254394.0
29062,2023-12-01,CTSH,Technology,Infotech,70.23,70.87,70.080,70.82,2672566.0
