## Importing stock listing data from Excel



### Import stock listing info from the NASDAQ

In [1]:
# Import pandas library
import pandas as pd

# Load the NASDAQ listings with pandas' default CSV parsing (no extra hints)
nasdaq = pd.read_csv(filepath_or_buffer='nasdaq-listings.csv')

# Preview the first ten rows
display(nasdaq.head(n=10))

# Summarize column names, dtypes and non-null counts
nasdaq.info()

Unnamed: 0,Stock Symbol,Company Name,Last Sale,Market Capitalization,IPO Year,Sector,Industry,Last Update
0,AAPL,Apple Inc.,141.05,740000000000.0,1980,Technology,Computer Manufacturing,4/26/17
1,GOOGL,Alphabet Inc.,840.18,581000000000.0,NAN,Technology,"Computer Software: Programming, Data Processing",4/24/17
2,GOOG,Alphabet Inc.,823.56,569000000000.0,2004,Technology,"Computer Software: Programming, Data Processing",4/23/17
3,MSFT,Microsoft Corporation,64.95,502000000000.0,1986,Technology,Computer Software: Prepackaged Software,4/26/17
4,AMZN,"Amazon.com, Inc.",884.67,422000000000.0,1997,Consumer Services,Catalog/Specialty Distribution,4/24/17
5,FB,"Facebook, Inc.",139.39,403000000000.0,2012,Technology,"Computer Software: Programming, Data Processing",4/26/17
6,CMCSA,Comcast Corporation,37.14,176000000000.0,NAN,Consumer Services,Television Services,4/26/17
7,INTC,Intel Corporation,35.25,166000000000.0,NAN,Technology,Semiconductors,4/23/17
8,CSCO,"Cisco Systems, Inc.",32.42,162000000000.0,1990,Technology,Computer Communications Equipment,4/23/17
9,AMGN,Amgen Inc.,161.61,119000000000.0,1983,Health Care,Biotechnology: Biological Products (No Diagnos...,4/24/17


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1115 entries, 0 to 1114
Data columns (total 8 columns):
Stock Symbol             1115 non-null object
Company Name             1115 non-null object
Last Sale                1115 non-null float64
Market Capitalization    1115 non-null float64
IPO Year                 1115 non-null object
Sector                   1115 non-null object
Industry                 1115 non-null object
Last Update              1115 non-null object
dtypes: float64(2), object(6)
memory usage: 69.8+ KB


### Read data using .read_csv() with adequate parsing arguments

In [2]:
# Re-read the CSV, this time telling the parser that the literal 'NAN'
# marks a missing value and that 'Last Update' should be parsed as dates
nasdaq = pd.read_csv(
    filepath_or_buffer='nasdaq-listings.csv',
    na_values='NAN',
    parse_dates=['Last Update'],
)

# Preview the first five rows
display(nasdaq.head(n=5))

# Summarize column names, dtypes and non-null counts
nasdaq.info()

Unnamed: 0,Stock Symbol,Company Name,Last Sale,Market Capitalization,IPO Year,Sector,Industry,Last Update
0,AAPL,Apple Inc.,141.05,740000000000.0,1980.0,Technology,Computer Manufacturing,2017-04-26
1,GOOGL,Alphabet Inc.,840.18,581000000000.0,,Technology,"Computer Software: Programming, Data Processing",2017-04-24
2,GOOG,Alphabet Inc.,823.56,569000000000.0,2004.0,Technology,"Computer Software: Programming, Data Processing",2017-04-23
3,MSFT,Microsoft Corporation,64.95,502000000000.0,1986.0,Technology,Computer Software: Prepackaged Software,2017-04-26
4,AMZN,"Amazon.com, Inc.",884.67,422000000000.0,1997.0,Consumer Services,Catalog/Specialty Distribution,2017-04-24


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1115 entries, 0 to 1114
Data columns (total 8 columns):
Stock Symbol             1115 non-null object
Company Name             1115 non-null object
Last Sale                1115 non-null float64
Market Capitalization    1115 non-null float64
IPO Year                 593 non-null float64
Sector                   1036 non-null object
Industry                 1036 non-null object
Last Update              1115 non-null datetime64[ns]
dtypes: datetime64[ns](1), float64(3), object(4)
memory usage: 69.8+ KB


### Load listing info from a single sheet

In [3]:
# Read only the 'nyse' worksheet, treating 'n/a' cells as missing values
nyse = pd.read_excel(
    io='listings.xlsx',
    sheet_name='nyse',
    na_values='n/a',
)

# Preview the first five rows
display(nyse.head(n=5))

# Summarize column names, dtypes and non-null counts
nyse.info()

Unnamed: 0,Stock Symbol,Company Name,Last Sale,Market Capitalization,IPO Year,Sector,Industry
0,DDD,3D Systems Corporation,14.48,1647165000.0,,Technology,Computer Software: Prepackaged Software
1,MMM,3M Company,188.65,112736600000.0,,Health Care,Medical/Dental Instruments
2,WBAI,500.com Limited,13.96,579312900.0,2013.0,Consumer Services,Services-Misc. Amusement & Recreation
3,WUBA,58.com Inc.,36.11,5225238000.0,2013.0,Technology,"Computer Software: Programming, Data Processing"
4,AHC,A.H. Belo Corporation,6.2,134735100.0,,Consumer Services,Newspapers/Magazines


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3147 entries, 0 to 3146
Data columns (total 7 columns):
Stock Symbol             3147 non-null object
Company Name             3147 non-null object
Last Sale                3079 non-null float64
Market Capitalization    3147 non-null float64
IPO Year                 1361 non-null float64
Sector                   2177 non-null object
Industry                 2177 non-null object
dtypes: float64(3), object(4)
memory usage: 172.2+ KB


### Load listing data from all sheets into a dictionary

In [4]:
# Open the workbook through a context manager so the underlying file handle
# is closed as soon as every sheet has been read
with pd.ExcelFile('listings.xlsx') as xls:
    # Extract sheet names and store in exchanges
    exchanges = xls.sheet_names

    # Passing a list of sheet names makes read_excel return a dictionary of
    # DataFrames keyed by sheet name; 'n/a' cells are treated as missing
    listings = pd.read_excel(xls, sheet_name=exchanges, na_values='n/a')

# Inspect NASDAQ listings
listings['nasdaq'].info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3167 entries, 0 to 3166
Data columns (total 7 columns):
Stock Symbol             3167 non-null object
Company Name             3167 non-null object
Last Sale                3165 non-null float64
Market Capitalization    3167 non-null float64
IPO Year                 1386 non-null float64
Sector                   2767 non-null object
Industry                 2767 non-null object
dtypes: float64(3), object(4)
memory usage: 173.3+ KB


### Load the NYSE and NASDAQ listings and concatenate them

In [5]:
# Import the NYSE and NASDAQ listings.
# A list of sheet names makes read_excel return a dict of DataFrames keyed by
# sheet name, so the workbook is opened once instead of once per exchange.
sheets = pd.read_excel('listings.xlsx', sheet_name=['nyse', 'nasdaq'], na_values='n/a')
nyse = sheets['nyse']
nasdaq = sheets['nasdaq']

# Inspect nyse and nasdaq (before the Exchange column is added)
nyse.info()
nasdaq.info()

# Add Exchange reference columns so every row keeps its origin once combined
nyse['Exchange'] = 'NYSE'
nasdaq['Exchange'] = 'NASDAQ'

# Concatenate DataFrames; ignore_index=True gives the result a fresh
# 0..n-1 index instead of repeating each sheet's own row labels
combined_listings = pd.concat([nyse, nasdaq], ignore_index=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3147 entries, 0 to 3146
Data columns (total 7 columns):
Stock Symbol             3147 non-null object
Company Name             3147 non-null object
Last Sale                3079 non-null float64
Market Capitalization    3147 non-null float64
IPO Year                 1361 non-null float64
Sector                   2177 non-null object
Industry                 2177 non-null object
dtypes: float64(3), object(4)
memory usage: 172.2+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3167 entries, 0 to 3166
Data columns (total 7 columns):
Stock Symbol             3167 non-null object
Company Name             3167 non-null object
Last Sale                3165 non-null float64
Market Capitalization    3167 non-null float64
IPO Year                 1386 non-null float64
Sector                   2767 non-null object
Industry                 2767 non-null object
dtypes: float64(3), object(4)
memory usage: 173.3+ KB


### Automate the loading and combining of data from multiple Excel worksheets

In [6]:
# Create an empty list: listings
listings = []

# Open the workbook through a context manager so the underlying file handle
# is closed once every sheet has been read
with pd.ExcelFile('listings.xlsx') as xls:
    # Extract the sheet names from xls
    exchanges = xls.sheet_names

    # Import the data: one DataFrame per sheet, tagged with its sheet name
    for exchange in exchanges:
        listing = pd.read_excel(xls, sheet_name=exchange, na_values='n/a')
        listing['Exchange'] = exchange
        listings.append(listing)

# Concatenate the listings: listing_data
# Each sheet keeps its own row labels here, so index values repeat across
# exchanges in the combined frame
listing_data = pd.concat(listings)

# Inspect the results
listing_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6674 entries, 0 to 3146
Data columns (total 8 columns):
Stock Symbol             6674 non-null object
Company Name             6674 non-null object
Last Sale                6590 non-null float64
Market Capitalization    6674 non-null float64
IPO Year                 2852 non-null float64
Sector                   5182 non-null object
Industry                 5182 non-null object
Exchange                 6674 non-null object
dtypes: float64(3), object(5)
memory usage: 469.3+ KB


## Importing financial data from the web


### Get stock data for a single company

In [7]:
# NOTE(review): every line of this cell is commented out, so nothing here runs.
# The DataReader and date imports below are the only imports of those names
# visible in this notebook; the later commented-out web-data cells use them,
# so this cell has to be restored first if any of those cells is re-enabled.

# # Import DataReader
# from pandas_datareader.data import DataReader
# # Import date
# from datetime import date

# # Set start and end dates
# start = date(2016, 1, 1)
# end = date(2016, 12, 31)

# # Set the ticker
# ticker = 'AAPL'

# # Set the data source
# data_source = 'iex'

# # Import the stock prices
# stock_prices = DataReader(ticker, data_source, start, end)

# # Display and inspect the result
# print(stock_prices.head())
# stock_prices.info()

### Visualize a stock price trend

In [8]:
# NOTE(review): every line of this cell is commented out, so nothing here runs.
# If restored, it needs `date` and `DataReader`, which are imported only in the
# (also commented-out) single-company cell. The `plt` import below is the only
# matplotlib import visible in this notebook.

# # Import matplotlib.pyplot
# import matplotlib.pyplot as plt

# # Set start and end dates
# start = date(2016,1,1)
# end = date(2016,12,31)

# # Set the ticker and data_source
# ticker = 'FB'
# data_source = 'iex'

# # Import the data using DataReader
# stock_prices = DataReader(ticker, data_source, start, end)

# # Plot close
# stock_prices['close'].plot(title=ticker)

# # Show the plot
# plt.show()

### Visualize the long-term gold price trend

In [9]:
# NOTE(review): every line of this cell is commented out, so nothing here runs.
# If restored, it needs `date`, `DataReader` and `plt`, which are imported only
# in earlier cells that are themselves commented out.

# # Set start date
# start = date(1968,1,1)

# # Set series code
# series = 'GOLDAMGBD228NLBM'

# # Import the data
# gold_price = DataReader(series, 'fred', start=start)

# # Inspect the price of gold
# gold_price.info()

# # Plot the price of gold
# gold_price.plot(title='Gold Price')

# # Show the plot
# plt.show()

### Compare labor market participation and unemployment rates

In [10]:
# NOTE(review): every line of this cell is commented out, so nothing here runs.
# If restored, it needs `date`, `DataReader` and `plt`, which are imported only
# in earlier cells that are themselves commented out.

# # Set the start date
# start = date(1950,1,1)

# # Define the series codes
# series = ['UNRATE', 'CIVPART']

# # Import the data
# econ_data = DataReader(series, 'fred', start)

# # Assign new column labels
# econ_data.columns = ['Unemployment Rate','Participation Rate']

# # Plot econ_data
# econ_data.plot(subplots=True, title='Labor Market')

# # Show the plot
# plt.show()

### Compare bond and stock performance

In [11]:
# NOTE(review): every line of this cell is commented out, so nothing here runs.
# If restored, it needs `date`, `DataReader` and `plt`, which are imported only
# in earlier cells that are themselves commented out.

# # Set the start date
# start = date(2008, 1, 1)

# # Set the series codes
# series = ['BAMLHYH0A0HYM2TRIV', 'SP500']

# # Import the data
# data = DataReader(series, 'fred', start)

# # Plot the results
# data.plot(subplots=True, title='Performance Comparison')

# # Show the plot
# plt.show()

### Select the top 5 listed consumer companies

In [15]:
# Keep only the rows whose Sector is exactly 'Consumer Services'
consumer_services = listing_data.loc[listing_data['Sector'] == 'Consumer Services']

# Order those companies from largest to smallest market capitalization
consumer_services2 = consumer_services.sort_values(
    by='Market Capitalization',
    ascending=False,
)

# Print name, exchange and market cap for the five largest companies
print(consumer_services2.loc[:, ['Company Name', 'Exchange', 'Market Capitalization']].head(5))

                   Company Name Exchange  Market Capitalization
4              Amazon.com, Inc.   nasdaq           4.221385e+11
3013      Wal-Mart Stores, Inc.     nyse           2.218646e+11
3014  Walt Disney Company (The)     nyse           1.789973e+11
6           Comcast Corporation   nasdaq           1.764745e+11
1438     Home Depot, Inc. (The)     nyse           1.752626e+11


### Get the ticker of the largest consumer services company

In [None]:
# NOTE(review): every line of this cell is commented out, so nothing here runs.
# If restored, it needs `listing_data` from the sheet-combining cell plus
# `date`, `DataReader` and `plt`, which are imported only in earlier cells
# that are themselves commented out.

# # Set the index of listings to Stock Symbol
# listings_ss = listing_data.set_index('Stock Symbol')

# # Get ticker of the largest Consumer Services company
# ticker = listings_ss.loc[listings_ss['Sector']=='Consumer Services', 'Market Capitalization'].idxmax()

# # Set the start date
# start = date(2015,1,1)

# # Import the stock data
# data = DataReader(ticker, 'iex', start)

# # Plot close and volume
# data[['close', 'volume']].plot(secondary_y='volume', title=ticker)

# # Show the plot
# plt.show()