# Scrape Upstream Companies
[Evaluate Energy](https://blog.evaluateenergy.com/list-of-u-s-oil-gas-companies) published a list of U.S. upstream, midstream and downstream oil and gas companies in 2018. This notebook scrapes the upstream companies from that list and then looks up their ticker symbols on Yahoo Finance.

In [1]:
from bs4 import BeautifulSoup
import requests as req
import pandas as pd

## Scrape Company Names

In [5]:
# get html
url = 'https://blog.evaluateenergy.com/list-of-u-s-oil-gas-companies'
resp = req.get(url, timeout=30)  # a timeout keeps the cell from hanging indefinitely
resp.raise_for_status()  # stop here instead of parsing an HTTP error page

soup = BeautifulSoup(resp.text, 'lxml')

# parse list: company names are the <li> items of the first <ul> that follows
# the "Upstream companies" paragraph
upstream_heading = soup.find('p', string='Upstream companies')
if upstream_heading is None:
    raise ValueError("'Upstream companies' paragraph not found; the page layout may have changed")

upstream_ul = upstream_heading.find_next('ul')
if upstream_ul is None:
    raise ValueError("no <ul> found after the 'Upstream companies' paragraph")

# replace non-breaking spaces (\xa0) with plain spaces in each name
upstream_list = [li.get_text().replace('\xa0', ' ') for li in upstream_ul.find_all('li')]


# create dataframe
df = pd.DataFrame(upstream_list, columns=['Company Name'])
df.head()

Unnamed: 0,Company Name
0,Abraxas Petroleum Corp.
1,Adams Resources & Energy Inc.
2,Amplify Energy Corp.
3,Anadarko Petroleum Corp.
4,Antero Resources Corp.


## Scrape Ticker Symbols

In [48]:
def GenerateQuery(input_str):
    """
    Build the Yahoo Finance lookup URL for a company name.

    The whole name is percent-encoded, so reserved characters such as '&'
    (e.g. "Adams Resources & Energy Inc.") stay inside the `s` query
    parameter instead of cutting it short. Spaces are still encoded as %20.

    Parameters
    ----------
    input_str : str
        Company name to search for.

    Returns
    -------
    str
        Lookup URL carrying the encoded name as the `s` parameter.
    """
    from urllib.parse import quote  # local import keeps this cell self-contained

    # safe='' also encodes '/', which quote() leaves untouched by default
    return 'https://finance.yahoo.com/lookup?s=' + quote(input_str, safe='')

# one lookup URL per company name
df['URL'] = df['Company Name'].map(GenerateQuery)



In [51]:
# Fetch the Yahoo Finance lookup page for the first company explicitly.
# The only visible assignment to `soup` is the Evaluate Energy page, so parsing
# `soup` here only worked with leftover kernel state.
lookup_resp = req.get(df['URL'].iloc[0], timeout=30)
lookup_soup = BeautifulSoup(lookup_resp.text, 'lxml')

# bold anchors carrying a data-symbol attribute; drop the 'Energy' sector link
ticker_attrs = lookup_soup.find_all(attrs={'data-symbol': True, 'class': 'Fw(b)'})
ticker_list = [x.text for x in ticker_attrs if x.text != 'Energy']
ticker_list

['AXAS']

In [53]:

# Display the current ticker_list.
# NOTE(review): the recorded output (['AE']) differs from the previous cell's
# (['AXAS']) although no visible cell reassigns ticker_list in between --
# probably produced by a since-removed cell; confirm on a fresh kernel.
ticker_list

['AE']

In [71]:
def GetTicker(input_name):
    """
    Scrapes yahoo finance for ticker symbols

    Parameters
    ----------
    input_name : str
        Company name to look up.

    Returns
    -------
    tuple
        (company_name, company_ticker), taken from the `title` attribute and
        the text of the last matching ticker link on the lookup page.

    Raises
    ------
    IndexError
        If the lookup page contains no ticker link for `input_name`.
    """
    from urllib.parse import quote  # local import keeps this cell self-contained

    # query yahoo finance; percent-encode the whole name so characters such as
    # '&' do not cut the `s` parameter short
    url = 'https://finance.yahoo.com/lookup?s=' + quote(input_name, safe='')
    resp = req.get(url, timeout=30)  # a timeout keeps one slow lookup from hanging a batch
    soup = BeautifulSoup(resp.text, 'lxml')

    # parse result
    ticker_attrs = soup.find_all(attrs={'data-symbol': True, 'class': 'Fw(b)'})
    # The sector link carries the same data-symbol as the ticker link but shows
    # the sector name as its text. Keeping only anchors whose text equals their
    # symbol drops it for every sector, not just 'Energy'.
    matches = [x for x in ticker_attrs if x.text == x['data-symbol']]
    if not matches:
        # same exception type as the original empty-list pop(), with a usable message
        raise IndexError('no ticker found on the Yahoo Finance lookup page for %r' % (input_name,))

    # NOTE(review): the last match is used, as in the original .pop(); confirm
    # whether the first result would be the better match when several are listed.
    match = matches[-1]
    company_ticker = match.text
    company_name = match['title']

    return company_name, company_ticker


('Adams Resources & Energy, Inc.', 'AE')

In [56]:
# Inspect the raw anchor tags matched by the lookup selector. In the recorded
# output the sector link ("Energy") has the same class and data-symbol as the
# ticker link, which is why later code has to filter it out.
ticker_attrs

[<a class="Fw(b)" data-reactid="57" data-symbol="AMPY" href="/quote/AMPY?p=AMPY" title="Amplify Energy Corp.">AMPY</a>,
 <a class="Fw(b)" data-reactid="61" data-symbol="AMPY" href="https://finance.yahoo.com/sector/energy" title="Energy">Energy</a>]

In [44]:
# write the company table to a pipe-delimited file, leaving out the index column
df.to_csv(
    '../Data/upstream_companies.csv',
    index=False,
    sep="|",
)