# Scrape Upstream Companies
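[Evaluate Energy](https://blog.evaluateenergy.com/list-of-u-s-oil-gas-companies) published a list of upstream, midstream and downstream oil and gas companies in 2018. We are going to scrape the upstream companies from that list, look up a ticker symbol for each one on Yahoo Finance, and export the result to a CSV file.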

In [1]:
from urllib.parse import quote

from bs4 import BeautifulSoup
import pandas as pd
import requests as req

## Scrape Company Names

In [5]:
# get html
url = 'https://blog.evaluateenergy.com/list-of-u-s-oil-gas-companies'
# a timeout keeps the cell from hanging forever if the site does not answer
resp = req.get(url, timeout=30)
# fail loudly on an HTTP error instead of parsing an error page
resp.raise_for_status()

soup = BeautifulSoup(resp.text, 'lxml')

# parse list: the company names are the <li> items of the first <ul> that
# follows the <p> whose text is exactly "Upstream companies"
upstream_heading = soup.find('p', string='Upstream companies')
if upstream_heading is None:
    raise ValueError("Could not find the 'Upstream companies' paragraph; the page layout may have changed.")

upstream_ul = upstream_heading.find_next('ul')
if upstream_ul is None:
    raise ValueError("No <ul> follows the 'Upstream companies' paragraph; the page layout may have changed.")

# replace non-breaking spaces with plain spaces in each company name
upstream_list = [li.get_text().replace('\xa0', ' ') for li in upstream_ul.find_all('li')]


# create dataframe
df = pd.DataFrame(upstream_list, columns=['Company Name'])
df.head()

Unnamed: 0,Company Name
0,Abraxas Petroleum Corp.
1,Adams Resources & Energy Inc.
2,Amplify Energy Corp.
3,Anadarko Petroleum Corp.
4,Antero Resources Corp.


## Scrape Ticker Symbols

In [80]:
def GetTicker(input_name, timeout=30):
    """
    Scrapes yahoo finance for ticker symbols

    Parameters
    ----------
    input_name : str
        Company name to search for on the Yahoo Finance lookup page.
    timeout : float, optional
        Seconds to wait for the HTTP response before `requests` raises
        (default 30).

    Returns
    -------
    pd.Series
        Indexed by 'Listed Name' and 'Ticker'. Both values are None when the
        page contains no usable match.

    Network errors raised by `requests` are not caught and propagate to the
    caller.
    """
    no_match = pd.Series([None, None], index=['Listed Name', 'Ticker'])

    # query yahoo finance; percent-encode the whole name so that characters
    # such as '&' stay inside the `s` parameter instead of splitting the query
    url = 'https://finance.yahoo.com/lookup?s=' + quote(input_name, safe='')
    resp = req.get(url, timeout=timeout)
    soup = BeautifulSoup(resp.text, 'lxml')

    # parse result: keep tags carrying a data-symbol attribute and the
    # 'Fw(b)' class, dropping any whose text is exactly 'Energy'
    ticker_attrs = soup.find_all(attrs={'data-symbol': True, 'class': 'Fw(b)'})
    matches = [x for x in ticker_attrs if x.text != 'Energy']
    if not matches:
        return no_match

    # NOTE(review): the LAST match is used, exactly as before (`.pop()`).
    # Whether the first match would be the better listing is not verifiable
    # from this notebook; confirm before changing.
    best_match = matches[-1]
    if not best_match.has_attr('title'):
        # a tag without a title gives no listed name; report no match at all
        return no_match

    return pd.Series([best_match['title'], best_match.text], index=['Listed Name', 'Ticker'])
    

# run the lookup once per company name, then attach both result columns to df
ticker_lookup = df['Company Name'].apply(GetTicker)
df[['Listed Name', 'Ticker']] = ticker_lookup

df

Unnamed: 0,Company Name,URL,Listed Name,Ticker
0,Abraxas Petroleum Corp.,https://finance.yahoo.com/lookup?s=Abraxas%20P...,Abraxas Petroleum Corporation,AXAS
1,Adams Resources & Energy Inc.,https://finance.yahoo.com/lookup?s=Adams%20Res...,"Adams Resources & Energy, Inc.",AE
2,Amplify Energy Corp.,https://finance.yahoo.com/lookup?s=Amplify%20E...,Amplify Energy Corp.,AMPY
3,Anadarko Petroleum Corp.,https://finance.yahoo.com/lookup?s=Anadarko%20...,,
4,Antero Resources Corp.,https://finance.yahoo.com/lookup?s=Antero%20Re...,Antero Resources Corp. Register,7A6.SG
...,...,...,...,...
114,Whiting Petroleum Corp.,https://finance.yahoo.com/lookup?s=Whiting%20P...,WHITING PETROLEUM CORP,WLL1.MX
115,WildHorse Resource Development Corp.,https://finance.yahoo.com/lookup?s=WildHorse%2...,,
116,WPX Energy Inc.,https://finance.yahoo.com/lookup?s=WPX%20Energ...,WPX Energy Inc. Registered Shar,WPE.SG
117,Yuma Energy Inc.,https://finance.yahoo.com/lookup?s=Yuma%20Ener...,YUMA ENERGY INC,YUMAQ


In [81]:
# export: keep only the name and ticker columns
export_columns = ['Company Name', 'Listed Name', 'Ticker']
out_df = df.loc[:, export_columns]

# pipe-delimited file, written without the index column
out_df.to_csv('../Data/upstream_companies.csv', index=False, sep="|")