# INSY 5336 - Python Programming - Spring 2020 - Final Term Project

# Question 1 - Project to scrape data from the web and store the results in a text or csv file.

## High Level Algorithm:

1. Receive the content from the CNN Money’s Market Movers website and parse the data using lxml parser
2. Open a csv file in write mode and scrape the stock tables from the HTML tags of the website
3. Use the Yahoo Finance URL to get the details of each stock, looked up by its company abbreviation (symbol).
4. Scrape the data and get the values of OPEN price, PREV CLOSE price, VOLUME, MARKET CAP for each stock.
5. Write the scraped contents in the csv file in the format Category, Company symbol, Company Name, PREV CLOSE price, OPEN price, VOLUME, MARKET CAP.
6. The program asks for the user's input for the company symbol that the user wants details for.
7. Display the contents of the desired stock.

## Instructions:

1. Run the cell containing the program (Alt+Enter).
2. Please wait for a minute or two. The list of Most Actives, Gainers and Losers will be displayed.
3. Enter the company symbol you are interested in and press Enter.
4. Results are displayed.

In [None]:
from bs4 import BeautifulSoup
import requests
import csv

# Seconds to wait for a web server before giving up, so that a stalled
# connection cannot hang the program indefinitely
REQUEST_TIMEOUT = 30

# "data-test" attributes of the Yahoo Finance summary cells, in the order the
# values are written to the csv file: PREV CLOSE, OPEN, VOLUME, MARKET CAP
QUOTE_FIELDS = ["PREV_CLOSE-value", "OPEN-value", "TD_VOLUME-value", "MARKET_CAP-value"]

# Placeholder written to the csv file when a value could not be scraped
NOT_AVAILABLE = "N/A"


def fetchQuoteValues(companySymbol):
    """Scrape the Yahoo Finance quote page of one stock.

    Args:
        companySymbol: The company symbol inserted into the Yahoo Finance URL.

    Returns:
        A list with one text value per entry of QUOTE_FIELDS, in the same
        order. An entry is NOT_AVAILABLE when the page could not be fetched
        or the corresponding element is not present in the page.
    """
    # Format the Yahoo Finance URL to replace the company symbol with the desired one
    url = 'https://finance.yahoo.com/quote/{}?'.format(companySymbol)

    # One unreachable quote page should not abort the whole scrape, so a
    # network or HTTP error is turned into placeholder values instead
    try:
        response = requests.get(url, timeout = REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException:
        return [NOT_AVAILABLE] * len(QUOTE_FIELDS)

    # A separate name is used so the parsed CNN page is not overwritten
    quoteSoup = BeautifulSoup(response.text, 'lxml')

    # Scrape the web content within the tag <div></div> and with HTML attribute id
    tableQuote = quoteSoup.find("div", attrs = {'id': "quote-summary"})
    if tableQuote is None:
        return [NOT_AVAILABLE] * len(QUOTE_FIELDS)

    # find() returns None for a missing cell, so every cell is checked before use
    values = []
    for field in QUOTE_FIELDS:
        cell = tableQuote.find("td", attrs = {'data-test': field})
        values.append(cell.get_text() if cell is not None else NOT_AVAILABLE)
    return values


# Get the web content by using the requests library module.
# Nothing can be scraped without this page, so an HTTP error is raised here.
response = requests.get('https://money.cnn.com/data/hotstocks/', timeout = REQUEST_TIMEOUT)
response.raise_for_status()
source = response.text

# Parsing the web content using the lxml parser
soup = BeautifulSoup(source, 'lxml')

# Getting the tables of the Most Actives, Gainers and Losers
table = soup.find_all(class_ = "wsod_dataTable wsod_dataTableBigAlt")

# Creating a list for the Company symbols
symbolList = []

# Creating a list for the Company names
companyNameList = []

# Creating a list of Categories - Most Actives, Gainers and Losers
activeGainerLoser = ["Most Actives", "Gainers", "Losers"]

# Creating a dictionary with key as Company symbol and value as one of the categories (Most Actives, Gainers and Losers)
stocksDictionary = {}

# Open HotStocks.csv file if present or create one. Open the csv file in the write mode with comma delimiter.
# The with statement closes the file on exit, so no explicit close() is needed.
with open('HotStocks.csv', 'w', newline = '') as csv_file:
    csv_writer = csv.writer(csv_file, delimiter = ",")

    # Pair every table with its category; zip stops at the shorter of the two,
    # so an unexpected extra table cannot index past the category list
    for categoryName, entry in zip(activeGainerLoser, table):

        # For every row within the tag <tr></tr>, look at the first column only
        for tableRow in entry.find_all('tr'):
            detail = tableRow.find('td')

            # Rows without any <td> (for example a header row) carry no stock
            if detail is None:
                continue

            # Get or scrape the Company symbol and the name of the company
            symbolLink = detail.find("a", attrs = {'class': "wsod_symbol"})
            name = detail.find('span')

            # Skip rows that do not have the expected symbol link and name span
            if symbolLink is None or name is None:
                continue

            companySymbol = symbolLink.get_text()
            companyName = name.get_text()

            # Append the company symbol and name to their lists
            symbolList.append(companySymbol)
            companyNameList.append(companyName)

            # Remember the category of this Company symbol
            stocksDictionary[companySymbol] = categoryName

            # Get the four parameters - PREV CLOSE price, OPEN price, VOLUME, MARKET CAP
            quoteValues = fetchQuoteValues(companySymbol)

            # Write the values into the csv file in the format
            # Category, Company symbol, Company name, PREV CLOSE, OPEN, VOLUME, MARKET CAP
            csv_writer.writerow([categoryName, companySymbol, companyName] + quoteValues)

print("This is a program to scrape data from the https://money.cnn.com/data/hotstocks/  for a class project.\n")
print("Which stock are you interested in: ")

# Stocks to display for each category, in the order the categories are printed.
# Each entry is the text "<Company symbol> <Company name>".
stocksByCategory = {"Most Actives": [], "Gainers": [], "Losers": []}

# Open the HotStocks.csv file in read mode and read it a single time,
# sorting every row into its category (first column of the row).
# The with statement closes the file on exit, so no explicit close() is needed.
with open('HotStocks.csv', 'r', newline = '') as csv_file:
    for row in csv.reader(csv_file):
        # Ignore blank or incomplete rows and rows of an unknown category
        if len(row) >= 3 and row[0] in stocksByCategory:
            stocksByCategory[row[0]].append(row[1] + " " + row[2])

# Display every category heading followed by the Company symbol and Company name of its stocks
for categoryName, stocks in stocksByCategory.items():
    print("\n" + categoryName + ":")
    for stock in stocks:
        print(stock)

# Prompt the user to give the desired company symbol or abbreviation
userInput = input("\nUser inputs: ")

# Symbols are compared in upper case; surrounding whitespace typed by
# accident is removed so that it does not prevent a match
requestedSymbol = userInput.strip().upper()

# Open the HotStocks.csv file in read mode and collect, in a single pass, every
# row whose Company symbol (second column) is equal to the user input.
# Rows with fewer than the seven expected columns are ignored.
# The with statement closes the file on exit, so no explicit close() is needed.
with open('HotStocks.csv', 'r', newline = '') as csv_file:
    matchingRows = [row for row in csv.reader(csv_file)
                    if len(row) >= 7 and row[1] == requestedSymbol]

if matchingRows:
    # The heading uses the Company name of the last matching row and is
    # only shown when that name is not empty
    company_name = matchingRows[-1][2]
    if company_name:
        print("\nThe data for "+ requestedSymbol + " " + company_name + " is the following: \n")

    # Display the corresponding Company symbol, name, Open, Previous Close, Volume and Market Cap values.
    # The csv columns are stored as PREV CLOSE (index 3) then OPEN (index 4).
    for row in matchingRows:
        print(row[1] + " " + row[2])
        print("OPEN: " + row[4])
        print("PREV CLOSE: " + row[3])
        print("VOLUME: " + row[5])
        print("MARKET CAP: " + row[6])
else:
    # If the company is not present in the list then display
    print("\nThere is no such company among the three categories - Most Actives, Gainers and Losers")