# Stock Market data
### Data will be fetched from https://markets.financialcontent.com/ only
### Initially, data is fetched for Apple, Google, and Microsoft

In [1]:
import pandas as pd
import numpy as np
import bs4 as bs
import urllib.request as ur
import matplotlib.pyplot as plt
from matplotlib import style
import pickle

### Function to get the page-source of the company's url

In [2]:
def get_page_source(url):
    """Open ``url`` and return the complete response body as bytes.

    The response object is closed before the function returns.
    """
    with ur.urlopen(url) as response:
        body = response.read()
    return body

### Function to scrape the page and return the data as a list

In [3]:
def scraping(company, month=6, year=2018, range_=12):
    """Scrape the historical price table for one ticker symbol.

    Args:
        company: Ticker symbol sent as the ``Symbol`` query parameter
            (e.g. ``"AAPL"``).
        month: Value sent as the ``Month`` query parameter. Defaults to 6.
        year: Value sent as the ``Year`` query parameter. Defaults to 2018.
        range_: Value sent as the ``Range`` query parameter. Defaults to 12.
            NOTE(review): presumably the number of months of history ending
            at ``month``/``year`` -- confirm against the site.

    Returns:
        A list with one entry per ``<tr>`` of the price table; each entry is
        the list of that row's ``<td>`` texts. Rows that contain no ``<td>``
        cells yield an empty list, so callers that only want data rows have
        to skip them.

    Raises:
        ValueError: If the page does not contain the expected price table.
    """
    # Local import so this cell does not depend on a change to the import cell.
    from urllib.parse import urlencode

    # urlencode escapes the symbol, so tickers with reserved characters
    # cannot corrupt the query string.
    query = urlencode(
        {"Symbol": company, "Month": month, "Year": year, "Range": range_}
    )
    page_url = (
        "https://markets.financialcontent.com/stocks/quote/historical?" + query
    )
    soup = bs.BeautifulSoup(get_page_source(page_url), "lxml")
    table = soup.find("table", class_="quote_detailed_price_table data")
    if table is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            f"No historical price table found for symbol {company!r} at {page_url}"
        )
    return [
        [cell.text for cell in row.find_all("td")]
        for row in table.find_all("tr")
    ]

### Function to create the DataFrame with the scraped_data

In [4]:
def create_dframe(scrape_data):
    """Turn scraped table rows into a DataFrame indexed by ``"Date"``.

    Each row of ``scrape_data`` supplies, in order, the values for the
    Date, Open, High, Low, Close, Volume and Change(%) columns.
    """
    headers = ["Date", "Open", "High", "Low", "Close", "Volume", "Change(%)"]
    table = pd.DataFrame(scrape_data, columns=headers)
    return table.set_index("Date")

### Function to store the DataFrame as a .csv file

In [5]:
def to_csv(frame, name):
    """Write ``frame`` as UTF-8 CSV to the file ``<name>_data.csv``."""
    target = f"{name}_data.csv"
    frame.to_csv(target, encoding="utf-8")

### Function to remove the commas from Volume, change data type to float and remove all NaN values

In [6]:
def data_cleaning(company):
    """Normalise the ``Volume`` column to float and drop incomplete rows.

    The frame is modified in place and nothing is returned.

    Args:
        company: DataFrame with a ``Volume`` column. The column may hold
            strings with thousands separators (``"27,200,447"``) or values
            that are already numeric.
    """
    volume = company["Volume"]
    # The .str accessor only exists for string data. Skipping it for numeric
    # columns makes the function safe to re-run on an already-cleaned frame
    # and on CSVs whose volumes had no thousands separators.
    if not pd.api.types.is_numeric_dtype(volume):
        volume = volume.str.replace(",", "", regex=False)
    company["Volume"] = volume.astype(float)
    # Drop every row that still has a missing value in any column.
    company.dropna(how="any", inplace=True)

In [7]:
# Ticker symbols whose price history will be downloaded.
companies = (
    "AAPL",   # Apple
    "GOOGL",  # Google
    "MSFT",   # Microsoft
)

### Get the page-source, create DataFrame and store the data as .csv

In [8]:
# For each ticker symbol: scrape its price table, build a DataFrame from the
# rows, and write it to "<symbol>_data.csv".
for company in companies:
    scrape_data = scraping(company)
    # Skip the first scraped row -- presumably the table's header row, which
    # has no <td> cells and therefore scrapes as an empty list (TODO confirm).
    frame = create_dframe(scrape_data[1:])
    to_csv(frame, company)

### Load the data from local storage for further analysis

In [9]:
# Reload the per-symbol CSV files from disk, using the "Date" column as the
# index of each DataFrame.
apple = pd.read_csv("AAPL_data.csv", index_col="Date")
google = pd.read_csv("GOOGL_data.csv", index_col="Date")
microsoft = pd.read_csv("MSFT_data.csv", index_col="Date")

In [10]:
# Rebind `companies`: from here on it holds the loaded DataFrames rather than
# the ticker strings it held during scraping.
companies = (
    apple,
    google,
    microsoft,
)

### Converting Volume to float and removing all NaN values

In [11]:
# data_cleaning modifies each DataFrame in place, so the apple, google and
# microsoft frames are updated directly and no result is collected.
for company in companies:
    data_cleaning(company)

In [12]:
# Last expression in the cell: the notebook displays the cleaned Apple frame.
apple

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Change(%)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Jun 22, 2018",186.12,186.15,184.70,184.92,27200447.0,-0.54(-0.29%)
"Jun 21, 2018",187.25,188.35,184.98,185.46,25707173.0,-1.04(-0.56%)
"Jun 20, 2018",186.35,187.20,185.73,186.50,20617664.0,+0.81(+0.44%)
"Jun 19, 2018",185.14,186.33,183.45,185.69,33561305.0,-3.05(-1.62%)
"Jun 18, 2018",187.88,189.22,187.21,188.74,18467215.0,-0.10(-0.05%)
"Jun 14, 2018",191.55,191.57,190.22,190.80,21599788.0,+0.10(+0.05%)
"Jun 13, 2018",192.42,192.88,190.44,190.70,21634767.0,-1.58(-0.82%)
"Jun 12, 2018",191.38,192.61,191.15,192.28,16897920.0,+1.05(+0.55%)
"Jun 11, 2018",191.35,191.97,190.21,191.23,18301665.0,-0.47(-0.25%)
"Jun 08, 2018",191.17,192.00,189.77,191.70,26656799.0,-1.76(-0.91%)
