In [None]:
import pandas as pd
import requests as rs
from bs4 import BeautifulSoup
import warnings

# Ignore all future warnings (to keep output clean)
warnings.filterwarnings("ignore", category=FutureWarning)

# 1. Load the HTML page
url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/netflix_data_webpage.html"
# A timeout keeps the script from hanging indefinitely on a stalled connection
response = rs.get(url, timeout=30)
# Stop on an HTTP error status (4xx/5xx) instead of going on to parse an
# error page as if it were the stock table
response.raise_for_status()
html_data = response.text

# 2. Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(html_data, 'html.parser')

# 3. Column layout of the resulting DataFrame
column_names = ["Date", "Open", "High", "Low", "Close", "Volume"]

# 4. Loop through table rows and extract data
table_body = soup.find("tbody")
if table_body is None:
    # Fail with a clear message rather than an AttributeError on None
    raise ValueError("No <tbody> element found in the downloaded HTML page")

# Rows are collected as plain dicts and turned into a DataFrame once at the
# end: calling pd.concat inside the loop copies every previously collected
# row again on each iteration.
records = []
for row in table_body.find_all("tr"):
    cols = row.find_all("td")
    if len(cols) < 7:
        # Cell index 6 is read below, so anything shorter is not a full
        # data row; skip it instead of failing with IndexError
        continue

    records.append({
        "Date": cols[0].text.strip(),
        "Open": cols[1].text.strip(),
        "High": cols[2].text.strip(),
        "Low": cols[3].text.strip(),
        "Close": cols[4].text.strip(),
        # Cell 5 is deliberately not read (presumably an adjusted-close
        # column -- verify against the page). Commas are removed so the
        # volume can be converted to a number later.
        "Volume": cols[6].text.strip().replace(",", ""),
    })

# Passing `columns` fixes the column order and keeps the expected (empty)
# layout even when no rows were found
netflix_data = pd.DataFrame(records, columns=column_names)

# 5. Turn the scraped text columns into numbers, one column at a time.
#    errors='coerce' replaces any value that cannot be parsed with NaN.
numeric_cols = ["Open", "High", "Low", "Close", "Volume"]
for col_name in numeric_cols:
    netflix_data[col_name] = pd.to_numeric(netflix_data[col_name], errors='coerce')

# 6. Show the finished DataFrame
print(netflix_data)

# 7. Save to CSV and download
csv_filename = "new_netflix_stock_data.csv"
netflix_data.to_csv(csv_filename, index=False)

# The google.colab package is only importable inside a Colab runtime.
# Anywhere else the CSV written above is already on disk, so the browser
# download step is skipped instead of ending the script with an import error.
try:
    from google.colab import files
except ImportError:
    print(f"Saved {csv_filename}; browser download skipped (not running in Google Colab).")
else:
    files.download(csv_filename)



            Date    Open    High     Low   Close     Volume
0   Jun 01, 2021  504.01  536.13  482.14  528.21   78560600
1   May 01, 2021  512.65  518.95  478.54  502.81   66927600
2   Apr 01, 2021  529.93  563.56  499.00  513.47  111573300
3   Mar 01, 2021  545.57  556.99  492.85  521.66   90183900
4   Feb 01, 2021  536.79  566.65  518.28  538.85   61902300
..           ...     ...     ...     ...     ...        ...
65  Jan 01, 2016  109.00  122.18   90.11   91.84  488193200
66  Dec 01, 2015  124.47  133.27  113.85  114.38  319939200
67  Nov 01, 2015  109.20  126.60  101.86  123.33  320321800
68  Oct 01, 2015  102.91  115.83   96.26  108.38  446204400
69  Sep 01, 2015  109.35  111.24   93.55  103.26  497401200

[70 rows x 6 columns]


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>