# Ingest Zillow
Demonstrates how to ingest data that is published as a file at a specific URL.  The available datasets can be browsed at https://www.zillow.com/research/data/.  To get the URL of a specific file, right-click its Download button and copy the link address.

In [0]:
import requests
from io import BytesIO
import pandas as pd

# This URL returns a monthly time series of number of sales in the US metro regions.
url = "https://files.zillowstatic.com/research/public_csvs/sales_count_now/Metro_sales_count_now_uc_sfrcondo_month.csv?t=1757774212"

# Always pass a timeout: without one, requests waits indefinitely if the server
# stops responding, which would hang the notebook cell.  The tuple is
# (connect timeout, read timeout) in seconds.
response = requests.get(url, timeout=(10, 60))

# raise_for_status raises requests.HTTPError for a 4xx/5xx response, so an
# error page is never passed on to the CSV parser below.
response.raise_for_status()

# BytesIO wraps the downloaded bytes in a file-like object, which lets pandas
# read the CSV directly from memory without writing a temporary file.
file_content = BytesIO(response.content)
sales_count_pdf = pd.read_csv(file_content)
display(sales_count_pdf)


In [0]:

# Hand the pandas dataframe to Spark so it can be persisted as a table.
sales_count_df = spark.createDataFrame(sales_count_pdf)

# Write to the bronze schema; "overwrite" mode replaces any existing contents
# of the table instead of appending to them.
bronze_writer = sales_count_df.write.mode("overwrite")
bronze_writer.saveAsTable("bronze.sales_count")
