In [None]:
pip install \
  google-cloud-storage \
  google-cloud-bigquery

In [9]:
!pip install python-dotenv
!pip install functions-framework
!pip install google-cloud-storage
from dotenv import load_dotenv
load_dotenv()

import os
import requests
import functions_framework
from google.cloud import storage

@functions_framework.http
def extract_earthquake(request):
    print('Extracting earthquake data...')
    
    # API URL with parameters
    url = 'https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2023-01-01&endtime=2023-01-02&minmagnitude=5'

    # Make the request to the earthquake API
    response = requests.get(url)
    response.raise_for_status()  # Raises an HTTPError for bad responses

    # Define the filename based on environment or a default
    filename = '/tmp/earthquake_data.json'  # Temporary file in a writable directory

    # Write the response content to a file
    with open(filename, 'wb') as f:
        f.write(response.content)

    print(f'Downloaded {filename}')

    # Optional: Upload to Google Cloud Storage
    bucket_name = os.getenv('GCS_BUCKET')
    if bucket_name:
        upload_to_gcs(bucket_name, filename)

def upload_to_gcs(bucket_name, filename):
    """Uploads file to Google Cloud Storage."""
    client = storage.Client()
    bucket = client.bucket(bucket_name)
    blob = bucket.blob(os.path.basename(filename))
    
    blob.upload_from_filename(filename)
    print(f'Uploaded {filename} to {bucket_name}')

    # Upload the downloaded file to cloud storage
    BUCKET_NAME = os.getenv('DATA_LAKE_BUCKET')
    blobname = 'raw/phl_opa_properties/phl_opa_properties.csv'

    storage_client = storage.Client()
    bucket = storage_client.bucket(BUCKET_NAME)
    blob = bucket.blob(blobname)
    blob.upload_from_filename(filename)

    print(f'Uploaded {blobname} to {BUCKET_NAME}')

    return f'Downloaded to {filename} and uploaded to gs://{BUCKET_NAME}/{blobname}'

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Collecting google-cloud-storage
  Downloading google_cloud_storage-2.16.0-py2.py3-none-any.whl.metadata (6.1 kB)
Collecting google-auth<3.0dev,>=2.26.1 (from google-cloud-storage)
  Downloading google_auth-2.29.0-py2.py3-none-any.whl.metadata (4.7 kB)
Collecting google-api-core<3.0.0dev,>=2.15.0 (from google-cloud-storage)
  Downloading google_api_core-2.18.0-py3-none-any.whl.metadata (2.7 kB)
Collecting google-cloud-core<3.0dev,>=2.3.0 (from google-cloud-storage)
  Downloading google_cloud_core-2.4.1-py2.py3-none-any.whl.metadata (2.7 kB)
Collecting google-resumable-media>=2.6.0 (from google-cloud-storage)
  Downloading google_resumable_media-2.7.0-py2.py3-none-any.whl.metadata (2.2 kB)
Collecting google-crc32c<2.0dev,>=1.0 (from google-cloud-storage)
  D

In [1]:
import requests

# Replace 'your_api_key' and 'api_endpoint' with actual API key and URL
headers = {'Authorization': 'https://earthquake.usgs.gov/fdsnws/event/1/application.json'}
response = requests.get('api_endpoint', headers=headers)
data = response.json()


MissingSchema: Invalid URL 'api_endpoint': No scheme supplied. Perhaps you meant https://api_endpoint?

In [None]:
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import statsmodels.api as sm
import pandas_gbq
import functions_framework
from google.cloud import bigquery
load_dotenv()


@functions_framework.http
def model_phl_opa_properties(request):
    client = bigquery.Client()
    query = """
        SELECT
            *
        FROM `musa509s24-team3.core.opa_properties`
    """
    properties = client.query_and_wait(query).to_dataframe()
    properties.replace('', np.nan, inplace=True)
    properties['year_built'] = properties['year_built'].astype(np.float64)
    properties['zip_code'] = properties['zip_code'].astype(np.float64)
    properties['total_livable_area'] = properties['total_livable_area'].astype(float)
    properties['total_area'] = properties['total_area'].astype(float)
    properties['sale_price'] = properties['sale_price'].astype(float)
    properties['number_stories'] = properties['number_stories'].astype(float)
    properties['number_of_bathrooms'] = properties['number_of_bathrooms'].astype(float)
    properties['number_of_bedrooms'] = properties['number_of_bedrooms'].astype(float)
    properties['garage_type'] = properties['garage_type'].astype(float)
    properties['fireplaces'] = properties['fireplaces'].astype(float)
    properties['category_code'] = properties['category_code'].astype(int)
    properties_mdl = properties[
        ['basements',
         'category_code',
         'census_tract',
         'central_air',
         'fireplaces',
         'garage_type',
         'market_value',
         'number_of_bedrooms',
         'number_of_bathrooms',
         'number_of_rooms',
         'number_stories',
         'parcel_number',
         'quality_grade',
         'sale_price',
         'type_heater',
         'total_area',
         'total_livable_area',
         'view_type',
         'building_code_description_new',
         'zip_code',
         'year_built']]
    properties_mdl['Age'] = 2024 - properties_mdl['year_built']
    properties_mdl['numRooms'] = np.select([(properties_mdl['number_of_bedrooms'].isna()) & (~properties_mdl['number_of_bathrooms'].isna()),
                                            (properties_mdl['number_of_bathrooms'].isna()) & (~properties_mdl['number_of_bedrooms'].isna()),
                                            (properties_mdl['number_of_bathrooms'].isna()) & (properties_mdl['number_of_bedrooms'].isna())],
                                           [properties_mdl['number_of_bathrooms'], properties_mdl['number_of_bedrooms'], 0], default=properties_mdl['number_of_bedrooms'] + properties_mdl['number_of_bathrooms'])
    properties_mdl['view'] = np.select([properties_mdl['view_type'].isin(['I', '0']) | properties_mdl['view_type'].isna(),
                                        properties_mdl['view_type'].isin(['A', 'B', 'C'])], ['Typical', 'Scenic'], default='Urban')
    properties_mdl['hasAC'] = np.where(properties_mdl['central_air'].isin(['1', 'Y']), 'Y', 'N')
    properties_mdl['hasBasement'] = np.where(properties_mdl['basements'].isin(['1', '4', 'A', 'B', 'C', 'D', 'E', 'F']), 'Y', 'N')
    properties_mdl['hasFireplace'] = np.where((properties_mdl['fireplaces'] == 0) | (properties_mdl['fireplaces'].isna()), 'N', 'Y')
    properties_mdl['hasGarage'] = np.where((properties_mdl['garage_type'] == 0) | (properties_mdl['garage_type'].isna()), 'N', 'Y')
    properties_mdl['stories'] = np.where(properties_mdl['number_stories'] == 1, 'single', np.where(properties_mdl['number_stories'] == 2, 'double', 'multiple'))
    properties_mdl['area'] = np.where(properties_mdl['total_livable_area'] > properties_mdl['total_area'], properties_mdl['total_livable_area'], properties_mdl['total_area'])
    properties_mdl['hasHeater'] = np.where((properties_mdl['type_heater'] == 0) | (properties_mdl['type_heater'].isna()), 'N', 'Y')
    properties_mdl['quality'] = np.where(properties_mdl['quality_grade'].isin(['4', '5', '6', 'A', 'A+', 'A-', 'B', 'B+', 'B-', 'S', 'S+', 'X-']), 'Good', 'Bad')
    properties_mdl['logarea'] = np.log(properties_mdl['area'])
    condition1 = properties_mdl['building_code_description_new'].str.contains('ROW', case=False).fillna(False).values
    condition2 = properties_mdl['building_code_description_new'].str.contains('TWIN', case=False).fillna(False).values
    properties_mdl['buildingdis'] = np.select([condition1, condition2], ['Row', 'TWIN'], default='Other')
    properties_mdl = properties_mdl[
        (properties_mdl['Age'] < 500) &
        (properties_mdl['sale_price'] < 2000000) &
        (properties_mdl['sale_price'] > 10000) &  # Include the condition for sale_price
        (properties_mdl['numRooms'] < 30) &
        (properties_mdl['total_livable_area'] != 0) &
        (~properties_mdl['total_area'].isna()) &
        (properties_mdl['area'] < 50000)]
    X = properties_mdl[['Age', 'numRooms', 'hasBasement', 'hasAC', 'quality', 'buildingdis', 'hasFireplace', 'hasGarage', 'stories', 'logarea', 'view', 'zip_code']]
    y = properties_mdl['sale_price']
    X['zip_code'] = X['zip_code'].astype(str)
    X = X.dropna(subset=['zip_code'])
    X_encoded = pd.get_dummies(X, columns=['zip_code', 'hasAC', 'hasBasement', 'quality', 'buildingdis', 'hasFireplace', 'hasGarage', 'stories', 'view'], drop_first=True)
    X_encoded = X_encoded.astype(float)
    # fit the regression here
    X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.7, random_state=42)
    reg = sm.OLS(y_train, X_train).fit()
    y_pred = reg.predict(X_train)
    results = pd.DataFrame({'Predicted_Sale_Price': y_pred})
    pandas_gbq.to_gbq(results, 'derived.opa_properties_model', project_id="musa509s24-team3", if_exists='replace')
    print('Processed data into derived.opa_properties_model')
    return 'Success'