In [4]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf, col
from pyspark.sql.types import StringType
import urllib.request
import json
import time

# Build the SparkSession for this notebook (getOrCreate returns an existing session if there is one)
spark = (
    SparkSession.builder
    .appName("WeatherAlerts")
    .getOrCreate()
)

# Function to fetch weather alerts using Weather.gov API
def fetch_weather_alerts(lat, lon):
    """Summarize the active Weather.gov alerts for one point as a single string.

    The function is meant to be wrapped as a Spark UDF, so it never raises:
    every outcome, including failures, is reported as text.

    Args:
        lat: Latitude of the point; formatted into the ``point`` query parameter.
        lon: Longitude of the point; formatted into the ``point`` query parameter.

    Returns:
        ``"No alerts"`` when the response contains no features; otherwise one
        ``"<headline> (sent: <sent>)"`` entry per alert, joined with ``"; "``.
        Failures are returned as strings starting with ``"Error:"``,
        ``"HTTPError:"``, ``"URLError:"`` or ``"Exception:"``.
    """
    # A null coordinate would otherwise be formatted into the URL as the text "None"
    # and sent to the API; report it locally instead of making the request.
    if lat is None or lon is None:
        return "Error: missing coordinates"

    try:
        url = f"https://api.weather.gov/alerts/active?point={lat},{lon}"
        headers = {"User-Agent": "MyWeatherApp (johndauphine@hotmail.com)"}

        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=10) as response:
            if response.status != 200:
                return f"Error: HTTP {response.status}"
            data = json.loads(response.read().decode('utf-8'))

        alerts = data.get('features') or []
        if not alerts:
            return "No alerts"

        summaries = []
        for alert in alerts:
            # `or` (rather than a .get default) also covers keys that are present
            # but null, which would otherwise be rendered as the text "None".
            props = alert.get('properties') or {}
            headline = props.get('headline') or 'No headline'
            sent = props.get('sent') or 'N/A'
            summaries.append(f"{headline} (sent: {sent})")
        return "; ".join(summaries)
    except urllib.error.HTTPError as e:
        # HTTPError is a subclass of URLError, so it has to be handled first
        return f"HTTPError: {e.code} {e.reason}"
    except urllib.error.URLError as e:
        return f"URLError: {e.reason}"
    except Exception as e:
        # Deliberate best-effort: one bad row must not fail the whole Spark job
        return f"Exception: {str(e)}"

# Register the function as a UDF returning a string column
fetch_weather_alerts_udf = udf(fetch_weather_alerts, StringType())

# Example DataFrame with latitude and longitude
data = [
    {"city": "Miami, FL", "latitude": 25.7617, "longitude": -80.1918},
    {"city": "Los Angeles, CA", "latitude": 34.0522, "longitude": -118.2437},
    {"city": "Ferriday, LA", "latitude": 31.6291, "longitude": -91.5559},
    {"city": "New Orleans, LA", "latitude": 29.9511, "longitude": -90.0715},
    {"city": "New York, NY", "latitude": 40.7128, "longitude": -74.0060},
    {"city": "Chicago, IL", "latitude": 41.8781, "longitude": -87.6298},
    {"city": "Houston, TX", "latitude": 29.7604, "longitude": -95.3698},
    {"city": "Phoenix, AZ", "latitude": 33.4484, "longitude": -112.0740},
    {"city": "Denver, CO", "latitude": 39.7392, "longitude": -104.9903},
    {"city": "Seattle, WA", "latitude": 47.6062, "longitude": -122.3321},
    {"city": "Washington, DC", "latitude": 38.9072, "longitude": -77.0369},
    {"city": "Boston, MA", "latitude": 42.3601, "longitude": -71.0589},
    {"city": "Dallas, TX", "latitude": 32.7767, "longitude": -96.7970},
    {"city": "San Francisco, CA", "latitude": 37.7749, "longitude": -122.4194},
    {"city": "Nashville, TN", "latitude": 36.1627, "longitude": -86.7816},
    {"city": "Philadelphia, PA", "latitude": 39.9526, "longitude": -75.1652},
    {"city": "Charlotte, NC", "latitude": 35.2271, "longitude": -80.8431},
    {"city": "Minneapolis, MN", "latitude": 44.9778, "longitude": -93.2650},
    {"city": "Portland, OR", "latitude": 45.5051, "longitude": -122.6750},
    {"city": "Indianapolis, IN", "latitude": 39.7684, "longitude": -86.1581},
    {"city": "Las Vegas, NV", "latitude": 36.1699, "longitude": -115.1398},
    {"city": "Milwaukee, WI", "latitude": 43.0389, "longitude": -87.9065},
    {"city": "Memphis, TN", "latitude": 35.1495, "longitude": -90.0490},
    {"city": "Tampa, FL", "latitude": 27.9506, "longitude": -82.4572},
]
df = spark.createDataFrame(data)

# Add a column with weather alerts (headline and sent date)
df_with_alerts = df.withColumn("alerts", fetch_weather_alerts_udf(col("latitude"), col("longitude")))

# Show results. show() prints only 20 rows by default and there are more cities
# than that, so request one row per input record to display every city.
df_with_alerts.show(n=len(data), truncate=False)

# Stop SparkSession
spark.stop()

                                                                                

+-----------------+--------+---------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|city             |latitude|longitude|alerts                                                                                                                                                                                                                                                                   |
+-----------------+--------+---------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|Miami, FL        |25.7617 |-80.1918 |No alerts                                      

In [7]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf, col, current_timestamp
from pyspark.sql.types import StringType
import urllib.request
import json
import time

# Build the SparkSession for this cell (getOrCreate returns an existing session if there is one)
spark = (
    SparkSession.builder
    .appName("WeatherAlerts")
    .getOrCreate()
)

# Function to fetch weather alerts using Weather.gov API
def fetch_weather_alerts(lat, lon):
    """Summarize the active Weather.gov alerts for one point as a single string.

    The function is meant to be wrapped as a Spark UDF, so it never raises:
    every outcome, including failures, is reported as text.

    Args:
        lat: Latitude of the point; formatted into the ``point`` query parameter.
        lon: Longitude of the point; formatted into the ``point`` query parameter.

    Returns:
        ``"No alerts"`` when the response contains no features; otherwise one
        ``"<headline> (sent: <sent>)"`` entry per alert, joined with ``"; "``.
        Failures are returned as strings starting with ``"Error:"``,
        ``"HTTPError:"``, ``"URLError:"`` or ``"Exception:"``.
    """
    # A null coordinate would otherwise be formatted into the URL as the text "None"
    # and sent to the API; report it locally instead of making the request.
    if lat is None or lon is None:
        return "Error: missing coordinates"

    try:
        url = f"https://api.weather.gov/alerts/active?point={lat},{lon}"
        headers = {"User-Agent": "MyWeatherApp (johndauphine@hotmail.com)"}

        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=10) as response:
            if response.status != 200:
                return f"Error: HTTP {response.status}"
            data = json.loads(response.read().decode('utf-8'))

        alerts = data.get('features') or []
        if not alerts:
            return "No alerts"

        summaries = []
        for alert in alerts:
            # `or` (rather than a .get default) also covers keys that are present
            # but null, which would otherwise be rendered as the text "None".
            props = alert.get('properties') or {}
            headline = props.get('headline') or 'No headline'
            sent = props.get('sent') or 'N/A'
            summaries.append(f"{headline} (sent: {sent})")
        return "; ".join(summaries)
    except urllib.error.HTTPError as e:
        # HTTPError is a subclass of URLError, so it has to be handled first
        return f"HTTPError: {e.code} {e.reason}"
    except urllib.error.URLError as e:
        return f"URLError: {e.reason}"
    except Exception as e:
        # Deliberate best-effort: one bad row must not fail the whole Spark job
        return f"Exception: {str(e)}"

# Register the function as a UDF returning a string column
fetch_weather_alerts_udf = udf(fetch_weather_alerts, StringType())

# Example DataFrame with latitude and longitude
data = [
    {"city": "Miami, FL", "latitude": 25.7617, "longitude": -80.1918},
    {"city": "Los Angeles, CA", "latitude": 34.0522, "longitude": -118.2437},
    {"city": "Ferriday, LA", "latitude": 31.6291, "longitude": -91.5559},
    {"city": "New Orleans, LA", "latitude": 29.9511, "longitude": -90.0715},
    {"city": "New York, NY", "latitude": 40.7128, "longitude": -74.0060},
    {"city": "Chicago, IL", "latitude": 41.8781, "longitude": -87.6298},
    {"city": "Houston, TX", "latitude": 29.7604, "longitude": -95.3698},
    {"city": "Phoenix, AZ", "latitude": 33.4484, "longitude": -112.0740},
    {"city": "Denver, CO", "latitude": 39.7392, "longitude": -104.9903},
    {"city": "Seattle, WA", "latitude": 47.6062, "longitude": -122.3321},
    {"city": "Washington, DC", "latitude": 38.9072, "longitude": -77.0369},
    {"city": "Boston, MA", "latitude": 42.3601, "longitude": -71.0589},
    {"city": "Dallas, TX", "latitude": 32.7767, "longitude": -96.7970},
    {"city": "San Francisco, CA", "latitude": 37.7749, "longitude": -122.4194},
    {"city": "Nashville, TN", "latitude": 36.1627, "longitude": -86.7816},
    {"city": "Philadelphia, PA", "latitude": 39.9526, "longitude": -75.1652},
    {"city": "Charlotte, NC", "latitude": 35.2271, "longitude": -80.8431},
    {"city": "Minneapolis, MN", "latitude": 44.9778, "longitude": -93.2650},
    {"city": "Portland, OR", "latitude": 45.5051, "longitude": -122.6750},
    {"city": "Indianapolis, IN", "latitude": 39.7684, "longitude": -86.1581},
    {"city": "Las Vegas, NV", "latitude": 36.1699, "longitude": -115.1398},
    {"city": "Milwaukee, WI", "latitude": 43.0389, "longitude": -87.9065},
    {"city": "Memphis, TN", "latitude": 35.1495, "longitude": -90.0490},
    {"city": "Tampa, FL", "latitude": 27.9506, "longitude": -82.4572},
]
df = spark.createDataFrame(data)

# Add a column with weather alerts (headline and sent date)
df_with_alerts = df.withColumn("alerts", fetch_weather_alerts_udf(col("latitude"), col("longitude")))

# Add a column with the current datetime
df_with_alerts = df_with_alerts.withColumn("queried_at", current_timestamp())

# Show results. show() prints only 20 rows by default and there are more cities
# than that, so request one row per input record to display every city.
df_with_alerts.show(n=len(data), truncate=False)

# Stop SparkSession
spark.stop()

                                                                                

+-----------------+--------+---------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------+
|city             |latitude|longitude|alerts                                                                                                                                                                                                                                                                   |queried_at              |
+-----------------+--------+---------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------+
|Miami, FL