In [34]:
!pip install google-cloud
!pip install google
!pip install matplotlib

Collecting matplotlib
  Downloading matplotlib-3.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.8 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.8 kB)
Collecting cycler>=0.10 (from matplotlib)
  Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.51.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (159 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m159.5/159.5 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m[31m3.0 MB/s[0m eta [36m0:00:01[0m
[?25hCollecting kiwisolver>=1.3.1 (from matplotlib)
  Downloading kiwisolver-1.4.5-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (6.4 kB)
Collecting numpy>=1.21 (from matplotlib)
  Downloading numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.wh

In [35]:
# https://dev.to/wachuka_james/building-a-weather-data-pipeline-with-pyspark-prefect-and-google-cloud-19k8

from pyspark.sql import SparkSession
# from google.cloud import storage
# from google.cloud import bigquery

import requests
import json
# from google.cloud.exceptions import NotFound
import random

import matplotlib.pyplot as plt

In [26]:
# OpenWeatherMap API and base url
api_key = ""
base_url = "https://api.openweathermap.org/data/2.5/weather"

# city = ['San Antonio']
cities = ['San Antonio', 'Austin', 'Dallas', 'Houston']

spark = SparkSession.builder.appName("WeatherData").getOrCreate()


In [18]:
spark

In [49]:
def fetch_weather_data(city):
    # Fetch request from OpenWeatherMap API for city weather
    params = {"q": city, "appid": api_key, "units": "metric"}
    response = requests.get(base_url, params=params)
    data = response.json()
        
    # Extract the weather data from the API response
    # use https://m.openweathermap.org/current to find which data to add
    temp = data["main"]["temp"]
    humidity = data["main"]["humidity"]
    wind_speed = data["wind"]["speed"]
    # longitude
    # latitute
    # rain = data["rain"]["rain"]

    # TODO: add precipations
    
    df = spark.createDataFrame([(city, temp, humidity, wind_speed)],
                               ["City", "Temperature", "Humidity", "WindSpeed"])
    return df

In [52]:
# fetch_weather_data function to fetch weather data
weather_data = None
for city in cities:
    city_weather_data = fetch_weather_data(city)
    if weather_data is None:
        weather_data = city_weather_data
    else:
        weather_data = weather_data.union(city_weather_data)

# Basic processing and transformation on the weather
weather_data = weather_data.filter("temperature > 10") \
                           .groupBy("City") \
                           .agg({"Temperature": "avg", "Humidity": "avg", "WindSpeed": "max"}) \
                           .withColumnRenamed("avg(Temperature)", "Average Temperature") \
                           .withColumnRenamed("avg(Humidity)", "Average Humidity") \
                           .withColumnRenamed("max(WindSpeed)", "Max WindSpeed")
                           # .withColumnRenamed("avg(Rain)", "Average Rain") \

weather_data.show()

+-----------+-------------------+----------------+-------------+
|       City|Average Temperature|Average Humidity|Max WindSpeed|
+-----------+-------------------+----------------+-------------+
|San Antonio|              26.64|            75.0|         9.26|
|     Austin|              26.21|            79.0|         8.23|
|     Dallas|              25.25|            79.0|         6.69|
|    Houston|              25.36|            79.0|         8.75|
+-----------+-------------------+----------------+-------------+



                                                                                

In [59]:
# Graph the data using matplotlib
#The name df1 stands for dataframe1 for the first .csv file, cities
column_names=["City", "Latitude", "Longitude"]
# df1 = pd.read_csv(cities, header=None, names=column_names)

#The name df2 stands for dataframe2 for the second .csv file, distances
# column_names2=["Source", "Destination", "Distance"]
# df2 = pd.read_csv(distances, header=None, names=column_names2)

# Display a 2D graph of the given data.

#Combine the longitude and latitude to form coordinates
weather_data["Coordinates"] = list(zip(weather_data.Longitude, weather_data.Latitude))

#Change coordinates from tuples to shapely point objects
weather_data["Coordinates"] = weather_data["Coordinates"].apply(Point)

#Converting DataFrame to GeoDataFrame
gdf1 = gpd.GeoDataFrame(weather_data, geometry="Coordinates")

#Getting map of Texas
texas = gpd.read_file("http://www2.census.gov/geo/tiger/GENZ2016/shp/cb_2016_us_state_5m.zip")

#gdf1.head()
fig, gax = plt.subplots(figsize=(20,20))

texas.query("NAME == 'Texas'").plot(ax=gax, edgecolor='black',color='white')

gdf1.plot(ax=gax, color='red', alpha = 1)

gax.set_xlabel('Longitude')
gax.set_ylabel('Latitude')
gax.set_title('Texas')

gax.spines['top'].set_visible(False)
gax.spines['right'].set_visible(False)

# Label the cities
for x, y, label in zip(gdf1['Coordinates'].x, gdf1['Coordinates'].y, gdf1['City']):
    gax.annotate(label, xy=(x,y), xytext=(2,2), textcoords='offset points')

plt.show()

AttributeError: 'DataFrame' object has no attribute 'Longitude'

In [53]:
# https://www.askpython.com/python/examples/weather-forecast-using-python
# https://github.com/chubin/wttr.in

# Function to Generate Report
def generate_report(C):
    url = 'https://wttr.in/{}'.format(C)
    try:
        data = requests.get(url)
        data_text = data.text
    except:
        data_text = "Error Occurred"
    print(data_text)

for city in cities:
    generate_report(city)

Weather report: San Antonio

  [38;5;226m   \  /[0m       Partly cloudy
  [38;5;226m _ /""[38;5;250m.-.    [0m [38;5;214m+82[0m([38;5;214m86[0m) °F[0m     
  [38;5;226m   \_[38;5;250m(   ).  [0m [1m↖[0m [38;5;202m18[0m mph[0m       
  [38;5;226m   /[38;5;250m(___(__) [0m 9 mi[0m           
                0.0 in[0m         
                                                       ┌─────────────┐                                                       
┌──────────────────────────────┬───────────────────────┤  Thu 25 Apr ├───────────────────────┬──────────────────────────────┐
│            Morning           │             Noon      └──────┬──────┘     Evening           │             Night            │
├──────────────────────────────┼──────────────────────────────┼──────────────────────────────┼──────────────────────────────┤
│               Overcast       │               Overcast       │ [38;5;226m   \  /[0m       Partly cloudy  │ [38;5;226m _`/""[38;5;250m.-.    [