## Setup: Install Required Packages

In [2]:
import requests
import json

# Part 1: Working with JSON Data

In [8]:
sample_json = '''
{
  "station": "USC00305800",
  "name": "New York Central Park",
  "location": {
    "latitude": 40.7789,
    "longitude": -73.9692
  },
  "observations": [
    {"date": "2023-01-01", "temperature": 32, "precipitation": 0.0},
    {"date": "2023-01-02", "temperature": 28, "precipitation": 0.5},
    {"date": "2023-01-03", "temperature": 35, "precipitation": 0.0},
    {"date": "2023-01-04", "temperature": 38, "precipitation": 0.2},
    {"date": "2023-01-05", "temperature": 41, "precipitation": 0.0}
  ]
}
'''

# Parse the JSON
data = json.loads(sample_json)

# Access nested data
print("Station:", data['station'])
print("Location:", data['location'])
print("First observation:", data['observations'][0])

Station: USC00305800
Location: {'latitude': 40.7789, 'longitude': -73.9692}
First observation: {'date': '2023-01-01', 'temperature': 32, 'precipitation': 0.0}


### 1. Extract and print all dates and temperatures

In [14]:
print("Date, Temperature (°F)")
for obs in data["observations"]:
    print(f"{obs['date']}, {obs['temperature']}")

Date, Temperature (°F)
2023-01-01, 32
2023-01-02, 28
2023-01-03, 35
2023-01-04, 38
2023-01-05, 41


### 2. Calculate the average temperature

In [10]:
total_temp = 0
count = 0
for obs in data["observations"]:
    total_temp += obs["temperature"]
    count += 1
avg_temp = total_temp / count
print(f"\nAverage temperature: {avg_temp:.2f}°F")


Average temperature: 34.80°F


### 3. Find days with precipitation

In [13]:
print("\nDays with precipitation:")
for obs in data["observations"]:
    if obs["precipitation"] > 0:
        print(f"{obs['date']}, {obs['precipitation']}")


Days with precipitation:
2023-01-02, 0.5
2023-01-04, 0.2


### Use a real weather API

In [18]:
lat, lon = 40.7789, -73.9692
headers = {"User-Agent": "CelinaGong (computing methods assignment)"}

points_url = f"https://api.weather.gov/points/{lat},{lon}"
resp = requests.get(points_url, headers=headers)
resp.raise_for_status()
points_data = resp.json()

forecast_url = points_data["properties"]["forecast"]

fresp = requests.get(forecast_url, headers=headers)
fresp.raise_for_status()
forecast_data = fresp.json()

periods = forecast_data["properties"]["periods"]

data = {
    "station": "weather.gov",
    "name": "NYC (Central Park) forecast",
    "location": {"latitude": lat, "longitude": lon},
    "observations": []
}

for p in periods[:10]:
    start_time = p.get("startTime", "")
    date = start_time[:10] if len(start_time) >= 10 else p.get("name", "unknown")

    temperature = p.get("temperature", None)

    pop = p.get("probabilityOfPrecipitation", {})
    precipitation = 0
    if isinstance(pop, dict) and pop.get("value") is not None:
        precipitation = pop["value"]

    data["observations"].append({
        "date": date,
        "temperature": temperature,
        "precipitation": precipitation
    })


# 1. Extract and print all dates and temperatures
print("Date, Temperature (°F)")
for obs in data["observations"]:
    print(f"{obs['date']}, {obs['temperature']}")

# 2. Calculate the average temperature
total_temp = 0
count = 0
for obs in data["observations"]:
    if obs["temperature"] is not None:
        total_temp += obs["temperature"]
        count += 1

avg_temp = total_temp / count if count else float("nan")
print(f"\nAverage temperature: {avg_temp:.2f}°F")

# 3. Find days with precipitation 
## no precipitation provided, probability of Precipitation (in %) as precipitation indicator
print("\nDays with precipitation (probability %, > 0):")
for obs in data["observations"]:
    if obs["precipitation"] > 0:
        print(f"{obs['date']} (precip={obs['precipitation']}%)")

Date, Temperature (°F)
2026-02-17, 38
2026-02-18, 42
2026-02-18, 36
2026-02-19, 41
2026-02-19, 36
2026-02-20, 42
2026-02-20, 37
2026-02-21, 45
2026-02-21, 34
2026-02-22, 38

Average temperature: 38.90°F

Days with precipitation (probability %, > 0):
2026-02-17 (precip=5%)
2026-02-18 (precip=58%)
2026-02-18 (precip=45%)
2026-02-19 (precip=14%)
2026-02-19 (precip=46%)
2026-02-20 (precip=91%)
2026-02-20 (precip=91%)
2026-02-21 (precip=22%)
2026-02-21 (precip=26%)
2026-02-22 (precip=52%)


# Part 2: Downloading Files with Python (Pooch)

In [21]:
import os
import pooch

file_path = pooch.retrieve(
    url="https://github.com/pandas-dev/pandas/raw/main/doc/data/air_quality_no2.csv",
    known_hash=None
)

print("File downloaded to:", file_path)
print("File exists:", os.path.exists(file_path))

Downloading data from 'https://github.com/pandas-dev/pandas/raw/main/doc/data/air_quality_no2.csv' to file '/home/xg2467/.cache/pooch/458dad453f6a48e510cd544bef1854e3-air_quality_no2.csv'.
SHA256 hash of downloaded file: 365ca31c9296ac200e73d357e16a3c1340f9ce6746c83bf81403046dcb374361
Use this value as the 'known_hash' argument of 'pooch.retrieve' to ensure that the file hasn't changed if it is downloaded again in the future.


File downloaded to: /home/xg2467/.cache/pooch/458dad453f6a48e510cd544bef1854e3-air_quality_no2.csv
File exists: True


### 1. Verify the file was downloaded

In [22]:
file_size = os.path.getsize(file_path)
print(f"File size: {file_size} bytes")

line_count = 0
with open(file_path, "r", encoding="utf-8") as f:
    for _ in f:
        line_count += 1

print("Number of lines:", line_count)

File size: 31984 bytes
Number of lines: 1036


### 2. Download another file

In [23]:
my_url = "https://www.ncei.noaa.gov/access/monitoring/climate-at-a-glance/global/time-series/globe/land_ocean/12/1/1895-2023.csv"
my_file = pooch.retrieve(url=my_url, known_hash=None)

print("My file downloaded to:", my_file)
print("My file exists:", os.path.exists(my_file))
print("My file size (bytes):", os.path.getsize(my_file))

Downloading data from 'https://www.ncei.noaa.gov/access/monitoring/climate-at-a-glance/global/time-series/globe/land_ocean/12/1/1895-2023.csv' to file '/home/xg2467/.cache/pooch/af463bf2aa97aaab0f46972aaf76e912-1895-2023.csv'.
SHA256 hash of downloaded file: 79937c38fb6a8db04a5fe7417372c340422142335f1714439106b728fe976322
Use this value as the 'known_hash' argument of 'pooch.retrieve' to ensure that the file hasn't changed if it is downloaded again in the future.


My file downloaded to: /home/xg2467/.cache/pooch/af463bf2aa97aaab0f46972aaf76e912-1895-2023.csv
My file exists: True
My file size (bytes): 1467


### 3. Create a data inventory

In [34]:
print("\nData Inventory:")
print("1. air_quality_no2.csv - Air quality NO2 measurements")
print("2. land_ocean_global_temp.csv - NOAA Global Land-Ocean Temperature Anomalies")


Data Inventory:
1. air_quality_no2.csv - Air quality NO2 measurements
2. land_ocean_global_temp.csv - NOAA Global Land-Ocean Temperature Anomalies


# Part 3: Understanding NetCDF Metadata

In [26]:
import requests

base_url = "http://iridl.ldeo.columbia.edu/expert/SOURCES/.NOAA/.NCEP/.CPC/.UNIFIED_PRCP/.GAUGE_BASED/.GLOBAL/.v1p0/.Monthly/.RETRO/.rain/dods"

dds_url = base_url + ".dds"
response = requests.get(dds_url)

print("Dataset Structure:")
print(response.text[:500])

Dataset Structure:
Dataset {
    Float32 Y[Y = 360];
    Float32 X[X = 720];
    Float32 T[T = 324];
    Grid {
     ARRAY:
        Float32 rain[T = 324][Y = 360][X = 720];
     MAPS:
        Float32 T[T = 324];
        Float32 Y[Y = 360];
        Float32 X[X = 720];
    } rain;
} rain;



### 1. Identify dimensions and variables

 - What are the dimension names?
*T = 324, Y = 360, X = 720*
 - What is the main variable name?
*Rain*


### 2. Get data attributes

In [33]:
das_url = base_url + ".das"
das_response = requests.get(das_url)
das_response.raise_for_status()

print("Dataset Attributes first 1000 preview:")
print(das_response.text[:1000])

Dataset Attributes first 1000 preview:
Attributes {
    Y {
        String standard_name "latitude";
        Float32 pointwidth 0.5;
        Int32 gridtype 0;
        String units "degree_north";
    }
    X {
        String standard_name "longitude";
        Float32 pointwidth 0.5;
        Int32 gridtype 1;
        String units "degree_east";
    }
    T {
        Float32 pointwidth 1.0;
        String calendar "360";
        Int32 gridtype 0;
        String units "months since 1960-01-01";
    }
    rain {
        Int32 pointwidth 0;
        String standard_name "lwe_precipitation_rate";
        Float32 file_missing_value -999.0;
        String history "Boxes with less than 0.0% dropped";
        Float32 missing_value NaN;
        String units "mm/day";
        String long_name "Monthly Precipitation";
    }
NC_GLOBAL {
    String Conventions "IRIDL";
}
}



### 3. Document what you learned

 - What does this dataset contain?

*It contains monthly precipitation on a global lat–lon grid. The main variable is rain (long_name: “Monthly Precipitation”).*

 - What time period does it cover?

*The time coordinate T is in months since 1960-01-01 (monthly time steps).*

 - What geographic region does it cover?

*It is a global latitude–longitude grid with 0.5° spacing (Y = latitude, X = longitude).*

 - What are the units of the main variable?

*The main variable rain is reported in mm/day*