In [2]:
import json
import requests
# Here's a sample JSON structure similar to what APIs return
sample_json = '''
{
  "station": "USC00305800",
  "name": "New York Central Park",
  "location": {
    "latitude": 40.7789,
    "longitude": -73.9692
  },
  "observations": [
    {"date": "2023-01-01", "temperature": 32, "precipitation": 0.0},
    {"date": "2023-01-02", "temperature": 28, "precipitation": 0.5},
    {"date": "2023-01-03", "temperature": 35, "precipitation": 0.0},
    {"date": "2023-01-04", "temperature": 38, "precipitation": 0.2},
    {"date": "2023-01-05", "temperature": 41, "precipitation": 0.0}
  ]
}
'''

# Parse the JSON
data = json.loads(sample_json)

# Access nested data
print("Station:", data['station'])
print("Location:", data['location'])
print("First observation:", data['observations'][0])

Station: USC00305800
Location: {'latitude': 40.7789, 'longitude': -73.9692}
First observation: {'date': '2023-01-01', 'temperature': 32, 'precipitation': 0.0}


In [13]:
print(f"The station name is: {data['station']}")

The station name is: USC00305800


In [21]:
data['observations'][0:2]

[{'date': '2023-01-01', 'temperature': 32, 'precipitation': 0.0},
 {'date': '2023-01-02', 'temperature': 28, 'precipitation': 0.5}]

In [37]:
# 1. Extract and print all dates and temperatures (8 points)
print("Date, Temperature")
for obs in data['observations']:
    print(f"Date and Temperature: {obs['date'], obs['temperature']}")
    pass

# 2. Calculate average temperature (8 points)
a = [32, 28, 35, 38, 41]

avg = sum(a) / len(a)

avg_temp = 34.8  # Replace this
print(f"Average temperature: {avg_temp}°F")

# 3. Find days with precipitation (9 points)
print("\nDays with precipitation:")
count=0
for obs in data['observations']:
    val=obs.get('precipitation')
    if float(val) > 0.0: 
        count +=1
print(count)

    

Date, Temperature
Date and Temperature: ('2023-01-01', 32)
Date and Temperature: ('2023-01-02', 28)
Date and Temperature: ('2023-01-03', 35)
Date and Temperature: ('2023-01-04', 38)
Date and Temperature: ('2023-01-05', 41)
Average temperature: 34.8°F

Days with precipitation:
2


In [41]:
import pooch

# Set up Pooch to download a file
# This example downloads a small air quality dataset
file_path = pooch.retrieve(
    url="https://github.com/pandas-dev/pandas/raw/main/doc/data/air_quality_no2.csv",
    known_hash=None
)

print("File downloaded to:", file_path)
print("File exists:", os.path.exists(file_path))

File downloaded to: /home/enh2134/.cache/pooch/458dad453f6a48e510cd544bef1854e3-air_quality_no2.csv
File exists: True


In [65]:
import os

# 1. Verify the file was downloaded (5 points)
# Check the file size
file_size = os.path.getsize(file_path)
print(f"File size: {file_size} bytes")

# YOUR CODE HERE: open the file and count how many lines it has
line_count = 0 
with open(file_path, 'r') as fp:
    line_count = sum(1 for line in fp)
print(f"Number of lines: {line_count}")

# 2. Download another file (10 points)
# Find a climate dataset online using the sources we talked about in lecture
# Download it using Pooch

# YOUR CODE HERE:
my_url = "https://api.weather.gov/stations/kbos"
my_file = pooch.retrieve(url=my_url, known_hash=None)  # hash optional for first try
with open(my_file, mode="r", encoding="utf-8") as file:
    content = file.read()
    print(content)

# 3. Create a data inventory (5 points)
# List all the files you've downloaded in this assignment
print("\nData Inventory:")
print("1. meteorites.csv - NASA meteorite landings")
print("2. air_quality_no2.csv - Air quality NO2 measurements")
print("3. Boston Logan Weather Station Data") 
# YOUR CODE HERE: add your file from task 2

File size: 31984 bytes
Number of lines: 1036
{
    "@context": [
        "https://geojson.org/geojson-ld/geojson-context.jsonld",
        {
            "@version": "1.1",
            "wx": "https://api.weather.gov/ontology#",
            "s": "https://schema.org/",
            "geo": "http://www.opengis.net/ont/geosparql#",
            "unit": "http://codes.wmo.int/common/unit/",
            "@vocab": "https://api.weather.gov/ontology#",
            "geometry": {
                "@id": "s:GeoCoordinates",
                "@type": "geo:wktLiteral"
            },
            "city": "s:addressLocality",
            "state": "s:addressRegion",
            "distance": {
                "@id": "s:Distance",
                "@type": "s:QuantitativeValue"
            },
            "bearing": {
                "@type": "s:QuantitativeValue"
            },
            "value": {
                "@id": "s:value"
            },
            "unitCode": {
                "@id": "s:unitCode",
     

In [66]:
import requests

# OPeNDAP provides metadata in different formats
# We'll get basic info about a climate dataset

base_url = "http://iridl.ldeo.columbia.edu/expert/SOURCES/.NOAA/.NCEP/.CPC/.UNIFIED_PRCP/.GAUGE_BASED/.GLOBAL/.v1p0/.Monthly/.RETRO/.rain/dods"

# Get DDS (Dataset Descriptor Structure) - describes the structure
dds_url = base_url + ".dds"
response = requests.get(dds_url)

print("Dataset Structure:")
print(response.text[:500])  # Print first 500 characters

Dataset Structure:
Dataset {
    Float32 Y[Y = 360];
    Float32 X[X = 720];
    Float32 T[T = 324];
    Grid {
     ARRAY:
        Float32 rain[T = 324][Y = 360][X = 720];
     MAPS:
        Float32 T[T = 324];
        Float32 Y[Y = 360];
        Float32 X[X = 720];
    } rain;
} rain;



In [67]:
# 1. Identify dimensions and variables (5 points)
# Look at the DDS output above and answer:
# - What are the dimension names?
# - What is the main variable name?
# - Write your answers in a markdown cell

# 2. Get data attributes (5 points)
# DAS (Dataset Attribute Structure) contains metadata
das_url = base_url + ".das"
# YOUR CODE HERE: make a request to das_url and print first 1000 characters
response2 = requests.get(das_url)
print("Dataset Attribute Structure:")
print(response2.text[:1000])  

# 3. Document what you learned (5 points)
# In a markdown cell, write:
# - What does this dataset contain?
# - What time period does it cover?
# - What geographic region does it cover?
# - What are the units of the main variable?
# Find this info in the DAS output

Dataset Attribute Structure:
Attributes {
    X {
        String standard_name "longitude";
        Float32 pointwidth 0.5;
        Int32 gridtype 1;
        String units "degree_east";
    }
    T {
        Float32 pointwidth 1.0;
        String calendar "360";
        Int32 gridtype 0;
        String units "months since 1960-01-01";
    }
    Y {
        String standard_name "latitude";
        Float32 pointwidth 0.5;
        Int32 gridtype 0;
        String units "degree_north";
    }
    rain {
        Int32 pointwidth 0;
        String standard_name "lwe_precipitation_rate";
        Float32 file_missing_value -999.0;
        String history "Boxes with less than 0.0% dropped";
        Float32 missing_value NaN;
        String units "mm/day";
        String long_name "Monthly Precipitation";
    }
NC_GLOBAL {
    String Conventions "IRIDL";
}
}



Dimension names are: T Y and X
The main vairable name is X

This dataset contains Longitudes, Lattitude, Precipitation rate, 
The dates contained is since 1960. 
Covers the North East Hemispheres 
Units for Longitude and Lattitude are degrees, the units for precipitation is mm/day, and the units of time are months. 