# Part 1 Working with JSON Data

In [14]:
# You may need to install these
# pip install pooch requests

import requests
import json

In [16]:
import json
import requests
# Here's a sample JSON structure similar to what APIs return
sample_json = '''
{
  "station": "USC00305800",
  "name": "New York Central Park",
  "location": {
    "latitude": 40.7789,
    "longitude": -73.9692
  },
  "observations": [
    {"date": "2023-01-01", "temperature": 32, "precipitation": 0.0},
    {"date": "2023-01-02", "temperature": 28, "precipitation": 0.5},
    {"date": "2023-01-03", "temperature": 35, "precipitation": 0.0},
    {"date": "2023-01-04", "temperature": 38, "precipitation": 0.2},
    {"date": "2023-01-05", "temperature": 41, "precipitation": 0.0}
  ]
}
'''

# Parse the JSON
data = json.loads(sample_json)

# Access nested data
print("Station:", data['station'])
print("Location:", data['location'])
print("First observation:", data['observations'][0])

Station: USC00305800
Location: {'latitude': 40.7789, 'longitude': -73.9692}
First observation: {'date': '2023-01-01', 'temperature': 32, 'precipitation': 0.0}


In [17]:
# 1. Extract and print all dates and temperatures (8 points)
print("Date, Temperature")
for obs in data['observations']:
    # YOUR CODE HERE: print date and temperature for each observation
    print(obs['date'], obs['temperature'])
    pass

Date, Temperature
2023-01-01 32
2023-01-02 28
2023-01-03 35
2023-01-04 38
2023-01-05 41


In [18]:
# 2. Calculate average temperature (8 points)
total_temp = 0
count = 0
# YOUR CODE HERE: calculate average
for obs in data['observations']:
    total_temp = total_temp + obs['temperature']
    count = count + 1
avg_temp = total_temp/count  # Replace this
print(f"Average temperature: {avg_temp}°F")

Average temperature: 34.8°F


In [19]:
# 3. Find days with precipitation (9 points)
print("\nDays with precipitation:")
# YOUR CODE HERE
dayswP = 0
for obs in data['observations']:
    if obs['precipitation'] > 0:
        dayswP += 1
dayswP


Days with precipitation:


2

# Part 2: Downloading Files with Python

In [21]:
import pooch
import os

# Set up Pooch to download a file
# This example downloads a small air quality dataset
file_path = pooch.retrieve(
    url="https://github.com/pandas-dev/pandas/raw/main/doc/data/air_quality_no2.csv",
    known_hash=None
)

print("File downloaded to:", file_path)
print("File exists:", os.path.exists(file_path))

File downloaded to: /home/cei2119/.cache/pooch/458dad453f6a48e510cd544bef1854e3-air_quality_no2.csv
File exists: True


In [31]:
# 1. Verify the file was downloaded (5 points)
# Check the file size
file_size = os.path.getsize(file_path)
print(f"File size: {file_size} bytes")

# YOUR CODE HERE: open the file and count how many lines it has
import pandas as pd
line_count = 0
df = pd.read_csv("/home/cei2119/.cache/pooch/458dad453f6a48e510cd544bef1854e3-air_quality_no2.csv")
for i in df.index: 
    line_count += 1


print(f"Number of lines: {line_count}")

File size: 31984 bytes
Number of lines: 1035


In [37]:
# 2. Download another file (10 points)
# Find a climate dataset online using the sources we talked about in lecture
# Download it using Pooch

# YOUR CODE HERE:
my_url = "https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-docs/gracefo/open/docs/GRACE_GRACE-FO_Months_RL06.csv"
my_file = pooch.retrieve(url=my_url, known_hash=None)  # hash optional for first try
#Print info about your downloaded file
df2 = pd.read_csv(my_url)
print(my_file)
print(df2)

/home/cei2119/.cache/pooch/c89ae2342e61af4b3a058302cfbc3311-GRACE_GRACE-FO_Months_RL06.csv
     Month Sr No  GRACE/GRACE-FO record index MONTH  YEAR  START DAY  YEAR.1  \
0              1                          1.0   APR  2002         91    2002   
1              2                          2.0   MAY  2002        121    2002   
2              3                          NaN   JUN  2002        152    2002   
3              4                          NaN   JUL  2002        182    2002   
4              5                          3.0   AUG  2002        213    2002   
..           ...                          ...   ...   ...        ...     ...   
279          280                        247.0   JUL  2025        182    2025   
280          281                        248.0   AUG  2025        213    2025   
281          282                        249.0   SEP  2025        244    2025   
282          283                        250.0   OCT  2025        274    2025   
283          284             

In [34]:
# 3. Create a data inventory (5 points)
# List all the files you've downloaded in this assignment
print("\nData Inventory:")
print("1. meteorites.csv - NASA meteorite landings")
print("2. air_quality_no2.csv - Air quality NO2 measurements")
# YOUR CODE HERE: add your file from task 2
print("3. GRACE_GRACE-FO_Months_RL06.csv - GRACE monthly data integration range")


Data Inventory:
1. meteorites.csv - NASA meteorite landings
2. air_quality_no2.csv - Air quality NO2 measurements
3. GRACE_GRACE-FO_Months_RL06.csv - GRACE monthly data integration range


In [None]:
import os

# 1. Verify the file was downloaded (5 points)
# Check the file size
file_size = os.path.getsize(file_path)
print(f"File size: {file_size} bytes")

# YOUR CODE HERE: open the file and count how many lines it has
import pandas as pd
line_count = 0
df = pd.read_csv("/home/cei2119/.cache/pooch/c89ae2342e61af4b3a058302cfbc3311-GRACE_GRACE-FO_Months_RL06.csv")
for i in df.index: 
    line_count += 1
    
print(f"Number of lines: {line_count}")

# Part 3: Understanding NetCDF Metadata

In [39]:
import requests

# OPeNDAP provides metadata in different formats
# We'll get basic info about a climate dataset

base_url = "http://iridl.ldeo.columbia.edu/expert/SOURCES/.NOAA/.NCEP/.CPC/.UNIFIED_PRCP/.GAUGE_BASED/.GLOBAL/.v1p0/.Monthly/.RETRO/.rain/dods"

# Get DDS (Dataset Descriptor Structure) - describes the structure
dds_url = base_url + ".dds"
response = requests.get(dds_url)

print("Dataset Structure:")
print(response.text[:500])  # Print first 500 characters

Dataset Structure:
Dataset {
    Float32 T[T = 324];
    Float32 Y[Y = 360];
    Float32 X[X = 720];
    Grid {
     ARRAY:
        Float32 rain[T = 324][Y = 360][X = 720];
     MAPS:
        Float32 T[T = 324];
        Float32 Y[Y = 360];
        Float32 X[X = 720];
    } rain;
} rain;



# 1. Identify dimensions and variables (5 points)
# Look at the DDS output above and answer:
# - What are the dimension names?
* T
* Y
* X
# - What is the main variable name?
* rain
# - Write your answers in a markdown cell

In [41]:
# 2. Get data attributes (5 points)
# DAS (Dataset Attribute Structure) contains metadata
das_url = base_url + ".das"
# YOUR CODE HERE: make a request to das_url and print first 1000 characters
response = requests.get(das_url)

print("Dataset Structure:")
print(response.text[:1000])


Dataset Structure:
Attributes {
    X {
        String standard_name "longitude";
        Float32 pointwidth 0.5;
        Int32 gridtype 1;
        String units "degree_east";
    }
    T {
        Float32 pointwidth 1.0;
        String calendar "360";
        Int32 gridtype 0;
        String units "months since 1960-01-01";
    }
    Y {
        String standard_name "latitude";
        Float32 pointwidth 0.5;
        Int32 gridtype 0;
        String units "degree_north";
    }
    rain {
        Int32 pointwidth 0;
        String standard_name "lwe_precipitation_rate";
        Float32 file_missing_value -999.0;
        String history "Boxes with less than 0.0% dropped";
        Float32 missing_value NaN;
        String units "mm/day";
        String long_name "Monthly Precipitation";
    }
NC_GLOBAL {
    String Conventions "IRIDL";
}
}



# 3. Document what you learned (5 points)
# In a markdown cell, write:
## - What does this dataset contain?
* monthly precipitation data, gridded cells based on long lat and time that store rainfall data
## - What time period does it cover?
* months since 1960-01-01, 324 months since then, 27 years
## - What geographic region does it cover?
* global dataset with 0.5 degree cells
## - What are the units of the main variable?
* mm/day
# Find this info in the DAS output