In [10]:
import json
import os

import requests

## Part 1: Working with JSON Data

In [11]:
sample_json = '''
{
  "station": "USC00305800",
  "name": "New York Central Park",
  "location": {
    "latitude": 40.7789,
    "longitude": -73.9692
  },
  "observations": [
    {"date": "2023-01-01", "temperature": 32, "precipitation": 0.0},
    {"date": "2023-01-02", "temperature": 28, "precipitation": 0.5},
    {"date": "2023-01-03", "temperature": 35, "precipitation": 0.0},
    {"date": "2023-01-04", "temperature": 38, "precipitation": 0.2},
    {"date": "2023-01-05", "temperature": 41, "precipitation": 0.0}
  ]
}
'''

# Decode the JSON text into nested Python dicts and lists.
data = json.loads(sample_json)

# Show a top-level field, a nested object, and the first element of the
# observations list, each as a "label value" pair.
for label, value in (
    ("Station:", data['station']),
    ("Location:", data['location']),
    ("First observation:", data['observations'][0]),
):
    print(label, value)

Station: USC00305800
Location: {'latitude': 40.7789, 'longitude': -73.9692}
First observation: {'date': '2023-01-01', 'temperature': 32, 'precipitation': 0.0}


In [12]:
# 1. Extract and print all dates and temperatures (8 points)
# Header row first, then one "date , temperature" row per observation.
print("Date, Temperature")
for record in data['observations']:
    day, temp = record['date'], record['temperature']
    print(day, ",", temp)

Date, Temperature
2023-01-01 , 32
2023-01-02 , 28
2023-01-03 , 35
2023-01-04 , 38
2023-01-05 , 41


In [13]:
# 2. Calculate average temperature (8 points)
# Collect every temperature reading, then derive the sum, the number of
# readings, and the mean from that single list.
temperatures = [record['temperature'] for record in data['observations']]
total_temp = sum(temperatures)
count = len(temperatures)

avg_temp = total_temp / count
print(f"Average temperature: {avg_temp}°F")

Average temperature: 34.8°F


In [14]:
# 3. Find days with precipitation (9 points)
print("\nDays with precipitation:")
for record in data['observations']:
    rainfall = record['precipitation']
    # Skip observations whose precipitation is not strictly positive.
    if not rainfall > 0:
        continue
    print(f"{record['date']}: {rainfall}")


Days with precipitation:
2023-01-02: 0.5
2023-01-04: 0.2


**Now try with a real API:**

In [7]:
# Use a real weather API (you may need to sign up for a free API key)
# Example APIs: OpenWeatherMap, NOAA, Weather.gov
# YOUR CODE HERE

# Fetch the current weather for New York City from OpenWeatherMap.
#
# The API key is read from the OPENWEATHERMAP_API_KEY environment variable so
# the secret is never stored in the notebook. Any key that was previously
# committed in this cell should be revoked and replaced.
api_key = os.environ.get("OPENWEATHERMAP_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the OPENWEATHERMAP_API_KEY environment variable before running this cell."
    )

city = 'New York City'
url = "https://api.openweathermap.org/data/2.5/weather"

# Passing the query through `params` lets requests URL-encode the city name
# (it contains spaces). The timeout stops the cell from hanging forever, and
# raise_for_status() turns an HTTP error into an exception before we try to
# read fields that an error payload would not contain.
response = requests.get(url, params={"q": city, "appid": api_key}, timeout=10)
response.raise_for_status()
nyc_data = response.json()

print(nyc_data.keys())

print("City:", nyc_data["name"])
print(f"Weather: {nyc_data['weather']}")

dict_keys(['coord', 'weather', 'base', 'main', 'visibility', 'wind', 'clouds', 'dt', 'sys', 'timezone', 'id', 'name', 'cod'])
City: New York
Weather: [{'id': 800, 'main': 'Clear', 'description': 'clear sky', 'icon': '01d'}]


## Part 2: Downloading Files with Python

In [6]:
import pooch
import os

# Download a small air quality dataset with Pooch.
# No reference hash is supplied for this download (known_hash=None).
file_path = pooch.retrieve(
    "https://github.com/pandas-dev/pandas/raw/main/doc/data/air_quality_no2.csv",
    known_hash=None,
)

# Report the path returned by Pooch and confirm it exists on disk.
downloaded_ok = os.path.exists(file_path)
print("File downloaded to:", file_path)
print("File exists:", downloaded_ok)

File downloaded to: /home/jb5222/.cache/pooch/458dad453f6a48e510cd544bef1854e3-air_quality_no2.csv
File exists: True


In [7]:
# 1. Verify the file was downloaded (5 points)
# Size on disk, taken from the file's stat record.
file_size = os.stat(file_path).st_size
print(f"File size: {file_size} bytes")

# YOUR CODE HERE: open the file and count how many lines it has
# Iterating a text-mode file yields one item per line, so counting the
# items gives the number of lines.
with open(file_path, 'r') as handle:
    line_count = sum(1 for _ in handle)
print(f"Number of lines: {line_count}")

File size: 31984 bytes
Number of lines: 1036


In [16]:
# 2. Download another file (10 points)
# Find a climate dataset online using the sources we talked about in lecture
# Download it using Pooch, then print info about the downloaded file.

my_url = "https://data.giss.nasa.gov/gistemp/tabledata_v4/GLB.Ts+dSST.csv"

# No reference hash is supplied for this download (known_hash=None).
my_file = pooch.retrieve(url=my_url, known_hash=None)

my_file_found = os.path.exists(my_file)
print("My file downloaded to:", my_file)
print("My file exists:", my_file_found)

# Size on disk, taken from the file's stat record.
my_file_size = os.stat(my_file).st_size
print(f"File size: {my_file_size} bytes")

My file downloaded to: /home/jb5222/.cache/pooch/091fa6f46c6d0e4f56f340e4282e9175-GLB.Ts+dSST.csv
My file exists: True
File size: 12878 bytes


In [17]:
# 3. Create a data inventory (5 points)
# List all the files you've downloaded in this assignment
# The heading comes first, followed by one numbered entry per file; the last
# entry is the file added for task 2.
inventory_lines = (
    "\nData Inventory:",
    "1. meteorites.csv - NASA meteorite landings",
    "2. air_quality_no2.csv - Air quality NO2 measurements",
    "3. GLB.Ts+dSST.csv – NASA global temperature anomalies",
)
for inventory_line in inventory_lines:
    print(inventory_line)


Data Inventory:
1. meteorites.csv - NASA meteorite landings
2. air_quality_no2.csv - Air quality NO2 measurements
3. GLB.Ts+dSST.csv – NASA global temperature anomalies


## Part 3: Understanding NetCDF Metadata

In [19]:
import requests

# OPeNDAP provides metadata in different formats
# We'll get basic info about a climate dataset

base_url = "http://iridl.ldeo.columbia.edu/expert/SOURCES/.NOAA/.NCEP/.CPC/.UNIFIED_PRCP/.GAUGE_BASED/.GLOBAL/.v1p0/.Monthly/.RETRO/.rain/dods"

# Get DDS (Dataset Descriptor Structure) - describes the structure
dds_url = base_url + ".dds"
# The timeout keeps the cell from hanging if the server never answers, and
# raise_for_status() stops here on an HTTP error instead of printing an
# error page as if it were the dataset structure.
response = requests.get(dds_url, timeout=30)
response.raise_for_status()

print("Dataset Structure:")
print(response.text[:500])  # Print first 500 characters

Dataset Structure:
Dataset {
    Float32 T[T = 324];
    Float32 Y[Y = 360];
    Float32 X[X = 720];
    Grid {
     ARRAY:
        Float32 rain[T = 324][Y = 360][X = 720];
     MAPS:
        Float32 T[T = 324];
        Float32 Y[Y = 360];
        Float32 X[X = 720];
    } rain;
} rain;



#### 1. Identify dimensions and variables (5 points)
Look at the DDS output above and answer:
##### - What are the dimension names?
The dimensions are time (T=324), longitude (X=720) and latitude (Y=360). 

##### - What is the main variable name?
The name of the main variable is "rain".


In [22]:
# 2. Get data attributes (5 points)
# DAS (Dataset Attribute Structure) contains metadata
das_url = base_url + ".das"
# YOUR CODE HERE: make a request to das_url and print first 1000 characters
# The timeout keeps the cell from hanging if the server never answers, and
# raise_for_status() stops here on an HTTP error instead of printing an
# error page as if it were the attribute listing.
response = requests.get(das_url, timeout=30)
response.raise_for_status()
print("Dataset Attribute Structure:")
print(response.text[:1000])

Dataset Attribute Structure:
Attributes {
    X {
        String standard_name "longitude";
        Float32 pointwidth 0.5;
        Int32 gridtype 1;
        String units "degree_east";
    }
    T {
        Float32 pointwidth 1.0;
        String calendar "360";
        Int32 gridtype 0;
        String units "months since 1960-01-01";
    }
    Y {
        String standard_name "latitude";
        Float32 pointwidth 0.5;
        Int32 gridtype 0;
        String units "degree_north";
    }
    rain {
        Int32 pointwidth 0;
        String standard_name "lwe_precipitation_rate";
        Float32 file_missing_value -999.0;
        String history "Boxes with less than 0.0% dropped";
        Float32 missing_value NaN;
        String units "mm/day";
        String long_name "Monthly Precipitation";
    }
NC_GLOBAL {
    String Conventions "IRIDL";
}
}



#### 3. Document what you learned (5 points)
##### In a markdown cell, write:
##### - What does this dataset contain?
This dataset contains global monthly precipitation data from the NOAA CPC Global Unified Gauge-Based Analysis.
##### - What time period does it cover?
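It covers 324 monthly time steps (27 years of monthly data), with time counted in "months since 1960-01-01". Note that 1960-01-01 is the reference date for the time units, not necessarily the first month of data; the exact start and end dates have to be read from the values of T.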
##### - What geographic region does it cover?
This dataset has global coverage: the 720 × 360 grid at 0.5° resolution spans all 360° of longitude and 180° of latitude.
##### - What are the units of the main variable?
The units are mm/day.
##### Find this info in the DAS output