
# Earth Data Sources

In [None]:
%%capture
!pip install netcdf4
import os, fnmatch, shutil, datetime, numpy as np, pandas as pd, xarray as xr
%load_ext google.colab.data_table
#from google.colab import drive; drive.mount( '/content/drive' )

# 1. Ground Observations

HadISD: https://www.metoffice.gov.uk/hadobs/hadisd/v311_202007p/download.html


GHCN (Global Historical Climate Network): https://docs.opendata.aws/noaa-ghcn-pds/readme.html


## 1.1 Station weather data from GHCN (1763 - present)

In [None]:
# ghcnd-stations.txt is a fixed-width file. The GHCN-Daily README lists the
# fields as 1-based, inclusive column ranges:
#   ID 1-11, LATITUDE 13-20, LONGITUDE 22-30, ELEVATION 32-37, STATE 39-40,
#   NAME 42-71, GSN FLAG 73-75, HCN/CRN FLAG 77-79, WMO ID 81-85
# pandas.read_fwf wants 0-based, half-open [start, end) intervals, so every
# start is shifted down by one while the end stays the same. Using the README
# starts unchanged would drop the first character of each field (e.g. the
# leading letter of the station ID or the minus sign of a coordinate).
cols = ['ID','lat','lon','elv','state','name','gsnFLAG','hcnFLAG','WMO']
colspecs = [(0,11),(12,20),(21,30),(31,37),(38,40),(41,71),(72,75),(76,79),(80,85)]

df = pd.read_fwf( 'http://noaa-ghcn-pds.s3.amazonaws.com/ghcnd-stations.txt', colspecs = colspecs, names = cols  )
df.head(10)

In [None]:
# Read one year of GHCN daily observations; column names are supplied here
# through `names`.
year = 1763
cols = [
    'ID', 'YEAR/MONTH/DAY', 'ELEMENT', 'DATA VALUE',
    'M-FLAG', 'Q-FLAG', 'S-FLAG', 'OBS-TIME',
]
ghcn_year_url = f'http://noaa-ghcn-pds.s3.amazonaws.com/csv/{year}.csv'
df = pd.read_csv( ghcn_year_url, names = cols )
df

# Check in 

Pick a year, then merge that year's observations with the station metadata on station ID so you have one big dataframe.

In [None]:
### put your code here

## 1.2 Sub-daily (HadISD) 

https://www.metoffice.gov.uk/hadobs/hadisd/v311_202007p/download.html



```
!wget https://www.metoffice.gov.uk/hadobs/hadisd/v311_202007p/data/WMO_000000-029999_heat_stress.tar.gz
```



In [None]:
# Download one gzipped HadISD netCDF file with wget (the trailing 010010-99999
# in the file name is presumably the station identifier — confirm on the
# download page linked above).
!wget https://www.metoffice.gov.uk/hadobs/hadisd/v311_202007p/data/hadisd.3.1.1.202007p_19310101-20200801_010010-99999.nc.gz



---



---


# 2. Historical Reanalysis (on a grid!) Data  products (ERA-5)

Gridded reanalysis is a data product that combines models and observations. ERA is the best version. See all the options here: https://cds.climate.copernicus.eu/cdsapp#!/search?type=dataset

We are going to get it from Amazon (AWS S3).

ref: 
https://github.com/planet-os/notebooks/blob/master/aws/era5-s3-via-boto.ipynb

In [None]:
import boto3
import botocore

# Name of the ERA5 bucket, and an S3 client whose requests are sent unsigned.
era5_bucket = 'era5-pds'
unsigned_config = botocore.client.Config( signature_version = botocore.UNSIGNED )
client = boto3.client( 's3', config = unsigned_config )

In [None]:
# List the objects stored under the YYYY/MM/ prefix of the ERA5 bucket and
# collect their keys in `keys`.
keys = []
date = datetime.date( 2019, 1, 1 ) # update to desired date
prefix = date.strftime( '%Y/%m/' )
month_label = date.strftime( '%B, %Y' )

response = client.list_objects_v2( Bucket=era5_bucket, Prefix=prefix )
response_meta = response.get( 'ResponseMetadata', {} )

if response_meta.get('HTTPStatusCode') != 200:
    print( "There was an error with your request." )
else:
    # 'Contents' is missing from the response when nothing matches the prefix.
    contents = response.get('Contents')
    if contents is None:
        print( "No objects are available for %s" % month_label )
    else:
        keys = [ obj.get('Key') for obj in contents ]
        print( "There are %s objects available for %s\n--" % (len(keys), month_label) )
        for k in keys: print(k)
        # A single list_objects_v2 call returns one page only; say so rather
        # than silently presenting a partial listing as complete.
        if response.get('IsTruncated'):
            print( "-- (listing truncated: more objects exist under this prefix)" )

In [None]:
# Copy the January 2019 2 m air-temperature file from the bucket to local disk.
client.download_file(
    Bucket   = era5_bucket,
    Key      = '2019/01/data/air_temperature_at_2_metres.nc',
    Filename = '/content/air_temperature_at_2_metres.nc',
)

In [None]:
# Open the downloaded file; chunks='auto' leaves the chunk sizes to xarray.
tas = xr.open_dataset(
    '/content/air_temperature_at_2_metres.nc',
    chunks = 'auto',
)
tas


In [None]:
# Plot the field at position 10 along the `time0` dimension.
tas['air_temperature_at_2_metres'].isel( time0 = 10 ).squeeze().plot()



---



---


# 3. CMIP6 Climate model future projections

Coupled Model Intercomparison Project Phase 6 (CMIP6), just released

google cloud bucket: https://console.cloud.google.com/marketplace/product/noaa-public/cmip6?q=search&referrer=search&project=e-context-252517

In [None]:
%%capture
# Install/upgrade the packages this section imports or relies on for
# reading the cloud-hosted stores.
!pip install --upgrade zarr gcsfs cftime nc-time-axis
# `gcs` is a Google Cloud Storage filesystem handle created with token='anon'.
import fsspec, zarr, gcsfs; gcs = gcsfs.GCSFileSystem( token = 'anon' )

## 3.1  Browse Catalog

The data catalog is stored as a CSV file. Here we read it with Pandas.

The columns of the dataframe correspond to the CMIP6 controlled vocabulary. A beginners' guide to these terms is available in [this document](https://docs.google.com/document/d/1yUx6jr9EdedCOLd--CPdTfGDwEwzPpCF6p1jRmqx-0Q). 

In [None]:
### query the list of available data ###
catalog_url = 'https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv'
df = pd.read_csv( catalog_url )
df.head(10)

In [None]:
# Re-read the catalog and keep only the rows matching the query below.
df = (
    pd.read_csv( 'https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv' )
      .query( "activity_id=='ScenarioMIP' & variable_id == 'tas' & experiment_id == 'ssp585' & source_id == 'IPSL-CM6A-LR' & table_id == 'day'" )
)
df

## 3.2 Load in the data

In [None]:
# Open the store named in the first row of the filtered catalog.
first_store = df['zstore'].iloc[0]
ds = xr.open_zarr( gcs.get_mapper( first_store ), consolidated = True )
ds

In [None]:
# Plot `tas` for the selected date.
ds['tas'].sel( time = '2080-01-01' ).squeeze().plot()

## Check in

Load in a historical run from a different climate model and plot a day of precip

hint: activity_id=='CMIP' & experiment_id == 'historical' & variable_id == 'pr'

In [None]:
### put your code here

## 3.3 ERA-5 on Google Cloud (best option)

In [None]:
# Open the ERA5 Zarr store hosted on Google Cloud Storage.
era5_store = fsspec.get_mapper( 'gcs://pangeo-era5/reanalysis/spatial-analysis' )
ds = xr.open_zarr( era5_store, consolidated = True, chunks = 'auto' )
ds

In [None]:
# Plot `t2m` at the selected timestamp.
ds['t2m'].sel( time = '2000-12-31T23:00:00' ).squeeze().plot()




---



---


# 4. NASA Products

NEED TO MAKE AN ACCOUNT: https://urs.earthdata.nasa.gov/users/new

NASA has all sorts of data. If you know what you want, the best way to get it is to download it directly from the data pool.

Options: https://search.earthdata.nasa.gov/search

example landing page:  https://disc.gsfc.nasa.gov/datasets/GPM_3IMERGDF_06/summary?keywords=%22IMERG%20final%22


In [None]:
import urllib.request
from bs4 import BeautifulSoup

## 4.1 Satellite Precip

In [None]:
# Step 1: fetch the raw HTML of the directory listing.
url = 'https://gpm1.gesdisc.eosdis.nasa.gov/data/GPM_L3/GPM_3IMERGDF.06/2001/01/'
# The `with` block closes the HTTP response once the body has been read.
with urllib.request.urlopen( url ) as website:
    html = website.read()
html

In [None]:
# Step 2: fetch the listing again and parse it into a BeautifulSoup tree.
url = 'https://gpm1.gesdisc.eosdis.nasa.gov/data/GPM_L3/GPM_3IMERGDF.06/2001/01/'
# The `with` block closes the HTTP response once the body has been read.
with urllib.request.urlopen( url ) as website:
    html = website.read()
soup = BeautifulSoup(html, 'html.parser')
soup

In [None]:
# Step 3: build the list of download URLs for every file in the listing whose
# link ends in 'nc4'.
url = 'https://gpm1.gesdisc.eosdis.nasa.gov/data/GPM_L3/GPM_3IMERGDF.06/2001/01/'
# The `with` block closes the HTTP response once the body has been read.
with urllib.request.urlopen( url ) as website:
    html = website.read()
soup = BeautifulSoup(html, 'html.parser')

# Read each anchor's href attribute directly instead of splitting the string
# form of the tag list, which breaks as soon as a tag contains a comma or an
# attribute ahead of href. The set removes links that appear more than once.
hrefs = { a['href'] for a in soup.find_all('a', href=True) if a['href'].endswith('nc4') }

# Sort so that the order (and therefore tags[0]) is the same on every run.
tags = [ url + href for href in sorted(hrefs) ]
tags

In [None]:
# Template wget call carrying the account credentials (replace the placeholders).
# NOTE(review): no URL is given here, so as written wget has nothing to
# download — presumably one of the URLs in `tags` is meant to be pasted in; confirm.
!wget        --user=yourusername --password=yourpassword

## 4.2 loop through multiple cases...




In [None]:
# Download the first file of the listing. `tags` (plural) is the list built
# above and each entry is already a complete URL, so it is handed to wget
# as-is; the earlier `url+tag[0]` referenced an undefined name at this point.
# Replace yourusername / yourpassword with your own login.
os.system( f"wget --user=yourusername --password=yourpassword '{tags[0]}' -P '/content/drive/my_bootcamp_2020/'")

In [None]:
# Download every file of the listing into /content/. Each `tag` is already a
# complete URL; `url+tag[0]` would append only its first character to `url`.
# Replace yourusername / yourpassword with your own login.
for tag in tags:
    os.system( f"wget --user=yourusername --password=yourpassword '{tag}' -P '/content/'")

```
months = [ '01/','02/','03/','04/','05/','06/','07/','08/','09/','10/','11/','12/' ]
years  = [2009, 2008, 2007, 2006, 2005, 2004, 2003, 2002, 2001]
for year in years:
    root = 'https://gpm1.gesdisc.eosdis.nasa.gov/data/GPM_L3/GPM_3IMERGDF.06/{0}/'.format(year) #daily precip, microwave and IR
    for month in months:
        ...
```

---

---
# 5. Google earth engine

NEED TO MAKE AN ACCOUNT: https://signup.earthengine.google.com/

reference (mostly javascript, unfortunately): https://developers.google.com/earth-engine

data: https://developers.google.com/earth-engine/datasets


In [None]:
%%capture
!pip install earthengine-api 
import folium, pylab as plt

In [None]:
!earthengine authenticate
import ee; ee.Initialize() 

## 5.1 read data out of an image

In [None]:
# Look up and print the 'elevation' value at Mount Everest.
dem = ee.Image('USGS/SRTMGL1_003')
xy = ee.Geometry.Point([86.9250, 27.9881])  # coordinates are given as [lon, lat]
everest_sample = dem.sample(xy, 30).first()
elev = everest_sample.get('elevation').getInfo()
print('Mount Everest elevation (m):', elev)

## 5.2 do some calculations on some Landsat images

In [None]:
# Fetch a Landsat image.
img = ee.Image('LANDSAT/LT05/C01/T1_SR/LT05_034033_20000913')

# Keep bands B3 and B4 (plotted below as Red and NIR), divide them by 1000,
# then sample with scale = 30 and numPixels = 10000. The result is a feature collection.
red_nir = img.select( ['B3','B4'] ).divide( 1000 )
sampFC = red_nir.sample( scale = 30, numPixels = 10000 )

# Gather the sampled values of each band into its own list (a list of two lists).
list_per_band = ee.Reducer.toList().repeat(2)
sampDict = sampFC.reduceColumns( list_per_band, ['B3', 'B4'] )
sampList = ee.List( sampDict.get('list') )

# Save server-side ee.List as a client-side Python list.  ### KEY POINT ###
sampData = sampList.getInfo()

# Scatter the Red-NIR sample pairs with matplotlib.
red_vals, nir_vals = sampData[0], sampData[1]
plt.scatter( red_vals, nir_vals, alpha = 0.3 )
plt.xlabel( 'Red', fontsize = 12 )
plt.ylabel( 'NIR', fontsize = 12 )
plt.show()

## 5.3 cloud masking

In [None]:
# Landsat 8 surface reflectance collection used as the image source.
l8sr = ee.ImageCollection('LANDSAT/LC08/C01/T1_SR')
# Bands B2 through B7 are the ones kept for prediction.
bands = ['B{}'.format(n) for n in range(2, 8)]

# Cloud masking and band processing function.
def maskL8sr(image):
    """Mask cloud and cloud-shadow pixels, keep `bands`, and divide by 10000.

    Bits 3 and 5 of the 'pixel_qa' band are tested (masks 2**3 and 2**5);
    a pixel stays unmasked only when both bits are zero. The band list is
    read from the module-level `bands` variable.
    """
    qa = image.select('pixel_qa')
    shadow_bit = ee.Number(2).pow(3).int()
    cloud_bit = ee.Number(2).pow(5).int()
    shadow_free = qa.bitwiseAnd(shadow_bit).eq(0)
    cloud_free = qa.bitwiseAnd(cloud_bit).eq(0)
    clear = shadow_free.And(cloud_free)
    return image.updateMask(clear).select(bands).divide(10000)

# Build the 2017 composite: filter by date, mask each image, take the median.
masked_2017 = l8sr.filterDate('2017-01-01', '2017-12-31').map( maskL8sr )
image = masked_2017.median()
vis_params = {'bands': ['B4', 'B3', 'B2'], 'min': 0, 'max': 0.3}
mapIdDict = image.getMapId( vis_params )

# Show the composite as a tile layer on a folium map.
f = folium.Figure(width=800, height=500)
m = folium.Map(location=[42., -90.5])
t = folium.TileLayer(
    tiles = mapIdDict['tile_fetcher'].url_format,
    attr = 'Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
    overlay = True,
    name = '2017 cloud masked median',
    )
t.add_to(m)
folium.LayerControl().add_to(m)
m.add_to(f)
f



---


# REF


---



---



In [None]:
%%capture 
### Write $HOME/.cdsapirc with the API url and your personal key.
### The first echo (>) overwrites the file, the second (>>) appends to it.
### Replace "yourkey" with your own key before running.
!echo "url: https://cds.climate.copernicus.eu/api/v2" > $HOME/.cdsapirc
!echo "key: yourkey" >> $HOME/.cdsapirc

### Install and import the cdsapi client package.
!pip install cdsapi
import cdsapi

In [None]:
### open the client ###
c = cdsapi.Client()
### download data   ###
year = 2019
# Name of the file the request is saved to; reused for the move below.
target = f'350_ozone_era5_3hr_{year}.nc'
c.retrieve('reanalysis-era5-pressure-levels',
        {'product_type':   'reanalysis',
            'format':         'netcdf',
            'variable':       'ozone_mass_mixing_ratio',
            'month':          ['01','02','03', '11', '12'],
            'time':           ['00:00', '12:00'],
            'pressure_level': ['350', '250'],
            'year':           [str(year)],
            # Zero-padded day strings '01' .. '31'.
            'day':            [f'{day:02d}' for day in range(1, 32)],
            }, target)

### move the file into Google Drive ###
# NOTE(review): the source path assumes the working directory is /content
# and that Drive is mounted at /content/drive — confirm before running.
shutil.move(f'/content/{target}', f'/content/drive/My Drive/data/{target}')