In [1]:
import pandas as pd

url = "https://data.nasa.gov/docs/legacy/meteorite_landings/Meteorite_Landings.csv"

df = pd.read_csv(url)

print(df)

             name     id nametype              recclass  mass (g)   fall  \
0          Aachen      1    Valid                    L5      21.0   Fell   
1          Aarhus      2    Valid                    H6     720.0   Fell   
2            Abee      6    Valid                   EH4  107000.0   Fell   
3        Acapulco     10    Valid           Acapulcoite    1914.0   Fell   
4         Achiras    370    Valid                    L6     780.0   Fell   
...           ...    ...      ...                   ...       ...    ...   
45711  Zillah 002  31356    Valid               Eucrite     172.0  Found   
45712      Zinder  30409    Valid  Pallasite, ungrouped      46.0  Found   
45713        Zlin  30410    Valid                    H4       3.3  Found   
45714   Zubkovsky  31357    Valid                    L6    2167.0  Found   
45715  Zulu Queen  30414    Valid                  L3.7     200.0  Found   

         year    reclat    reclong             GeoLocation  
0      1880.0  50.77500   

In [2]:
df.info

<bound method DataFrame.info of              name     id nametype              recclass  mass (g)   fall  \
0          Aachen      1    Valid                    L5      21.0   Fell   
1          Aarhus      2    Valid                    H6     720.0   Fell   
2            Abee      6    Valid                   EH4  107000.0   Fell   
3        Acapulco     10    Valid           Acapulcoite    1914.0   Fell   
4         Achiras    370    Valid                    L6     780.0   Fell   
...           ...    ...      ...                   ...       ...    ...   
45711  Zillah 002  31356    Valid               Eucrite     172.0  Found   
45712      Zinder  30409    Valid  Pallasite, ungrouped      46.0  Found   
45713        Zlin  30410    Valid                    H4       3.3  Found   
45714   Zubkovsky  31357    Valid                    L6    2167.0  Found   
45715  Zulu Queen  30414    Valid                  L3.7     200.0  Found   

         year    reclat    reclong             GeoLocat

In [3]:
#df.info()

In [10]:
df_mass = df[['name', 'mass (g)']]
df_mass

Unnamed: 0,name,mass (g)
0,Aachen,21.0
1,Aarhus,720.0
2,Abee,107000.0
3,Acapulco,1914.0
4,Achiras,780.0
...,...,...
45711,Zillah 002,172.0
45712,Zinder,46.0
45713,Zlin,3.3
45714,Zubkovsky,2167.0


# working with XARRAY

In [14]:
import xarray as xr
url = 'http://iridl.ldeo.columbia.edu/expert/SOURCES/.NOAA/.NCEP/.CPC/.UNIFIED_PRCP/.GAUGE_BASED/.GLOBAL/.v1p0/.Monthly/.RETRO/.rain/dods'
#decode_time=False is required because the IRI Data Library uses non-standard encoding of times
ds = xr.open_dataset(url, decode_times=False)

In [15]:
print(ds.dims)



In [16]:
ds.data_vars

Data variables:
    rain     (T, Y, X) float32 336MB ...

In [17]:
ds.coords

Coordinates:
  * T        (T) float32 1kB 228.5 229.5 230.5 231.5 ... 548.5 549.5 550.5 551.5
  * Y        (Y) float32 1kB -89.75 -89.25 -88.75 -88.25 ... 88.75 89.25 89.75
  * X        (X) float32 3kB 0.25 0.75 1.25 1.75 ... 358.2 358.8 359.2 359.8

In [18]:
ds.attrs

{'Conventions': 'IRIDL'}

In [19]:
ds['rain'].attrs

{'pointwidth': 0,
 'standard_name': 'lwe_precipitation_rate',
 'file_missing_value': -999.0,
 'history': 'Boxes with less than 0.0% dropped',
 'units': 'mm/day',
 'long_name': 'Monthly Precipitation'}

In [21]:
ds['T'].attrs

{'pointwidth': 1.0,
 'calendar': '360',
 'gridtype': 0,
 'units': 'months since 1960-01-01'}

In [24]:
#selecting by variable value
ds.sel(T=228.5)

In [25]:
ds.sel(Y=89.75, X=0.25)

In [27]:
ds.isel(T=0)

In [29]:
ds.sel(T=slice(229.5, 300.5))

In [40]:
ds_refined = ds.sel(X=slice(0.25, 0.75), Y=slice(-89.75,-86.0), T=slice(229.5, 300.5))
ds_refined

In [None]:
#save a net CDF file

In [41]:
ds_refined.to_netcdf("filtered_NETCDF.nc")

In [44]:
ds1 = xr.open_dataset("filtered_NETCDF.nc", decode_times=False)
ds1

# Working with Pooch

In [45]:
import pooch
POOCH = pooch.create(
    path=pooch.os_cache("2017_Antarctica_P3/CSARP_mvdr"),
    base_url="https://data.cresis.ku.edu/data/rds/2017_Antarctica_P3/CSARP_mvdr/20171124_03/",
    registry={
        "Data_img_02_20171124_03_020.mat": None,
    }
)

local_fname = POOCH.fetch("Data_img_02_20171124_03_020.mat")
local_fname

Downloading file 'Data_img_02_20171124_03_020.mat' from 'https://data.cresis.ku.edu/data/rds/2017_Antarctica_P3/CSARP_mvdr/20171124_03/Data_img_02_20171124_03_020.mat' to '/home/enh2134/.cache/2017_Antarctica_P3/CSARP_mvdr'.


'/home/enh2134/.cache/2017_Antarctica_P3/CSARP_mvdr/Data_img_02_20171124_03_020.mat'

In [46]:
'/home/enh2134/.cache/2017_Antarctica_P3/CSARP_mvdr/Data_img_02_20171124_03_020.mat'

'/home/enh2134/.cache/2017_Antarctica_P3/CSARP_mvdr/Data_img_02_20171124_03_020.mat'

In [47]:
import h5py # This library is sued to read HDF5 files
f = h5py.File(local_fname)
f

<HDF5 file "Data_img_02_20171124_03_020.mat" (mode r)>

In [48]:
list(f)

['#refs#',
 'Bottom',
 'Data',
 'Elevation',
 'GPS_time',
 'Heading',
 'Latitude',
 'Longitude',
 'Pitch',
 'Roll',
 'Surface',
 'Time',
 'param_combine',
 'param_csarp',
 'param_records']

In [50]:
POOCH = pooch.create(
    path=pooch.os_cache("greenland_ice_sheet"),
    base_url="https://zenodo.org/record/4977910/files/",
    registry={
        "vel_2010-07-01_2011-06-31.nc": "md5:80ad1a3c381af185069bc032a6459745",
    }
)

fname = POOCH.fetch("vel_2010-07-01_2011-06-31.nc")
fname

'/home/enh2134/.cache/greenland_ice_sheet/vel_2010-07-01_2011-06-31.nc'

In [53]:
ds = xr.open_dataset(fname)
ds