In [14]:
import numpy as np
import pandas as pd
import rasterio as reo
from rasterio.enums import Resampling
from rasterio.crs import CRS
import sklearn
import geopandas as gpd
from shapely.geometry import Point
import matplotlib.pyplot as plt
import rioxarray
import re
import os
import xarray as xr
import datetime as dt
from datetime import datetime
import argparse
import rioxarray
from rasterstats import point_query
from geopy.distance import geodesic

In [2]:
lcz_filepath = "/ihme/homes/nhashmeh/downscale_temperature/climate_zones/lcz_filter_v2.tif"
stations_path = '/ihme/homes/nhashmeh/downscale_temperature/global_summaries/'

In [3]:
# load in station data
print("Loading in station data")
stations_filename = '1990_all_data.csv'
station_data = pd.read_csv(stations_path + stations_filename) 
# break

# processing station data...
station_data.columns = station_data.columns.str.lower()
station_data.drop(columns="station", inplace=True) # don't need station numbers...
station_data.rename(columns={'date': 'time'}, inplace = True)
station_data['time'] = pd.to_datetime(station_data['time'])
station_data = station_data.dropna(how = 'any', subset = ['latitude', 'longitude', 'temp', 'elevation']) # drop rows where there are no coords (data isn't always clean...)
station_data['temp'] = (station_data['temp'] - 32) * 5/9 # convert to C

grouped_stations = station_data.groupby(station_data['time'].dt.month)

group_1 = grouped_stations.get_group(1)

gdf_station = gpd.GeoDataFrame(group_1, geometry=gpd.points_from_xy(group_1.longitude, group_1.latitude))

Loading in station data


In [4]:
gdf_station

Unnamed: 0,time,latitude,longitude,elevation,temp,geometry
0,1990-01-01,46.000000,26.133333,569.0,-8.888889,POINT (26.13333 46.00000)
1,1990-01-02,46.000000,26.133333,569.0,-8.222222,POINT (26.13333 46.00000)
2,1990-01-03,46.000000,26.133333,569.0,-8.000000,POINT (26.13333 46.00000)
3,1990-01-04,46.000000,26.133333,569.0,-11.444444,POINT (26.13333 46.00000)
4,1990-01-05,46.000000,26.133333,569.0,-19.555556,POINT (26.13333 46.00000)
...,...,...,...,...,...,...
2615806,1990-01-27,14.916667,-92.250000,118.0,25.444444,POINT (-92.25000 14.91667)
2615807,1990-01-28,14.916667,-92.250000,118.0,26.444444,POINT (-92.25000 14.91667)
2615808,1990-01-29,14.916667,-92.250000,118.0,26.777778,POINT (-92.25000 14.91667)
2615809,1990-01-30,14.916667,-92.250000,118.0,25.000000,POINT (-92.25000 14.91667)


In [99]:
now = datetime.now()
current_time = now.strftime("%I:%M:%S %p")
print(current_time)
# Open your raster file
with reo.open(lcz_filepath) as src:
    # Transpose your coordinate pairs and sample from the raster
    raster_values = [value for value in src.sample(np.transpose((gdf_station.geometry.x, gdf_station.geometry.y)))]
    raster_lat = [value for value in src.sample(np.transpose((gdf_station.geometry.x, gdf_station.geometry.y)))]
    # Add the raster values to your dataframe
    gdf_station['band_1'] = raster_values
    gdf_station['band_1'] = gdf_station['band_1'].apply(lambda x: x[0])

02:47:10 PM


In [12]:
now = datetime.now()
current_time = now.strftime("%I:%M:%S %p")
print(current_time)
# Open your raster file
with reo.open(lcz_filepath) as src:
    # Get geographic coordinates
    coords = np.transpose((gdf_station.geometry.x, gdf_station.geometry.y))

    # Transpose your coordinate pairs, sample from the raster, and get raster coordinates
    raster_values_and_coords = [(value, src.xy(*src.index(*coord))) for coord, value in zip(coords, src.sample(coords))]

    # Separate values and coordinates
    raster_values = [value[0] for value in raster_values_and_coords]
    raster_coords = [coord for value, coord in raster_values_and_coords]

    # Add the raster values and coordinates to your dataframe
    gdf_station['raster_value'] = raster_values
    gdf_station['raster_coords'] = raster_coords
    # Split 'raster_coords' into 'latitude' and 'longitude'
    gdf_station[['lcz_longitude', 'lcz_latitude']] = gdf_station['raster_coords'].apply(pd.Series)

03:33:53 PM


In [17]:
# Use apply to calculate the distance between each pair of coordinates
gdf_station['lcz_distance_meters'] = gdf_station.apply(lambda row: geodesic(row[['latitude', 'longitude']], row[['lcz_latitude', 'lcz_longitude']]).meters, axis=1)

KeyboardInterrupt: 

In [13]:
gdf_station

Unnamed: 0,time,latitude,longitude,elevation,temp,geometry,raster_value,raster_coords,lcz_longitude,lcz_latitude
0,1990-01-01,45.999582,26.133339,569.0,-8.888889,POINT (26.13333 46.00000),[6],"(26.13333908796227, 45.999581596266424)",26.133339,45.999582
1,1990-01-02,45.999582,26.133339,569.0,-8.222222,POINT (26.13333 46.00000),[6],"(26.13333908796227, 45.999581596266424)",26.133339,45.999582
2,1990-01-03,45.999582,26.133339,569.0,-8.000000,POINT (26.13333 46.00000),[6],"(26.13333908796227, 45.999581596266424)",26.133339,45.999582
3,1990-01-04,45.999582,26.133339,569.0,-11.444444,POINT (26.13333 46.00000),[6],"(26.13333908796227, 45.999581596266424)",26.133339,45.999582
4,1990-01-05,45.999582,26.133339,569.0,-19.555556,POINT (26.13333 46.00000),[6],"(26.13333908796227, 45.999581596266424)",26.133339,45.999582
...,...,...,...,...,...,...,...,...,...,...
2615806,1990-01-27,14.916974,-92.250242,118.0,25.444444,POINT (-92.25000 14.91667),[6],"(-92.25024231444428, 14.916974450446986)",-92.250242,14.916974
2615807,1990-01-28,14.916974,-92.250242,118.0,26.444444,POINT (-92.25000 14.91667),[6],"(-92.25024231444428, 14.916974450446986)",-92.250242,14.916974
2615808,1990-01-29,14.916974,-92.250242,118.0,26.777778,POINT (-92.25000 14.91667),[6],"(-92.25024231444428, 14.916974450446986)",-92.250242,14.916974
2615809,1990-01-30,14.916974,-92.250242,118.0,25.000000,POINT (-92.25000 14.91667),[6],"(-92.25024231444428, 14.916974450446986)",-92.250242,14.916974


In [102]:
gdf_station['band_1'].value_counts()

14    77370
0     22604
16    21102
6     20605
11    20055
8     12550
12    10803
9     10747
17     5972
15     3600
3      3401
5      1982
2      1927
13     1684
4       610
1       237
7       169
10       83
Name: band_1, dtype: int64

In [79]:
da.band_1.sel(x="-122.329038", y="37.808930", method='nearest').values

array(8, dtype=uint8)