In [7]:
# -----------------------------------------------------------------------------
# Load Data: Read CSV files and inspect data
# -----------------------------------------------------------------------------
import sys
sys.path.append('..')

from baseline.utilities import *

from pandarallel import pandarallel

pandarallel.initialize(progress_bar=False, nb_workers=8)


# Raster inputs, given relative to this notebook's working directory.
SENTINEL_TIFF_PATH = '../baseline/S2_sample.tiff' # './S2_sample_5res.tiff'
LANDSAT_TIFF_PATH = '../baseline/Landsat_LST.tiff'

# Run mode: selects which CSV of ground points is loaded into `ground_df`.
MODE = 'submission'  # 'submission' 'train'

# One CSV of ground points per supported mode.
GROUND_CSV_BY_MODE = {
    'train': "../baseline/Training_data_uhi_index.csv",
    'submission': "../baseline/Submission_template.csv",
}

if MODE not in GROUND_CSV_BY_MODE:
    # Fail fast on a mistyped mode instead of continuing without `ground_df`.
    raise ValueError("MODE should be either 'train' or 'submission'")

ground_df = pd.read_csv(GROUND_CSV_BY_MODE[MODE])

# Quick sanity check of the coordinate ranges of the loaded points.
display(ground_df[['Longitude', 'Latitude']].describe())

INFO: Pandarallel will run on 8 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


Unnamed: 0,Longitude,Latitude
count,1040.0,1040.0
mean,-73.934816,40.807991
std,0.028661,0.0232
min,-73.993163,40.758877
25%,-73.95703,40.790802
50%,-73.934618,40.809553
75%,-73.910655,40.823054
max,-73.879537,40.859243


In [8]:
# -----------------------------------------------------------------------------
# Feature Engineering: Explore the NY MESONET Weather data
# -----------------------------------------------------------------------------
# Station coordinates, stored as (latitude, longitude) tuples.
ny_bronx_point = (40.87248, -73.89352)
ny_manhattan_point = (40.76754, -73.96449)


def _read_mesonet_sheet(sheet):
    """Load one station sheet, parse its timestamps and order the rows by time."""
    station_df = pd.read_excel('../baseline/NY_Mesonet_Weather.xlsx', sheet_name=sheet)
    station_df['Date / Time'] = pd.to_datetime(station_df['Date / Time'])
    return station_df.sort_values('Date / Time')


ny_mesonet_bronx_df = _read_mesonet_sheet('Bronx')
ny_mesonet_manhattan_df = _read_mesonet_sheet('Manhattan')

# Print the column types of each station frame (Bronx first, then Manhattan).
for _station_frame in (ny_mesonet_bronx_df, ny_mesonet_manhattan_df):
    print(_station_frame.dtypes)

Date / Time                    datetime64[ns]
Air Temp at Surface [degC]            float64
Relative Humidity [percent]           float64
Avg Wind Speed [m/s]                  float64
Wind Direction [degrees]                int64
Solar Flux [W/m^2]                      int64
dtype: object
Date / Time                    datetime64[ns]
Air Temp at Surface [degC]            float64
Relative Humidity [percent]           float64
Avg Wind Speed [m/s]                  float64
Wind Direction [degrees]                int64
Solar Flux [W/m^2]                      int64
dtype: object


In [9]:
# -----------------------------------------------------------------------------
# Filter Data: Select Weather Data from 3 PM to 4 PM
# -----------------------------------------------------------------------------

def _keep_3pm_to_4pm(station_df):
    """Keep rows whose hour is 15, plus rows whose hour is 16 and minute is 0.

    The surviving rows are returned with a fresh 0..n-1 index.
    """
    stamp = station_df['Date / Time'].dt
    during_hour_15 = stamp.hour == 15
    closing_reading = (stamp.hour == 16) & (stamp.minute == 0)
    return station_df[during_hour_15 | closing_reading].reset_index(drop=True)


ny_mesonet_bronx_df = _keep_3pm_to_4pm(ny_mesonet_bronx_df)
display(ny_mesonet_bronx_df)

ny_mesonet_manhattan_df = _keep_3pm_to_4pm(ny_mesonet_manhattan_df)
display(ny_mesonet_manhattan_df)

Unnamed: 0,Date / Time,Air Temp at Surface [degC],Relative Humidity [percent],Avg Wind Speed [m/s],Wind Direction [degrees],Solar Flux [W/m^2]
0,2021-07-24 15:00:00,28.0,40.3,3.0,75,725
1,2021-07-24 15:05:00,28.1,40.2,1.7,92,558
2,2021-07-24 15:10:00,28.3,40.3,2.9,91,216
3,2021-07-24 15:15:00,28.0,40.7,3.1,114,236
4,2021-07-24 15:20:00,27.9,41.8,2.8,105,229
5,2021-07-24 15:25:00,27.3,44.4,3.7,162,511
6,2021-07-24 15:30:00,27.1,47.3,4.5,170,563
7,2021-07-24 15:35:00,26.9,47.7,3.5,149,292
8,2021-07-24 15:40:00,26.9,48.3,3.0,166,371
9,2021-07-24 15:45:00,27.3,47.4,3.5,146,646


Unnamed: 0,Date / Time,Air Temp at Surface [degC],Relative Humidity [percent],Avg Wind Speed [m/s],Wind Direction [degrees],Solar Flux [W/m^2]
0,2021-07-24 15:00:00,26.1,51.1,4.1,139,140
1,2021-07-24 15:05:00,26.3,51.1,2.5,161,128
2,2021-07-24 15:10:00,26.3,50.9,3.0,158,219
3,2021-07-24 15:15:00,26.6,50.5,3.1,154,584
4,2021-07-24 15:20:00,26.7,49.7,2.0,132,448
5,2021-07-24 15:25:00,27.2,46.4,1.4,175,725
6,2021-07-24 15:30:00,27.3,45.4,3.8,202,349
7,2021-07-24 15:35:00,26.8,47.6,2.4,209,511
8,2021-07-24 15:40:00,27.0,47.2,3.2,142,658
9,2021-07-24 15:45:00,27.1,47.6,3.2,163,565


In [10]:
# -----------------------------------------------------------------------------
# Calculate Distances: Distance to Bronx and Manhattan
# -----------------------------------------------------------------------------
# Start a fresh feature frame from the ground coordinates, with lower-cased
# column names ('latitude', 'longitude').
ny_mesonet_features = ground_df[['Latitude', 'Longitude']].copy()
ny_mesonet_features.columns = ny_mesonet_features.columns.str.lower()

# Hand the helper coordinate-only rows. Applying over the full frame would
# also pass the freshly added 'distance_bronx' column into the second call, so
# the two calls would see rows of different shape.
ground_coords = ny_mesonet_features[['latitude', 'longitude']]

# Calculate the distance from each point to the Bronx and Manhattan.
# NOTE(review): `distance_meters` comes from baseline.utilities; the unit is
# presumably metres, judging by the name -- confirm against its definition.
ny_mesonet_features['distance_bronx'] = ground_coords.parallel_apply(
    lambda x: distance_meters(x, ny_bronx_point),
    axis=1
)
ny_mesonet_features['distance_manhattan'] = ground_coords.parallel_apply(
    lambda x: distance_meters(x, ny_manhattan_point),
    axis=1
)

# Calculate each station's share of the summed distance; the two ratios share
# one denominator and therefore add up to 1 for every point.
total_station_distance = (
    ny_mesonet_features['distance_manhattan'] + ny_mesonet_features['distance_bronx']
)
ny_mesonet_features['ratio_dist_bronx_manhattan'] = (
    ny_mesonet_features['distance_bronx'] / total_station_distance
)
ny_mesonet_features['ratio_dist_manhattan_bronx'] = (
    ny_mesonet_features['distance_manhattan'] / total_station_distance
)

In [11]:
# -----------------------------------------------------------------------------
# Weather Data Pivot: Process Wind and Speed Data
# -----------------------------------------------------------------------------
# Part 1: bearing computed for every ground point against each station.
# NOTE(review): `compute_bearing` comes from baseline.utilities; it is called
# with the station first and the ground point second -- confirm its direction
# and angle convention there.
point_coords = ny_mesonet_features[['latitude', 'longitude']]

ny_mesonet_features['bearing_bronx'] = point_coords.parallel_apply(
    lambda x: compute_bearing(ny_bronx_point, (x['latitude'], x['longitude'])),
    axis=1
)
ny_mesonet_features['bearing_manhattan'] = point_coords.parallel_apply(
    lambda x: compute_bearing(ny_manhattan_point, (x['latitude'], x['longitude'])),
    axis=1
)

# Part 2: stack both stations into one long frame tagged by location.
bronx_tagged = ny_mesonet_bronx_df.assign(location='bronx')
manhattan_tagged = ny_mesonet_manhattan_df.assign(location='manhattan')
ny_mesonet_bm_df = pd.concat([bronx_tagged, manhattan_tagged], axis=0)

# Wide table of wind direction: one row per location, one column per timestamp.
wind_dir_wide = ny_mesonet_bm_df.pivot(
    index="location", columns="Date / Time", values="Wind Direction [degrees]"
)
wind_dir_wide.columns = [f"Wind Direction [degrees] {col}" for col in wind_dir_wide.columns]
ny_mesonet_bm_wind_dir_pivot = wind_dir_wide.reset_index(drop=False)

# Same layout for the average wind speed.
wind_speed_wide = ny_mesonet_bm_df.pivot(
    index="location", columns="Date / Time", values="Avg Wind Speed [m/s]"
)
wind_speed_wide.columns = [f"Avg Wind Speed [m/s] {col}" for col in wind_speed_wide.columns]
nymesonet_bm_avg_wind_speed_pivot = wind_speed_wide.reset_index(drop=False)

# Join the two wide tables side by side on the station name.
ny_mesonet_bm_df_pivot = ny_mesonet_bm_wind_dir_pivot.merge(
    nymesonet_bm_avg_wind_speed_pivot, on="location"
)

display(ny_mesonet_bm_df_pivot) # .to_dict(orient='tight')

Unnamed: 0,location,Wind Direction [degrees] 2021-07-24 15:00:00,Wind Direction [degrees] 2021-07-24 15:05:00,Wind Direction [degrees] 2021-07-24 15:10:00,Wind Direction [degrees] 2021-07-24 15:15:00,Wind Direction [degrees] 2021-07-24 15:20:00,Wind Direction [degrees] 2021-07-24 15:25:00,Wind Direction [degrees] 2021-07-24 15:30:00,Wind Direction [degrees] 2021-07-24 15:35:00,Wind Direction [degrees] 2021-07-24 15:40:00,Wind Direction [degrees] 2021-07-24 15:45:00,Wind Direction [degrees] 2021-07-24 15:50:00,Wind Direction [degrees] 2021-07-24 15:55:00,Wind Direction [degrees] 2021-07-24 16:00:00,Avg Wind Speed [m/s] 2021-07-24 15:00:00,Avg Wind Speed [m/s] 2021-07-24 15:05:00,Avg Wind Speed [m/s] 2021-07-24 15:10:00,Avg Wind Speed [m/s] 2021-07-24 15:15:00,Avg Wind Speed [m/s] 2021-07-24 15:20:00,Avg Wind Speed [m/s] 2021-07-24 15:25:00,Avg Wind Speed [m/s] 2021-07-24 15:30:00,Avg Wind Speed [m/s] 2021-07-24 15:35:00,Avg Wind Speed [m/s] 2021-07-24 15:40:00,Avg Wind Speed [m/s] 2021-07-24 15:45:00,Avg Wind Speed [m/s] 2021-07-24 15:50:00,Avg Wind Speed [m/s] 2021-07-24 15:55:00,Avg Wind Speed [m/s] 2021-07-24 16:00:00
0,bronx,75,92,91,114,105,162,170,149,166,146,157,165,164,3.0,1.7,2.9,3.1,2.8,3.7,4.5,3.5,3.0,3.5,3.2,2.6,2.8
1,manhattan,139,161,158,154,132,175,202,209,142,163,184,196,209,4.1,2.5,3.0,3.1,2.0,1.4,3.8,2.4,3.2,3.2,3.5,3.4,2.7


In [12]:
# -----------------------------------------------------------------------------
# Weather Influence: Calculate Wind Influence Based on Bearing
# -----------------------------------------------------------------------------
# Nested lookup: {"location": {<station>: {<pivot column name>: <reading>}}}.
ny_mesonet_bm_dict = {
    "location": {
        row["location"]: {
            col : row[col]  # f"Wind Direction {col.split()[-1]}"
            for col in ny_mesonet_bm_df_pivot.columns if col != "location"
        }
        for _, row in ny_mesonet_bm_df_pivot.iterrows()
    }
}
print(json.dumps(ny_mesonet_bm_dict, indent=2))

for loc in ny_mesonet_bm_dict['location']:
    print(loc)
    # Bearing column for this station, looked up once per station.
    station_bearing = ny_mesonet_features[f'bearing_{loc}']
    for k, v in ny_mesonet_bm_dict['location'][loc].items():
        print(f"{k}: {v}")
        if k.startswith('Wind Direction'):
            # Cosine of the angle between the reported wind direction and the
            # point's bearing: 1 when they coincide, -1 when they are opposite.
            # This is a plain elementwise expression, so it is evaluated
            # directly on the whole column instead of dispatching a worker pool
            # for every timestamp. The column name keeps only the time of day
            # (the last whitespace-separated token of the pivot column name).
            ny_mesonet_features[f"Wind Influence {k.split()[-1]} {loc}"] = np.cos(
                np.radians(v - station_bearing)
            )

{
  "location": {
    "bronx": {
      "Wind Direction [degrees] 2021-07-24 15:00:00": 75,
      "Wind Direction [degrees] 2021-07-24 15:05:00": 92,
      "Wind Direction [degrees] 2021-07-24 15:10:00": 91,
      "Wind Direction [degrees] 2021-07-24 15:15:00": 114,
      "Wind Direction [degrees] 2021-07-24 15:20:00": 105,
      "Wind Direction [degrees] 2021-07-24 15:25:00": 162,
      "Wind Direction [degrees] 2021-07-24 15:30:00": 170,
      "Wind Direction [degrees] 2021-07-24 15:35:00": 149,
      "Wind Direction [degrees] 2021-07-24 15:40:00": 166,
      "Wind Direction [degrees] 2021-07-24 15:45:00": 146,
      "Wind Direction [degrees] 2021-07-24 15:50:00": 157,
      "Wind Direction [degrees] 2021-07-24 15:55:00": 165,
      "Wind Direction [degrees] 2021-07-24 16:00:00": 164,
      "Avg Wind Speed [m/s] 2021-07-24 15:00:00": 3.0,
      "Avg Wind Speed [m/s] 2021-07-24 15:05:00": 1.7,
      "Avg Wind Speed [m/s] 2021-07-24 15:10:00": 2.9,
      "Avg Wind Speed [m/s] 2021-07-24

In [None]:
# -----------------------------------------------------------------------------
# Final Adjustments: Clean Up and Save Data
# -----------------------------------------------------------------------------
# Normalise the feature column names: spaces become underscores, all lower case
# (e.g. "Wind Influence 15:00:00 bronx" -> "wind_influence_15:00:00_bronx").
ny_mesonet_features.columns = [
    col.replace(' ', '_').lower() for col in ny_mesonet_features.columns
]

display(ny_mesonet_features)

# Drop columns that are no longer needed
ny_mesonet_features = ny_mesonet_features.drop(
    columns=['latitude', 'longitude', 'distance_bronx', 'distance_manhattan', 'ratio_dist_bronx_manhattan', 'ratio_dist_manhattan_bronx']
)

# create a list of hours strings from 15 to 16 every 5 minutes
hours = [f"{h:02d}:{m:02d}:00" for h in range(15, 16) for m in range(0, 60, 5)] + ['16:00:00']

# Relative change of the wind influence between consecutive 5-minute readings.
# Only the pct_change columns are created: the absolute differences were
# previously computed and then dropped again before saving, so they never
# reached the output file.
# NOTE(review): the wind influence is a cosine and can be zero or close to it,
# in which case this ratio becomes infinite or very large -- confirm that the
# downstream consumers tolerate such values.
for place in ['bronx', 'manhattan']:
    for prev_hour, hour in zip(hours, hours[1:]):
        current = ny_mesonet_features[f"wind_influence_{hour}_{place}"]
        previous = ny_mesonet_features[f"wind_influence_{prev_hour}_{place}"]
        ny_mesonet_features[f"pct_change_wind_influence_{hour}_{place}"] = current / previous - 1

# Save the processed data to a parquet file
ny_mesonet_features.to_parquet(f'../pipeline/data/processed/{MODE}/ny_mesonet_features.parquet')

Unnamed: 0,latitude,longitude,distance_bronx,distance_manhattan,ratio_dist_bronx_manhattan,ratio_dist_manhattan_bronx,bearing_bronx,bearing_manhattan,wind_influence_15:00:00_bronx,wind_influence_15:05:00_bronx,wind_influence_15:10:00_bronx,wind_influence_15:15:00_bronx,wind_influence_15:20:00_bronx,wind_influence_15:25:00_bronx,wind_influence_15:30:00_bronx,wind_influence_15:35:00_bronx,wind_influence_15:40:00_bronx,wind_influence_15:45:00_bronx,wind_influence_15:50:00_bronx,wind_influence_15:55:00_bronx,wind_influence_16:00:00_bronx,wind_influence_15:00:00_manhattan,wind_influence_15:05:00_manhattan,wind_influence_15:10:00_manhattan,wind_influence_15:15:00_manhattan,wind_influence_15:20:00_manhattan,wind_influence_15:25:00_manhattan,wind_influence_15:30:00_manhattan,wind_influence_15:35:00_manhattan,wind_influence_15:40:00_manhattan,wind_influence_15:45:00_manhattan,wind_influence_15:50:00_manhattan,wind_influence_15:55:00_manhattan,wind_influence_16:00:00_manhattan
0,40.788763,-73.971665,11396.435733,2433.427139,0.824045,0.175955,215.258656,345.642869,-0.768938,-0.548420,-0.562930,-0.195238,-0.346259,0.598204,0.703907,0.402608,0.652645,0.354150,0.526085,0.639323,0.625806,-0.893819,-0.996719,-0.991116,-0.979425,-0.832507,-0.986694,-0.805338,-0.727089,-0.916063,-0.998936,-0.949112,-0.862892,-0.727089
1,40.788875,-73.971928,11399.191433,2451.040042,0.823033,0.176967,215.385684,345.213281,-0.770354,-0.550272,-0.564761,-0.197412,-0.348338,0.596425,0.702331,0.400578,0.650964,0.352076,0.524199,0.637616,0.624075,-0.897156,-0.997297,-0.992086,-0.980910,-0.836637,-0.985447,-0.800870,-0.721921,-0.919044,-0.999254,-0.946724,-0.859079,-0.721921
2,40.789080,-73.967080,11147.959828,2401.980340,0.822731,0.177269,213.742044,354.798302,-0.751748,-0.526096,-0.540858,-0.169213,-0.321308,0.619203,0.722460,0.426694,0.672470,0.378777,0.548409,0.659450,0.646230,-0.811081,-0.971141,-0.957328,-0.934836,-0.733750,-0.999994,-0.889403,-0.827064,-0.840583,-0.978873,-0.987132,-0.932313,-0.827064
3,40.789082,-73.972550,11411.054324,2487.073279,0.821050,0.178950,215.666804,344.183684,-0.773473,-0.554362,-0.568803,-0.202220,-0.352933,0.592480,0.698830,0.396078,0.647232,0.347479,0.520014,0.633829,0.620234,-0.904948,-0.998457,-0.994182,-0.984246,-0.846345,-0.982234,-0.789980,-0.709370,-0.925978,-0.999787,-0.940784,-0.849743,-0.709370
4,40.787953,-73.969697,11375.351888,2309.110204,0.831260,0.168740,214.315156,349.069885,-0.758307,-0.534576,-0.549244,-0.179062,-0.330764,0.611318,0.715508,0.417627,0.665033,0.369501,0.540018,0.651898,0.638564,-0.865415,-0.990098,-0.981394,-0.965609,-0.797901,-0.994649,-0.839334,-0.766828,-0.890452,-0.994394,-0.966241,-0.891560,-0.766828
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1035,40.813803,-73.919388,6871.595807,6394.107691,0.517997,0.482003,198.452702,36.415964,-0.551248,-0.283224,-0.299918,0.096667,-0.060225,0.804348,0.879211,0.650076,0.843835,0.609416,0.749502,0.834341,0.824593,-0.217871,-0.567614,-0.523749,-0.463049,-0.097306,-0.749927,-0.968514,-0.991635,-0.268651,-0.596001,-0.844179,-0.937185,-0.991635
1036,40.833178,-73.931033,5390.234809,7816.835416,0.408133,0.591867,215.840584,21.088063,-0.775392,-0.556884,-0.571295,-0.205189,-0.355769,0.590034,0.696657,0.393291,0.644917,0.344633,0.517421,0.631480,0.617852,-0.468114,-0.765056,-0.730305,-0.680873,-0.356933,-0.898119,-0.999873,-0.990481,-0.513720,-0.787064,-0.955854,-0.996060,-0.990481
1037,40.854542,-73.934647,3998.840314,9984.296513,0.285976,0.714024,240.039609,14.544145,-0.966105,-0.848414,-0.857523,-0.588344,-0.707595,0.207235,0.341370,-0.018144,0.274973,-0.070446,0.121183,0.258151,0.241251,-0.565771,-0.833460,-0.803398,-0.759905,-0.461065,-0.942384,-0.991545,-0.968340,-0.608150,-0.852237,-0.983114,-0.999677,-0.968340
1038,40.815413,-73.917223,6645.099794,6646.640044,0.499942,0.500058,197.451449,36.762612,-0.536585,-0.266422,-0.283203,0.114045,-0.042773,0.814607,0.887402,0.663254,0.853083,0.623178,0.760956,0.843846,0.834353,-0.211963,-0.562623,-0.518585,-0.457678,-0.091282,-0.745911,-0.966990,-0.990836,-0.262819,-0.591132,-0.840920,-0.935057,-0.990836
