In [2]:
import pandas as pd
import requests as r
import geopandas as gpd
import numpy as np
import pandas as pd

from scipy.spatial import cKDTree
from shapely.geometry import Point

In [19]:
flights_train = pd.read_csv("./train_emiel_v4_w_times.csv")
airports = pd.read_csv("./airports.csv")

In [20]:
airports[airports.LATITUDE.isna()]

Unnamed: 0,IATA_CODE,AIRPORT,CITY,STATE,COUNTRY,LATITUDE,LONGITUDE
96,ECP,Northwest Florida Beaches International Airport,Panama City,FL,USA,,
234,PBG,Plattsburgh International Airport,Plattsburgh,NY,USA,,
313,UST,Northeast Florida Regional Airport (St. August...,St. Augustine,FL,USA,,


In [21]:
# Fix ECP
airports.loc[96,["LATITUDE", "LONGITUDE"]] = (30.3548543,-85.8017021)
# Fix PBG
airports.loc[234,["LATITUDE", "LONGITUDE"]] = (44.6520597,-73.470109)
# Fix UST
airports.loc[313,["LATITUDE", "LONGITUDE"]] = (29.95439,-81.3450803)

In [None]:
df = pd.DataFrame()
for airport in airports.itertuples():
    station = airport[1]
    url = f"https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station={station}&data=tmpc&data=sknt&data=p01m&data=vsby&data=gust&data=skyc1&data=skyc2&data=skyc3&data=wxcodes&data=ice_accretion_6hr&data=snowdepth&year1=2015&month1=1&day1=1&year2=2015&month2=8&day2=1&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=empty&trace=T&direct=no&report_type=1&report_type=2"
    df = df.append(pd.read_csv(url))

In [None]:
df["valid"] = pd.to_datetime(df["valid"])
df

In [259]:
df.gust = df.gust.fillna(0)
df

0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
5563     0.0
5564     0.0
5565     0.0
5566     0.0
5567    21.0
Name: gust, Length: 2187706, dtype: float64

In [233]:
# Creating a geometry column for all airports
geometry = [Point(xy) for xy in zip(airports['LONGITUDE'], airports['LATITUDE'])]
# Creating a Geographic data frame for all airports
gpd1 = gpd.GeoDataFrame(airports, geometry=geometry).reset_index(drop=True)

# Get all the airports for which we couldn't retrieve any data
missing_airports = set(airports.IATA_CODE.unique()) - set(df.station.unique())
# Create geo data frame for the missing airports
gpd2 = gpd1[gpd1["IATA_CODE"].isin(missing_airports)].reset_index(drop=True)
# Change the column name to be identifiable later
gpd2["MISSING_IATA_CODE"] = gpd2["IATA_CODE"]
gpd2 = gpd2.drop(columns="IATA_CODE")

In [237]:
def ckdnearest(gdA, gdB):
    """
    Function to compute pairwise distances between all points in gdA and gdB
    Found in: https://gis.stackexchange.com/a/301935
    """
    nA = np.array(list(gdA.geometry.apply(lambda x: (x.x, x.y))))
    nB = np.array(list(gdB.geometry.apply(lambda x: (x.x, x.y))))
    btree = cKDTree(nB)
    dist, idx = btree.query(nA, k=1)
    gdB_nearest = gdB.iloc[idx].drop(columns="geometry").reset_index(drop=True)
    gdf = pd.concat(
        [
            gdA.reset_index(drop=True),
            gdB_nearest,
            pd.Series(dist, name='dist')
        ], 
        axis=1)

    return gdf

In [244]:
# Find the closest airport to the missing ones
distance_matrix = ckdnearest(gpd2, gpd1[~gpd1.IATA_CODE.isin(missing_airports)]).sort_values('dist')
airport_mapping = distance_matrix[["MISSING_IATA_CODE", "IATA_CODE"]]
airport_mapping

Unnamed: 0,MISSING_IATA_CODE,IATA_CODE
8,CLD,SAN
20,MQT,ESC
28,SCE,MDT
11,FCA,MSO
35,YUM,PSP
18,KTN,BLI
33,WRG,BLI
26,PSG,BLI
5,BQN,MIA
29,SIT,BLI


In [253]:
df_missing_airports = pd.DataFrame()
for airports in airport_mapping.itertuples():
    missing_airport = airports[1]
    closest_airport = airports[2]
    closest_airport = df[df.station==closest_airport]
    closest_airport.station = missing_airport
    df_missing_airports = df_missing_airports.append(closest_airport)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [255]:
df = df.append(df_missing_airports)

In [257]:
df.to_csv("weather_for_all_airports.csv")