# PLEASE CLONE THIS NOTEBOOK INTO YOUR PERSONAL FOLDER
# DO NOT RUN CODE IN THE SHARED FOLDER

In [0]:
# Put at the top of any notebooks for storing in blob
# Shared imports plus the Azure Blob Storage / mount settings used by the cells below.

# NOTE: `max` imported here is pyspark.sql.functions.max and shadows Python's built-in max()
# for the rest of the notebook.
from pyspark.sql.functions import col, max
import pandas as pd
import numpy as np

blob_container = "team06" # The name of your container created in https://portal.azure.com
storage_account = "apatel" # The name of your Storage account created in https://portal.azure.com
secret_scope = "team06" # The name of the scope created in your local computer using the Databricks CLI
secret_key = "team06" # The name of the secret key created in your local computer using the Databricks CLI
# Base wasbs:// URL of the team container; later cells append a dataset name to it.
blob_url = f"wasbs://{blob_container}@{storage_account}.blob.core.windows.net"
# Mount point under which the final-project datasets folder is listed below.
mount_path = "/mnt/mids-w261"

In [0]:
# List the datasets available under the mounted Final Project folder
final_project_listing = dbutils.fs.ls("/mnt/mids-w261/datasets_final_project")
display(final_project_listing)

path,name,size
dbfs:/mnt/mids-w261/datasets_final_project/airlines/,airlines/,0
dbfs:/mnt/mids-w261/datasets_final_project/airlines_data/,airlines_data/,0
dbfs:/mnt/mids-w261/datasets_final_project/parquet_airlines_data/,parquet_airlines_data/,0
dbfs:/mnt/mids-w261/datasets_final_project/parquet_airlines_data_3m/,parquet_airlines_data_3m/,0
dbfs:/mnt/mids-w261/datasets_final_project/parquet_airlines_data_6m/,parquet_airlines_data_6m/,0
dbfs:/mnt/mids-w261/datasets_final_project/stations_data/,stations_data/,0
dbfs:/mnt/mids-w261/datasets_final_project/weather_data/,weather_data/,0
dbfs:/mnt/mids-w261/datasets_final_project/weather_data_6_hr/,weather_data_6_hr/,0
dbfs:/mnt/mids-w261/datasets_final_project/weather_data_single/,weather_data_single/,0


In [0]:
# Load the airport / nearest-weather-station table from the team blob container.
# The location is built from `blob_url` (defined in the config cell at the top of the
# notebook) instead of repeating the container and storage account inline, matching
# how the other blob reads in this notebook are written. The resolved path is unchanged.
df_nearest_stations = spark.read.parquet(f"{blob_url}/nearest_stations")
display(df_nearest_stations)


IATA_code,airport_name,station_name,station_id,station_lat,station_lon,airport_lat,airport_lon,distance,ROW_NUM
AAA,ANAA,HILO INTERNATIONAL AIRPORT,91285021504,19.719,-155.053,-17.353,-145.51,4251.898933992638,1
ABU,,HENRY E ROHLSEN AIRPORT,78551011624,17.7,-64.813,0.0,0.0,7347.999434125593,1
ACE,LANZAROTE,EASTPORT,72608014608,44.917,-67.0,28.945,-13.605,4966.512915532155,1
ACY,ATLANTIC CITY INTERNATIONAL,ATLANTIC CITY INTERNATIONAL A,72407093730,39.452,-74.567,39.458,-74.577,1.0873124401837904,1
ADA,INCIRLIK AB,BARROW POINT BARROW,70027127506,71.333,-156.65,37.002,35.426,7930.793499190758,1
ADD,BOLE INTERNATIONAL,LORING AFB/LIMESTON,72712514623,46.95,-67.883,8.977,38.8,10514.858000930082,1
ADE,,HENRY E ROHLSEN AIRPORT,78551011624,17.7,-64.813,0.0,0.0,7347.999434125593,1
ADL,ADELAIDE INTERNATIONAL,BARKING SANDS PACIFIC MISSILE,91162022501,22.033,-159.783,-34.945,138.531,9077.164809077083,1
ADQ,KODIAK,KODIAK AIRPORT,70350025501,57.751,-152.486,57.75,-152.494,0.4875259749014348,1
ADZ,GUSTAVO ROJAS PINILLA,KEY WEST INTERNATIONAL AIRPOR,72201012836,24.557,-81.755,12.583,-81.711,1331.4579706172217,1


In [0]:
# Example: normalising distances as (distance - mean) / std over the whole table
df_nearest_stations.createOrReplaceTempView('nearest_stations')

# Each scalar subquery aggregates over all of nearest_stations; the result is added
# to every row as `dist_norm`.
normalized_distance_sql = '''
select *, (distance - (select mean(distance) from nearest_stations))/(select std(distance) from nearest_stations) as dist_norm
from nearest_stations
'''
spark.sql(normalized_distance_sql).show()

# Scratch query kept for reference (disabled):
# spark.sql('''
# select count(ed1)
# from weather_vw
# where ed1 == ''
# ''').show()

In [0]:
# Flights: read the 3-month airlines parquet extract (2015 Q1) and preview it
airlines_3m_path = "/mnt/mids-w261/datasets_final_project/parquet_airlines_data_3m/"
df_airlines = spark.read.parquet(airlines_3m_path)
display(df_airlines)


YEAR,QUARTER,MONTH,DAY_OF_MONTH,DAY_OF_WEEK,FL_DATE,OP_UNIQUE_CARRIER,OP_CARRIER_AIRLINE_ID,OP_CARRIER,TAIL_NUM,OP_CARRIER_FL_NUM,ORIGIN_AIRPORT_ID,ORIGIN_AIRPORT_SEQ_ID,ORIGIN_CITY_MARKET_ID,ORIGIN,ORIGIN_CITY_NAME,ORIGIN_STATE_ABR,ORIGIN_STATE_FIPS,ORIGIN_STATE_NM,ORIGIN_WAC,DEST_AIRPORT_ID,DEST_AIRPORT_SEQ_ID,DEST_CITY_MARKET_ID,DEST,DEST_CITY_NAME,DEST_STATE_ABR,DEST_STATE_FIPS,DEST_STATE_NM,DEST_WAC,CRS_DEP_TIME,DEP_TIME,DEP_DELAY,DEP_DELAY_NEW,DEP_DEL15,DEP_DELAY_GROUP,DEP_TIME_BLK,TAXI_OUT,WHEELS_OFF,WHEELS_ON,TAXI_IN,CRS_ARR_TIME,ARR_TIME,ARR_DELAY,ARR_DELAY_NEW,ARR_DEL15,ARR_DELAY_GROUP,ARR_TIME_BLK,CANCELLED,CANCELLATION_CODE,DIVERTED,CRS_ELAPSED_TIME,ACTUAL_ELAPSED_TIME,AIR_TIME,FLIGHTS,DISTANCE,DISTANCE_GROUP,CARRIER_DELAY,WEATHER_DELAY,NAS_DELAY,SECURITY_DELAY,LATE_AIRCRAFT_DELAY,FIRST_DEP_TIME,TOTAL_ADD_GTIME,LONGEST_ADD_GTIME,DIV_AIRPORT_LANDINGS,DIV_REACHED_DEST,DIV_ACTUAL_ELAPSED_TIME,DIV_ARR_DELAY,DIV_DISTANCE,DIV1_AIRPORT,DIV1_AIRPORT_ID,DIV1_AIRPORT_SEQ_ID,DIV1_WHEELS_ON,DIV1_TOTAL_GTIME,DIV1_LONGEST_GTIME,DIV1_WHEELS_OFF,DIV1_TAIL_NUM,DIV2_AIRPORT,DIV2_AIRPORT_ID,DIV2_AIRPORT_SEQ_ID,DIV2_WHEELS_ON,DIV2_TOTAL_GTIME,DIV2_LONGEST_GTIME,DIV2_WHEELS_OFF,DIV2_TAIL_NUM,DIV3_AIRPORT,DIV3_AIRPORT_ID,DIV3_AIRPORT_SEQ_ID,DIV3_WHEELS_ON,DIV3_TOTAL_GTIME,DIV3_LONGEST_GTIME,DIV3_WHEELS_OFF,DIV3_TAIL_NUM,DIV4_AIRPORT,DIV4_AIRPORT_ID,DIV4_AIRPORT_SEQ_ID,DIV4_WHEELS_ON,DIV4_TOTAL_GTIME,DIV4_LONGEST_GTIME,DIV4_WHEELS_OFF,DIV4_TAIL_NUM,DIV5_AIRPORT,DIV5_AIRPORT_ID,DIV5_AIRPORT_SEQ_ID,DIV5_WHEELS_ON,DIV5_TOTAL_GTIME,DIV5_LONGEST_GTIME,DIV5_WHEELS_OFF,DIV5_TAIL_NUM
2015,1,2,1,7,2015-02-01,AA,19805,AA,N3MEAA,392,13930,1393003,30977,ORD,"Chicago, IL",IL,17,Illinois,41,12953,1295302,31703,LGA,"New York, NY",NY,36,New York,22,730,749.0,19.0,19.0,1.0,1.0,0700-0759,34.0,823.0,1056.0,4.0,1030,1100.0,30.0,30.0,1.0,2.0,1000-1059,0.0,,0.0,120.0,131.0,93.0,1.0,733.0,3,0.0,19.0,11.0,0.0,0.0,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2015,1,2,2,1,2015-02-02,AA,19805,AA,N3LPAA,392,13930,1393003,30977,ORD,"Chicago, IL",IL,17,Illinois,41,12953,1295302,31703,LGA,"New York, NY",NY,36,New York,22,730,,,,,,0700-0759,,,,,1030,,,,,,1000-1059,1.0,B,0.0,120.0,,,1.0,733.0,3,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2015,1,2,3,2,2015-02-03,AA,19805,AA,N3LXAA,392,13930,1393003,30977,ORD,"Chicago, IL",IL,17,Illinois,41,12953,1295302,31703,LGA,"New York, NY",NY,36,New York,22,730,735.0,5.0,5.0,0.0,0.0,0700-0759,19.0,754.0,1027.0,9.0,1030,1036.0,6.0,6.0,0.0,0.0,1000-1059,0.0,,0.0,120.0,121.0,93.0,1.0,733.0,3,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2015,1,2,4,3,2015-02-04,AA,19805,AA,N3LHAA,392,13930,1393003,30977,ORD,"Chicago, IL",IL,17,Illinois,41,12953,1295302,31703,LGA,"New York, NY",NY,36,New York,22,730,727.0,-3.0,0.0,0.0,-1.0,0700-0759,11.0,738.0,1008.0,5.0,1030,1013.0,-17.0,0.0,0.0,-2.0,1000-1059,0.0,,0.0,120.0,106.0,90.0,1.0,733.0,3,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2015,1,2,5,4,2015-02-05,AA,19805,AA,N3LYAA,392,13930,1393003,30977,ORD,"Chicago, IL",IL,17,Illinois,41,12953,1295302,31703,LGA,"New York, NY",NY,36,New York,22,730,757.0,27.0,27.0,1.0,1.0,0700-0759,23.0,820.0,1058.0,3.0,1030,1101.0,31.0,31.0,1.0,2.0,1000-1059,0.0,,0.0,120.0,124.0,98.0,1.0,733.0,3,0.0,0.0,31.0,0.0,0.0,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2015,1,2,6,5,2015-02-06,AA,19805,AA,N3LVAA,392,13930,1393003,30977,ORD,"Chicago, IL",IL,17,Illinois,41,12953,1295302,31703,LGA,"New York, NY",NY,36,New York,22,730,723.0,-7.0,0.0,0.0,-1.0,0700-0759,10.0,733.0,1007.0,4.0,1030,1011.0,-19.0,0.0,0.0,-2.0,1000-1059,0.0,,0.0,120.0,108.0,94.0,1.0,733.0,3,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2015,1,2,8,7,2015-02-08,AA,19805,AA,N3MAAA,392,13930,1393003,30977,ORD,"Chicago, IL",IL,17,Illinois,41,12953,1295302,31703,LGA,"New York, NY",NY,36,New York,22,730,728.0,-2.0,0.0,0.0,-1.0,0700-0759,10.0,738.0,1012.0,6.0,1030,1018.0,-12.0,0.0,0.0,-1.0,1000-1059,0.0,,0.0,120.0,110.0,94.0,1.0,733.0,3,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2015,1,2,9,1,2015-02-09,AA,19805,AA,N3MGAA,392,13930,1393003,30977,ORD,"Chicago, IL",IL,17,Illinois,41,12953,1295302,31703,LGA,"New York, NY",NY,36,New York,22,730,807.0,37.0,37.0,1.0,2.0,0700-0759,15.0,822.0,1053.0,5.0,1030,1058.0,28.0,28.0,1.0,1.0,1000-1059,0.0,,0.0,120.0,111.0,91.0,1.0,733.0,3,23.0,5.0,0.0,0.0,0.0,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2015,1,2,10,2,2015-02-10,AA,19805,AA,N3LTAA,392,13930,1393003,30977,ORD,"Chicago, IL",IL,17,Illinois,41,12953,1295302,31703,LGA,"New York, NY",NY,36,New York,22,730,722.0,-8.0,0.0,0.0,-1.0,0700-0759,27.0,749.0,1027.0,3.0,1030,1030.0,0.0,0.0,0.0,0.0,1000-1059,0.0,,0.0,120.0,128.0,98.0,1.0,733.0,3,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2015,1,2,11,3,2015-02-11,AA,19805,AA,N3LVAA,392,13930,1393003,30977,ORD,"Chicago, IL",IL,17,Illinois,41,12953,1295302,31703,LGA,"New York, NY",NY,36,New York,22,730,724.0,-6.0,0.0,0.0,-1.0,0700-0759,12.0,736.0,1055.0,5.0,1030,1100.0,30.0,30.0,1.0,2.0,1000-1059,0.0,,0.0,120.0,156.0,139.0,1.0,733.0,3,0.0,0.0,30.0,0.0,0.0,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [0]:
# Weather: read every weather parquet file, keep rows dated before 2015-04-01,
# and cache the result because several later cells query it.
df_weather = (
    spark.read.parquet("/mnt/mids-w261/datasets_final_project/weather_data/*")
    .filter(col('DATE') < "2015-04-01T00:00:00.000")
    .cache()
)
# display(df_weather)
# df_weather.filter(col("report_type") == "FM-15").show()

In [0]:
# Optional (disabled): restrict the weather data to FM-15 reports before previewing.
#df_weather = df_weather.filter(col("report_type").like("FM-15"))
# Preview the cached weather DataFrame.
display(df_weather)

station,report_type,date,source,latitude,longitude,elevation,name,call_sign,quality_control,wnd,cig,vis,tmp,dew,slp,ga1,ga2,ga3,ga4,ge1,gf1,ka1,ka2,ma1,md1,oc1,od1,od2,rem,eqd,ay2,aa1,ay1,aa2,gg1,gh1,gj1,kf1,kg1,kg2,me1
3809099999,FM-15,2015-01-01T00:50:00.000+0000,4,50.086092,-5.255711,81.38,"CULDROSE, UK",99999,V020,"210,1,N,0077,1","00183,1,C,N",8000199,1101,1001,999999,"02,1,+00122,1,99,9","04,1,+00183,1,99,9","08,1,+00305,1,99,9",,"9,AGL ,+99999,+99999",99999021999001221999999,,,102901999999,,,,,MET079METAR EGDR 010050Z 21015KT 8000 -DZ FEW004 SCT006 OVC010 11/10 Q1029 YLO1=,,,,,,,,,,,,
3809099999,FM-15,2015-01-01T01:50:00.000+0000,4,50.086092,-5.255711,81.38,"CULDROSE, UK",99999,V020,"200,1,N,0082,1","00244,1,9,N",8000199,1201,1001,999999,"04,1,+00183,1,99,9","07,1,+00244,1,99,9","08,1,+00305,1,99,9",,"9,AGL ,+99999,+99999",99999041999001831999999,,,102901999999,,1441.0,,,MET086METAR EGDR 010150Z 20016G28KT 8000 HZ SCT006 BKN008 OVC010 12/10 Q1029 REDZ YLO1=,,,,,,,,,,,,
3809099999,FM-15,2015-01-01T02:50:00.000+0000,4,50.086092,-5.255711,81.38,"CULDROSE, UK",99999,V020,"210,1,N,0093,1","00122,1,9,N",6000199,1101,1101,999999,"02,1,+00061,1,99,9","07,1,+00122,1,99,9","08,1,+00213,1,99,9",,"9,AGL ,+99999,+99999",99999021999000611999999,,,102901999999,,,,,MET079METAR EGDR 010250Z 21018KT 6000 -DZ FEW002 BKN004 OVC007 11/11 Q1029 YLO2=,,,,,,,,,,,,
3809099999,FM-15,2015-01-01T03:50:00.000+0000,4,50.086092,-5.255711,81.38,"CULDROSE, UK",99999,V020,"200,1,N,0082,1","00122,1,9,N",6000199,1101,1101,999999,"02,1,+00061,1,99,9","07,1,+00122,1,99,9","08,1,+00183,1,99,9",,"9,AGL ,+99999,+99999",99999021999000611999999,,,102801999999,,1341.0,,,MET082METAR EGDR 010350Z 20016G26KT 6000 -DZ FEW002 BKN004 OVC006 11/11 Q1028 YLO2=,,,,,,,,,,,,
3809099999,FM-15,2015-01-01T04:50:00.000+0000,4,50.086092,-5.255711,81.38,"CULDROSE, UK",99999,V020,"200,1,N,0082,1","00122,1,9,N",2500199,1101,1101,999999,"04,1,+00061,1,99,9","08,1,+00122,1,99,9",,,"9,AGL ,+99999,+99999",99999041999000611999999,,,102801999999,,1391.0,,,MET076METAR EGDR 010450Z 20016G27KT 2500 -RADZ SCT002 OVC004 11/11 Q1028 AMB=,,,,,,,,,,,,
3809099999,FM-15,2015-01-01T05:50:00.000+0000,4,50.086092,-5.255711,81.38,"CULDROSE, UK",99999,V020,"200,1,N,0093,1","00061,1,C,N",3000199,1201,1101,999999,"02,1,+00030,1,99,9","04,1,+00061,1,99,9","07,1,+00122,1,99,9",,"9,AGL ,+99999,+99999",99999021999000301999999,,,102701999999,,1491.0,,,MET085METAR EGDR 010550Z 20018G29KT 3000 BR FEW001 SCT002 BKN004 12/11 Q1027 RERA AMB=,,,,,,,,,,,,
3809099999,FM-15,2015-01-01T06:50:00.000+0000,4,50.086092,-5.255711,81.38,"CULDROSE, UK",99999,V020,"210,1,N,0088,1","00152,1,9,N",3000199,1201,1101,999999,"04,1,+00061,1,99,9","07,1,+00152,1,99,9","08,1,+00305,1,99,9",,"9,AGL ,+99999,+99999",99999041999000611999999,,,102701999999,,1441.0,,,MET080METAR EGDR 010650Z 21017G28KT 3000 BR SCT002 BKN005 OVC010 12/11 Q1027 AMB=,,,,,,,,,,,,
3809099999,FM-15,2015-01-01T07:50:00.000+0000,4,50.086092,-5.255711,81.38,"CULDROSE, UK",99999,V020,"210,1,N,0082,1","00091,1,C,N",4500199,1201,1101,999999,"02,1,+00061,1,99,9","04,1,+00091,1,99,9","08,1,+00213,1,99,9",,"9,AGL ,+99999,+99999",99999021999000611999999,,,102801999999,,1341.0,,,MET081METAR EGDR 010750Z 21016G26KT 4500 BR FEW002 SCT003 OVC007 12/11 Q1028 YLO2=,,,,,,,,,,,,
3809099999,FM-15,2015-01-01T08:50:00.000+0000,4,50.086092,-5.255711,81.38,"CULDROSE, UK",99999,V020,"200,1,N,0077,1","00122,1,9,N",6000199,1201,1101,999999,"02,1,+00061,1,99,9","07,1,+00122,1,99,9","08,1,+00244,1,99,9",,"9,AGL ,+99999,+99999",99999021999000611999999,,,102801999999,,1341.0,,,MET113METAR EGDR 010850Z 20015G26KT 6000 HZ FEW002 BKN004 OVC008 12/11 Q1028 YLO2 TEMPO 1500 BR SCT002 OVC006 AMB=,,,,,,,,,,,,
3809099999,FM-15,2015-01-01T09:50:00.000+0000,4,50.086092,-5.255711,81.38,"CULDROSE, UK",99999,V020,"210,1,N,0093,1","00213,1,9,N",7000199,1201,1101,999999,"04,1,+00122,1,99,9","07,1,+00213,1,99,9","08,1,+00305,1,99,9",,"9,AGL ,+99999,+99999",99999041999001221999999,,,102801999999,,1441.0,,,MET113METAR EGDR 010950Z 21018G28KT 7000 BR SCT004 BKN007 OVC010 12/11 Q1028 YLO2 TEMPO 1500 BR SCT002 OVC006 AMB=,,,,,,,,,,,,


In [0]:
# Expose the weather DataFrame to Spark SQL under the name `weather_vw`
df_weather.createOrReplaceTempView('weather_vw')
#Total rows = 29823926

# Show the distinct station identifiers present in the weather data
distinct_station_sql = '''
select distinct station
from weather_vw
'''
spark.sql(distinct_station_sql).show()

In [0]:

# Register (or replace) the `weather_vw` temp view over df_weather.
df_weather.createOrReplaceTempView('weather_vw')
#Total rows = 29823926

# Peek at ten rows of the mw1 / mw2 columns.
# NOTE(review): the df_weather preview shown earlier in this notebook does not list
# mw1 / mw2 among its columns -- confirm these columns exist in this DataFrame.
spark.sql('''
select mw1, mw2
from weather_vw
limit 10
''').show()


In [0]:
#edited from https://nathanrooy.github.io/posts/2016-09-07/haversine-with-python/
import math

def haversine(coord1,coord2):
  """Return the great-circle distance in kilometers between two points.

  Args:
    coord1: (longitude, latitude) of the first point, in decimal degrees.
    coord2: (longitude, latitude) of the second point, in decimal degrees.

  Returns:
    The haversine distance in kilometers (float), on a sphere of radius 6,371 km.

  Note:
    Each coordinate is ordered longitude first, then latitude.
  """
  lon1,lat1=coord1
  lon2,lat2=coord2

  R=6371000                               # radius of Earth in meters
  phi_1=math.radians(lat1)
  phi_2=math.radians(lat2)

  delta_phi=math.radians(lat2-lat1)
  delta_lambda=math.radians(lon2-lon1)

  a=math.sin(delta_phi/2.0)**2+\
         math.cos(phi_1)*math.cos(phi_2)*\
         math.sin(delta_lambda/2.0)**2
  # Floating-point rounding can push `a` marginally above 1 for (near-)antipodal
  # points, which would make math.sqrt(1-a) raise ValueError. Clamp it back to 1.
  # (A plain `if` is used rather than min()/max() because this notebook imports
  # pyspark's `max`, which shadows the builtin.)
  if a>1.0:
    a=1.0
  c=2*math.atan2(math.sqrt(a),math.sqrt(1-a))
  return R*c/1000                         # meters -> kilometers


In [0]:
# Sanity check: distance in kilometers between two nearby points.
# Each coordinate is passed as [longitude, latitude], the order haversine() unpacks.
haversine([-84.412977,39.152501],[-84.412946,39.152505])

In [0]:
# Stations: read all station parquet files (station / neighbor pairs with distances) and preview
stations_glob = "/mnt/mids-w261/datasets_final_project/stations_data/*"
df_stations = spark.read.parquet(stations_glob)
display(df_stations)

usaf,wban,station_id,lat,lon,neighbor_id,neighbor_name,neighbor_state,neighbor_call,neighbor_lat,neighbor_lon,distance_to_neighbor
690020,93218,69002093218,36.0,-121.233,69002093218,JOLON HUNTER LIGGETT MIL RES,CA,KHGT,36.0,-121.233,0.0
690020,93218,69002093218,36.0,-121.233,69007093217,FRITZSCHE AAF,CA,KOAR,36.683,-121.767,55.73024537916726
690020,93218,69002093218,36.0,-121.233,69014093101,EL TORO MCAS,CA,KNZJ,33.667,-117.733,255.49106220353931
690020,93218,69002093218,36.0,-121.233,70027127506,BARROW POINT BARROW,AK,KPBA,71.333,-156.65,2750.4353299559803
690020,93218,69002093218,36.0,-121.233,70045027512,LONELY,AK,LNI,70.917,-153.25,2676.3554370627157
690020,93218,69002093218,36.0,-121.233,70063027403,OLIKTOK POW 2,AK,POLI,70.5,-149.883,2604.050248854232
690020,93218,69002093218,36.0,-121.233,70063526465,GALBRAITH LAKE AIRPORT,AK,PAGB,68.479,-149.49,2490.975609447228
690020,93218,69002093218,36.0,-121.233,70063627405,PRUDHOE BAY,AK,PAUD,70.25,-148.333,2568.180281844432
690020,93218,69002093218,36.0,-121.233,70104626418,CENTRAL AIRPORT,AK,PACE,65.567,-144.765,2254.558489129194
690020,93218,69002093218,36.0,-121.233,70119526625,SHISHMAREF/NEW AIRPORT,AK,PASH,66.25,-166.089,2743.6881828292408


In [0]:
# Expose the stations DataFrame to Spark SQL under the name `stations_vw`
df_stations.createOrReplaceTempView('stations_vw')

# Count the station/neighbor rows whose distance_to_neighbor is exactly 0
zero_distance_sql = '''
select count(*)
from stations_vw
where distance_to_neighbor == 0
'''
spark.sql(zero_distance_sql).show()

In [0]:
from pyspark.sql.functions import col
import pandas as pd
import numpy as np
import os

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from pyspark.sql.functions import lit
from pyspark.sql.functions import sum, col, desc
from pyspark.sql.functions import to_date
import ast

import geopandas as gpd
import folium

In [0]:
# Load the `df_weather_filtered_stations` parquet dataset from the team blob container
# (`blob_url` is defined in the config cell at the top of the notebook).
df_weather_EDA = spark.read.parquet(f"{blob_url}/df_weather_filtered_stations")

In [0]:
# Number of rows in the filtered-stations weather dataset.
df_weather_EDA.count()

In [0]:
# NOTE(review): ratio of two hard-coded numbers -- presumably row counts (rows kept vs.
# total); confirm where each figure comes from and consider computing them instead.
81049826/81088742

In [0]:
# Preview the filtered-stations weather dataset.
display(df_weather_EDA)

STATION,DATE,SOURCE,LATITUDE,LONGITUDE,ELEVATION,NAME,REPORT_TYPE,CALL_SIGN,QUALITY_CONTROL,WND,CIG,VIS,TMP,DEW,SLP,AW1,GA1,GA2,GA3,GA4,GE1,GF1,KA1,KA2,MA1,MD1,MW1,MW2,OC1,OD1,OD2,REM,EQD,AW2,AX4,GD1,AW5,GN1,AJ1,AW3,MK1,KA4,GG3,AN1,RH1,AU5,HL1,OB1,AT8,AW7,AZ1,CH1,RH3,GK1,IB1,AX1,CT1,AK1,CN2,OE1,MW5,AO1,KA3,AA3,CR1,CF2,KB2,GM1,AT5,AY2,MW6,MG1,AH6,AU2,GD2,AW4,MF1,AA1,AH2,AH3,OE3,AT6,AL2,AL3,AX5,IB2,AI3,CV3,WA1,GH1,KF1,CU2,CT3,SA1,AU1,KD2,AI5,GO1,GD3,CG3,AI1,AL1,AW6,MW4,AX6,CV1,ME1,KC2,CN1,UA1,GD5,UG2,AT3,AT4,GJ1,MV1,GA5,CT2,CG2,ED1,AE1,CO1,KE1,KB1,AI4,MW3,KG2,AA2,AX2,AY1,RH2,OE2,CU3,MH1,AM1,AU4,GA6,KG1,AU3,AT7,KD1,GL1,IA1,GG2,OD3,UG1,CB1,AI6,CI1,CV2,AZ2,AD1,AH1,WD1,AA4,KC1,IA2,CF3,AI2,AT1,GD4,AX3,AH4,KB3,CU1,CN4,AT2,CG1,CF1,GG1,MV2,CW1,GG4,AB1,AH5,CN3
72641594854,2015-01-01T00:45:00.000+0000,4.0,42.61667,-89.03333,246.3,"JANESVILLE ROCK CO, WI US",FM-15,99999,V020,"230,1,N,0077,1","22000,1,9,N",016093199,-00801,-01601,999999,,,,,,,00991999999999999999999,,,102031999999.0,,,,981.0,,,MET053METAR KJVL 010045Z 23015G19KT 10SM SKC M08/M16 A3013=,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
72641594854,2015-01-01T01:45:00.000+0000,4.0,42.61667,-89.03333,246.3,"JANESVILLE ROCK CO, WI US",FM-15,99999,V020,"240,1,N,0067,1","22000,1,9,N",016093199,-00701,-01601,999999,,,,,,,00991999999999999999999,,,102031999999.0,,,,981.0,,,MET053METAR KJVL 010145Z 24013G19KT 10SM SKC M07/M16 A3013=,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
72641594854,2015-01-01T02:15:00.000+0000,4.0,42.61667,-89.03333,246.3,"JANESVILLE ROCK CO, WI US",FM-15,99999,V020,"240,1,N,0072,1","22000,1,9,N",016093199,-00701,-01501,999999,,,,,,,00991999999999999999999,,,102031999999.0,,,,1081.0,,,MET066METAR KJVL 010215Z AUTO 24014G21KT 10SM CLR M07/M15 A3013 RMK AO2=,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
72641594854,2015-01-01T02:35:00.000+0000,4.0,42.61667,-89.03333,246.3,"JANESVILLE ROCK CO, WI US",FM-15,99999,V020,"240,1,N,0067,1","22000,1,9,N",016093199,-00701,-01501,999999,,,,,,,00991999999999999999999,,,102001999999.0,,,,1081.0,,,MET066METAR KJVL 010235Z AUTO 24013G21KT 10SM CLR M07/M15 A3012 RMK AO2=,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
72641594854,2015-01-01T02:45:00.000+0000,4.0,42.61667,-89.03333,246.3,"JANESVILLE ROCK CO, WI US",FM-15,99999,V020,"230,1,N,0062,1","22000,1,9,N",016093199,-00701,-01501,999999,,,,,,,00991999999999999999999,,,101961999999.0,,,,981.0,,,MET062METAR KJVL 010245Z 23012G19KT 10SM SKC M07/M15 A3011 RMK LAST=,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
72641594854,2015-01-01T02:55:00.000+0000,4.0,42.61667,-89.03333,246.3,"JANESVILLE ROCK CO, WI US",FM-15,99999,V020,"240,1,N,0057,1","22000,1,9,N",016093199,-00701,-01501,999999,,,,,,,00991999999999999999999,,,101961999999.0,,,,821.0,,,MET066METAR KJVL 010255Z AUTO 24011G16KT 10SM CLR M07/M15 A3011 RMK AO2=,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
72641594854,2015-01-01T03:15:00.000+0000,4.0,42.61667,-89.03333,246.3,"JANESVILLE ROCK CO, WI US",FM-15,99999,V020,"240,1,N,0072,1","22000,1,9,N",016093199,-00701,-01401,999999,,,,,,,00991999999999999999999,,,101931999999.0,,,,931.0,,,MET066METAR KJVL 010315Z AUTO 24014G18KT 10SM CLR M07/M14 A3010 RMK AO2=,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
72641594854,2015-01-01T03:35:00.000+0000,4.0,42.61667,-89.03333,246.3,"JANESVILLE ROCK CO, WI US",FM-15,99999,V020,"250,1,N,0067,1","22000,1,9,N",016093199,-00701,-01401,999999,,,,,,,00991999999999999999999,,,101931999999.0,,,,1031.0,,,MET066METAR KJVL 010335Z AUTO 25013G20KT 10SM CLR M07/M14 A3010 RMK AO2=,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
72641594854,2015-01-01T03:55:00.000+0000,4.0,42.61667,-89.03333,246.3,"JANESVILLE ROCK CO, WI US",FM-15,99999,V020,"250,1,N,0057,1","22000,1,9,N",016093199,-00701,-01401,999999,,,,,,,00991999999999999999999,,,101931999999.0,,,,,,,MET063METAR KJVL 010355Z AUTO 25011KT 10SM CLR M07/M14 A3010 RMK AO2=,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
72641594854,2015-01-01T04:15:00.000+0000,4.0,42.61667,-89.03333,246.3,"JANESVILLE ROCK CO, WI US",FM-15,99999,V020,"240,1,N,0046,1","22000,1,9,N",016093199,-00701,-01401,999999,,,,,,,00991999999999999999999,,,101901999999.0,,,,771.0,,,MET066METAR KJVL 010415Z AUTO 24009G15KT 10SM CLR M07/M14 A3009 RMK AO2=,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [0]:
# Load the `6m_leftjoin_all_airports` parquet dataset from the team blob container.
# NOTE(review): presumably the 6-month flights data left-joined to weather for all airports -- confirm.
df_6m_join_dl = spark.read.parquet(f"{blob_url}/6m_leftjoin_all_airports")