<a href="https://colab.research.google.com/github/haraldriisager/ML-Project/blob/main/Notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [25]:
!pip install gdown
!pip install openmeteo-requests
!pip install requests-cache retry-requests numpy pandas



In [26]:
import gdown
import pandas as pd
import openmeteo_requests
import requests_cache
from retry_requests import retry
from datetime import datetime, timedelta

In [31]:
# Import Flight Delay data set

file_id = '1Uf3q-CoSVK84kogmfTAtvIZ3k8crjN1d'
gdown.download(f'https://drive.google.com/uc?id={file_id}', 'data.csv', quiet=False)
df = pd.read_csv('data.csv')

print(df.columns)

Downloading...
From (original): https://drive.google.com/uc?id=1Uf3q-CoSVK84kogmfTAtvIZ3k8crjN1d
From (redirected): https://drive.google.com/uc?id=1Uf3q-CoSVK84kogmfTAtvIZ3k8crjN1d&confirm=t&uuid=bcedeb4c-5281-4151-8222-f588a8a1ad1a
To: /content/data.csv
100%|██████████| 256M/256M [00:03<00:00, 84.2MB/s]


Index(['FlightID', 'Airline', 'FlightNumber', 'Origin', 'Destination',
       'ScheduledDeparture', 'ActualDeparture', 'ScheduledArrival',
       'ActualArrival', 'DelayMinutes', 'DelayReason', 'Cancelled', 'Diverted',
       'AircraftType', 'TailNumber', 'Distance'],
      dtype='object')


In [28]:
# Import Airport locations data set

file_id = '1eK1b3XX3jl-9XtQrH-_924YjV3rlqUFB'
gdown.download(f'https://drive.google.com/uc?id={file_id}', 'airport_locations.csv', quiet=False)
airport_locations = pd.read_csv('airport_locations.csv', delimiter=';')
airport_locations = airport_locations[['Airport Code', 'Latitude', 'Longitude']].dropna()
airport_locations = airport_locations.set_index('Airport Code').to_dict(orient='index')

Downloading...
From: https://drive.google.com/uc?id=1eK1b3XX3jl-9XtQrH-_924YjV3rlqUFB
To: /content/airport_locations.csv
100%|██████████| 875k/875k [00:00<00:00, 32.7MB/s]


In [32]:
# Append the origin and destination coordinates to the data set

def get_airport_coordinates(airport_code):
  if airport_code in airport_locations:
    return airport_locations[airport_code]['Latitude'], airport_locations[airport_code]['Longitude']
  return None, None

df["OriginLatitude"], df["OriginLongitude"] = zip(*df["Origin"].apply(get_airport_coordinates))
df["DestinationLatitude"], df["DestinationLongitude"] = zip(*df["Destination"].apply(get_airport_coordinates))

In [6]:
# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)
url = "https://archive-api.open-meteo.com/v1/archive"

In [33]:
def get_weather_data(date, time, latitude, longitude):

  flight_datetime = datetime.strptime(f"{date} {time}", "%Y-%m-%d %H:%M")
  start_date = flight_datetime.strftime("%Y-%m-%d")
  end_date = (flight_datetime + timedelta(hours=1)).strftime("%Y-%m-%d")

  params = {
    "latitude": latitude,
    "longitude": longitude,
    "start_date": date,
    "end_date": date,
    "hourly": [
        "temperature_2m",
        "precipitation",
        "rain",
        "snowfall",
        "weather_code"
        "windspeed_10m",
        "windspeed_100m",
        "winddirection_10m",
        "winddirection_100m",
        "windgusts_10m",
    ],
    "timezone": "auto"
  }

  response = openmeteo.weather_api(url, params=params)

  hourly_data = response.Hourly() if response else None
  if hourly_data:
    return {
        "temperature_2m": hourly_data["temperature_2m"],
        "precipitation": hourly_data["precipitation"],
        "rain": hourly_data["rain"],
        "snowfall": hourly_data["snowfall"],
        "weather_code": hourly_data["weather_code"],
        "windspeed_10m": hourly_data["windspeed_10m"],
        "windspeed_100m": hourly_data["windspeed_100m"],
        "winddirection_10m": hourly_data["winddirection_10m"],
        "winddirection_100m": hourly_data["winddirection_100m"]
    }
  return None

In [34]:
# Append the weather data to the data set

origin_weather_data = []
destination_weather_data = []

for _, row in df.iterrows():
  origin_weather = get_weather_data(row["FlightDate"], row["ScheduledDeparture"], row["OriginLatitude"], row["OriginLongitude"])
  destination_weather = get_weather_data(row["FlightDate"], row["ScheduledArrival"], row["DestinationLatitude"], row["DestinationLongitude"])

  origin_weather_data.append(origin_weather)
  destination_weather_data.append(destination_weather)

origin_weather_df = pd.DataFrame(origin_weather_data).add_prefix('origin_')
destination_weather_df = pd.DataFrame(destination_weather_data).add_prefix('destination_')

df = pd.concat([df, origin_weather_df, destination_weather_df], axis=1)

KeyError: 'FlightDate'

In [35]:
# Clean data so that only the desired features are present

required_columns = [
    'FlightDate',
    'Airline',
    'FlightNumber',
    'Origin',
    'Destination',
    'ScheduledDeparture',
    'ScheduledArrival',
    'ScheduledElapsedTime',
    'Distance',
    'DepartureDelay',
    'ArrivalDelay',
    'DelayDueWeather'
]

weather_columns = [col for col in df.columns if col.startswith('origin_') or col.startswith('destination_')]

final_columns = required_columns + weather_columns

df_cleaned = df[final_columns]

print(df_cleaned.head())

KeyError: "['FlightDate', 'ScheduledElapsedTime', 'DepartureDelay', 'ArrivalDelay', 'DelayDueWeather'] not in index"