# Data Collection
- Weather Dataset

### Mount Drive

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Import Library

In [1]:
pip install airportsdata

Collecting airportsdata
  Downloading airportsdata-20230630-py3-none-any.whl (1.0 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m50.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: airportsdata
Successfully installed airportsdata-20230630


In [3]:
#pip install airportsdata
import pandas as pd
import airportsdata
import requests
import time

### Read Flight Dataset (Cleaned)

In [6]:
flight = pd.read_excel('/content/drive/My Drive/FYP2/FYP2_FlightDataset_Final.xlsx')

In [7]:
flight['arr_iata'].unique()

array(['BOM', 'KBR', 'DAC', 'SIN', 'CHC', 'SFO', 'CMB', 'MYY', 'MEL',
       'KCH', 'AOR', 'LGK', 'DMK', 'BKI', 'KUL', 'PNH', 'PER', 'MNL',
       'AUH', 'DPS', 'SGN', 'BNE', 'DEL', 'PEN', 'SYD', 'BKK', 'CGK',
       'JED', 'UPG', 'KNO', 'KTM', 'HKT', 'USM', 'KHI', 'CEI', 'MLE',
       'BPN', 'ISB', 'AKL', 'LAX', 'AMD', 'KHH', 'HDY', 'KKC', 'VTE',
       'KBV', 'SUB', 'KUA', 'SBW', 'HKG', 'CJB', 'DAD', 'CNX', 'UBP',
       'UTH', 'TPE', 'JHB', 'RGN', 'CEB', 'REP', 'TWU', 'HAN', 'PKU',
       'MCT', 'URT', 'BTU', 'HGH', 'IKT', 'BTJ', 'TGG', 'LPT', 'SDK',
       'KJA', 'PDG', 'CTU', 'CAN', 'PVG', 'IPH', 'CRK', 'THS', 'YIA',
       'MED', 'ATQ', 'DXB', 'TDX', 'HND', 'LBU', 'ICN', 'NST', 'XMN',
       'KIX', 'CDG', 'BWN', 'SVO', 'ZRH', 'LHR', 'BLR', 'NRT', 'SZX',
       'DVO', 'MRU', 'NAW', 'MDL', 'FRA', 'JFK', 'LHE', 'NNG', 'GAY',
       'PNQ', 'DLI', 'MAA', 'DOH', 'HEL', 'CXR', 'LPQ', 'ALA', 'PQC',
       'MDC', 'YVR', 'CNS', 'TRZ', 'CCU', 'CJU', 'KWI', 'VIE', 'TAE',
       'MAN', 'FCO',

In [8]:
flight['arr_iata'].nunique()

185

### Group By Date (get earliest and latest date for each airport)

In [9]:
flight['dep_Date_utc'] = pd.to_datetime(flight['dep_Date_utc'], format='%d-%m-%Y')
flight['arr_Date_utc'] = pd.to_datetime(flight['arr_Date_utc'], format='%d-%m-%Y')

dep = flight.groupby(['dep_iata'])['dep_Date_utc'].agg(['min', 'max']).reset_index()
arr = flight.groupby(['arr_iata'])['arr_Date_utc'].agg(['min', 'max']).reset_index()

dep = dep.rename(columns={'dep_iata': 'iata'})
arr = arr.rename(columns={'arr_iata': 'iata'})

### Weather Data Collection

In [10]:
def getData(weathertable, iata):
  airports = airportsdata.load('IATA')  # key is the IATA location code
  data_frames = []

  for index, row in iata.iterrows():
    try:
      airport = row["iata"]
      print(airport)
      latitude = airports[row["iata"]]['lat']
      longitude = airports[row["iata"]]['lon']
      start =  row['min'].date()
      end =  row['max'].date()

      url = "https://archive-api.open-meteo.com/v1/archive"
      params = {
          "latitude": latitude,
          "longitude": longitude,
          "start_date": start,
          "end_date": end,
          "hourly": ["temperature_2m","relativehumidity_2m","pressure_msl","precipitation","rain","snowfall","weathercode","cloudcover","windspeed_10m","winddirection_10m","windgusts_10m"]
      }
      response = requests.get(url, params=params)

      if response.ok:
          data = response.json()
          # Do something with the data
      else:
          response.raise_for_status()  # Raise an exception if the request was unsuccessful

      weatherdata = data['hourly']

      for i in range(len(weatherdata['time'])):
        new_data = pd.DataFrame({'Airport': airport,'Time': weatherdata['time'][i] ,'Temperature': weatherdata['temperature_2m'][i], 'Humidity': weatherdata['relativehumidity_2m'][i],
                                'SeaLevelPressure': weatherdata['pressure_msl'][i], 'Precipitation': weatherdata['precipitation'][i], 'Rain': weatherdata['rain'][i], 'Snowfall': weatherdata['snowfall'][i],
                                'WeatherCode': weatherdata['weathercode'][i], 'Cloudcover': weatherdata['cloudcover'][i], 'WindSpeed': weatherdata['windspeed_10m'][i], 'WindDirection': weatherdata['winddirection_10m'][i],
                                'WindGusts': weatherdata['windgusts_10m'][i]}, index=[i])
        weathertable = pd.concat([weathertable, new_data], axis=0, ignore_index=True)

    except:
      airport = row["iata"]
      print("ERRRRRORRRR ",airport)

  return weathertable


weathertable = pd.DataFrame(columns=['Airport','Time','Temperature','Humidity','SeaLevelPressure','Precipitation','Rain','Snowfall','WeatherCode','Cloudcover','WindSpeed','WindDirection','WindGusts'])
start_time = time.time()

weathertable = getData(weathertable,dep)

end_time = time.time() # record end time
elapsed_time = end_time - start_time # calculate elapsed time
print(f"Departure Elapsed time: {elapsed_time:.6f} seconds")

start_time = time.time()
#weathertable = getData(weathertable,arr)

end_time = time.time() # record end time
elapsed_time = end_time - start_time # calculate elapsed time
print(f"Arrival Elapsed time: {elapsed_time:.6f} seconds")

BKK
KUL
SIN
Departure Elapsed time: 8.210179 seconds
Arrival Elapsed time: 0.000030 seconds


In [None]:
weathertable = weathertable.drop_duplicates()
weathertable['Time'] = pd.to_datetime(weathertable['Time'], format='%Y-%m-%dT%H:%M')
weathertable['Date'] = weathertable['Time'].dt.strftime('%d-%m-%Y')
weathertable['Time'] =  weathertable['Time'].dt.hour
weathertable = weathertable.rename(columns={'Time': 'Hour'})

weathertable.insert(1,'Date', weathertable.pop('Date'))
weathertable

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  weathertable['Time'] = pd.to_datetime(weathertable['Time'], format='%Y-%m-%dT%H:%M')


Unnamed: 0,Airport,Date,Hour,Temperature,Humidity,SeaLevelPressure,Precipitation,Rain,Snowfall,WeatherCode,Cloudcover,WindSpeed,WindDirection,WindGusts
0,BKK,10-11-2022,0,22.8,63,1012.6,0.0,0.0,0.0,0,19,10.1,92,16.2
1,BKK,10-11-2022,1,25.0,57,1013.1,0.0,0.0,0.0,1,22,9.4,86,18.7
2,BKK,10-11-2022,2,27.5,51,1013.4,0.0,0.0,0.0,1,27,9.3,77,19.1
3,BKK,10-11-2022,3,29.5,48,1013.5,0.0,0.0,0.0,1,27,9.0,61,20.9
4,BKK,10-11-2022,4,31.0,47,1013.2,0.0,0.0,0.0,1,29,8.2,52,21.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
247219,ZRH,06-01-2023,19,5.1,96,1025.5,0.0,0.0,0.0,3,92,3.7,209,8.3
247220,ZRH,06-01-2023,20,4.2,97,1025.3,0.0,0.0,0.0,3,92,4.7,203,10.4
247221,ZRH,06-01-2023,21,4.7,97,1024.9,0.0,0.0,0.0,3,88,4.1,225,9.4
247222,ZRH,06-01-2023,22,5.0,96,1025.0,0.0,0.0,0.0,3,91,5.0,201,7.2


In [None]:
from google.colab import files

weathertable.to_excel('Data_Weather.xlsx', encoding = 'utf-8-sig',index=False)
files.download('Data_Weather.xlsx')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>