<a href="https://colab.research.google.com/github/cboyda/LighthouseLabs/blob/main/WeatherAPI_Extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; the scraper cell at the end of this
# notebook saves its CSV files under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pandas as pd
from datetime import datetime, timedelta, date    # allow modification of json into time format

## Weather Functions

In [None]:
def request_weather(start_date,end_date,latitude,longitude,debug = False):
    """Request historical weather from the WeatherAPI.com History API.

    Reads the module-level ``weatherapi_key`` for authentication.

    Parameters
    ----------
    start_date, end_date : str
        Range to fetch, formatted like '2019-11-14'.
    latitude, longitude : float
        GPS position, e.g. 53.05106435177193 and -114.00032023059188.
    debug : bool
        When True, print the query parameters before sending the request.
        NOTE: this output includes the API key.

    Returns
    -------
    tuple
        ``(json_data, status_code, error_message)``.
        * Success: parsed JSON, 200, None.
        * Non-200 reply: parsed JSON, the status code, and the API's own
          error message (None if the body carries no message).
        * Network failure: None, None, and the exception text.
        * Unparseable body: None, the status code, and a description.
    """
    # Imported locally so the function works even when no other cell has
    # imported requests.
    import requests

    url = "http://api.weatherapi.com/v1/history.json?"

    location = str(latitude) + ", " + str(longitude)

    querystring = {
        'key': weatherapi_key,
        'q': location,
        'dt': start_date,
        'end_dt': end_date,
        'lang': 'en'
    }
    if debug:
        print("Querystring is", querystring)

    headers = {'Content-Type': 'application/json'}

    # Bound up front so every return path has a value to hand back.
    status_code = None
    error_message = None

    try:
        response = requests.get(url, headers=headers, params=querystring)
    except requests.exceptions.RequestException as e:
        # Connection errors, DNS failures, invalid URLs, etc.
        print("Exception when calling WeatherAPI.com->history_weather: %s\n" % e)
        return None, status_code, str(e)

    status_code = response.status_code

    try:
        # Parse once and reuse; the body may not be JSON (e.g. a proxy error page).
        payload = response.json()
    except ValueError as e:
        error_message = "Response body is not valid JSON: %s" % e
        print("Exception when calling WeatherAPI.com->history_weather: %s\n" % error_message)
        return None, status_code, error_message

    if status_code != 200: # 200 means success
        error_message = payload.get('error', {}).get('message')
    return payload, status_code, error_message

In [None]:
def daily_json_to_df(df,all_days):
    """
    Append one row per day to df and return the combined dataframe.

    df is the running dataframe of daily results (initialize it before the first call)
    all_days is the list of per-day entries to parse; each entry supplies
    'date', a 'day' summary (including its 'condition') and 'astro' values
    """
    # could have grabbed all fields with pd.json_normalize(json_data['forecast']['forecastday'][0]['day'])
    summary_fields = (
        'maxtemp_c', 'mintemp_c', 'avgtemp_c', 'maxwind_kph',
        'totalprecip_mm', 'avgvis_km', 'avghumidity',
    )
    condition_parts = ('text', 'icon', 'code')
    astro_fields = (
        'sunrise', 'sunset', 'moonrise', 'moonset',
        'moon_phase', 'moon_illumination',
    )

    records = []
    for entry in all_days:
        record = {'date': entry['date']}

        summary = entry['day']
        for name in summary_fields:
            record[name] = summary[name]

        # condition values are nested one level deeper and get a 'condition_' prefix
        for part in condition_parts:
            record['condition_' + part] = summary['condition'][part]

        astro = entry['astro']
        for name in astro_fields:
            record[name] = astro[name]

        records.append(record)

    new_rows = pd.DataFrame(records)
    return pd.concat([df, new_rows], ignore_index=True)

In [None]:
def hourly_json_to_df(df, all_days):
    """
    Append one row per hourly reading to df and return the combined dataframe.

    df is the running dataframe of hourly results (initialize it before the first call)
    all_days is the list of per-day entries to parse; each entry's 'hour'
    list holds the readings that become rows
    """
    # could have grabbed all fields with pd.json_normalize(json_data['forecast']['forecastday'][0]['hour'])
    leading_fields = ('time', 'temp_c', 'is_day')
    condition_parts = ('text', 'icon', 'code')
    # (output column, key in the reading); only wind_dir is renamed.
    # will_it_rain / chance_of_rain / will_it_snow / chance_of_snow are
    # deliberately left out (noted as always 0 from the API).
    trailing_fields = (
        ('wind_kph', 'wind_kph'),
        ('wind_degree', 'wind_degree'),
        ('wind_direction', 'wind_dir'),
        ('pressure_mb', 'pressure_mb'),
        ('humidity', 'humidity'),
        ('cloud', 'cloud'),
        ('feelslike_c', 'feelslike_c'),
        ('windchill_c', 'windchill_c'),
        ('heatindex_c', 'heatindex_c'),
        ('dewpoint_c', 'dewpoint_c'),
        ('vis_km', 'vis_km'),
        ('gust_kph', 'gust_kph'),
        ('uv', 'uv'),
    )

    records = []
    for entry in all_days:
        for reading in entry['hour']:
            record = {name: reading[name] for name in leading_fields}
            for part in condition_parts:
                record['condition_' + part] = reading['condition'][part]
            for column, source_key in trailing_fields:
                record[column] = reading[source_key]
            records.append(record)

    new_rows = pd.DataFrame(records)
    return pd.concat([df, new_rows], ignore_index=True)

## Weather Scraper

In [None]:
# Historical WeatherAPI.com documentation = https://www.weatherapi.com/docs/#:~:text=Forecast%20API-,History%20API,-Marine%20API
# notice that only metric measurements are stored from the API
# account was upgraded to go this far back in time

# initialize our variables
weatherapi_key = "API TOKEN"
latitude = 53.05106435177193
longitude = -114.00032023059188
df_daily = pd.DataFrame()
df_hourly = pd.DataFrame()

# make API call for start_date to end_date
start_date = '2019-11-14'
end_date = '2023-06-12'
max_days = 30 # WeatherAPI max date range
earliest_supported = date.fromisoformat('2010-01-01') # start_date must be after this for WeatherAPI.com

# Convert start_date and end_date to datetime objects so they can be stepped
start_date_dt = datetime.strptime(start_date, '%Y-%m-%d')
end_date_dt = datetime.strptime(end_date, '%Y-%m-%d')

# Walk from start_date to end_date in windows of at most max_days
while start_date_dt < end_date_dt:

    days_diff = (end_date_dt - start_date_dt).days

    # Initialize next interval
    next_date_dt = start_date_dt + timedelta(days=max_days)

    # Print the current window and the overall progress
    print("Start Date:", start_date_dt.date())
    print("Mid Date:", next_date_dt.date())
    print("End Date:", end_date_dt.date())
    print("Number of days:", days_diff )

    # Stop with a clear message instead of falling through with no response to inspect
    if start_date_dt.date() <= earliest_supported:
        print("API Error: start date must be after", earliest_supported, "for WeatherAPI.com")
        break

    # Request a full window, or only the remainder when fewer than max_days are left
    window_end_dt = next_date_dt if days_diff > max_days else end_date_dt
    json_data, response, error_message = request_weather(
        start_date_dt.strftime('%Y-%m-%d'),
        window_end_dt.strftime('%Y-%m-%d'),
        latitude,
        longitude,
        True,
    )

    if response != 200:
        print("API Response:", response)
        print("API Error:", error_message)
        break # exit while loop; data saved by earlier iterations is kept

    print("Writing Data")
    pastdays = json_data['forecast']['forecastday']

    # populate df_daily with summary and astro results for each day
    df_daily = daily_json_to_df(df_daily, pastdays)

    # populate df_hourly with each column for the 24-hour period for each day
    df_hourly = hourly_json_to_df(df_hourly, pastdays)

    # Consecutive windows share their boundary day (this window ends on the
    # date the next one starts on), so keep only the first copy of each
    # day / hour.
    if not df_daily.empty:
        df_daily = df_daily.drop_duplicates(subset='date', ignore_index=True)
    if not df_hourly.empty:
        df_hourly = df_hourly.drop_duplicates(subset='time', ignore_index=True)

    # save to csv after every window so progress survives a later failure
    df_hourly.to_csv('/content/drive/MyDrive/Colab Notebooks/datasets/radar/hourly_weather_api_data.csv', index=False)
    df_daily.to_csv('/content/drive/MyDrive/Colab Notebooks/datasets/radar/daily_weather_api_data.csv', index=False)

    start_date_dt = next_date_dt