<a href="https://colab.research.google.com/github/chorltonm/fa-cup-upsets/blob/main/notebooks/source_apis/match_day_weather.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import Libraries

import requests
import math
import json
import subprocess
import os
import pandas as pd

from google.cloud import bigquery
from google.oauth2 import service_account
from google.colab import drive
from google.colab import userdata

In [None]:
# Make Google Drive available inside the Colab runtime
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

# Switch the working directory to the project's output folder so files
# opened with a relative name are created there
output_directory = '/content/drive/MyDrive/birkbeck_msc-project/output_files'
os.chdir(output_directory)

In [None]:
# Authentication credentials and keys

# Visual Crossing API key, fetched once from the Colab user data store
api_key = userdata.get('vc_api_key')

# Google Service Account

# Load the JSON key from the Google Drive file. The context manager closes
# the file handle as soon as the key has been parsed.
with open('/content/drive/MyDrive/service_account.json', 'r') as key_file:
    key = json.load(key_file)

# Authenticate using the loaded key
credentials = service_account.Credentials.from_service_account_info(key)

# Set up the BigQuery client with the credentials to project
client = bigquery.Client(credentials=credentials, project='birkbeck-msc-project-422917')

In [None]:
# Fetch the weather for every match date/location returned by the
# preparation-layer view and write one JSON object per line to a local file.

match_date_location_query = "SELECT * FROM preparation_layer.view_fa_cup_round_3_match_date_location"

match_date_location_data = client.query(match_date_location_query)

# Visual Crossing timeline endpoint; location and date are appended per match
weather_base_url = 'https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline'

with open("fa_cup_round_3_weather_data.json", 'w') as match_weather_file:
    for date_location in match_date_location_data:
        match_id = date_location['match_id']
        match_date = date_location['match_date']
        latitude = date_location['match_latitude']
        longitude = date_location['match_longitude']

        # Get weather for the match date, latitude and longitude
        weather_endpoint = f'{weather_base_url}/{latitude},{longitude}/{match_date}'
        weather_response = requests.get(
            weather_endpoint,
            params={'key': api_key},
            timeout=30,  # do not let one stalled request hang the whole cell
        )
        # Fail with the HTTP status (bad key, quota exceeded, ...) instead of
        # a confusing JSON/KeyError further down
        weather_response.raise_for_status()
        weather_data = weather_response.json()

        # Filter out the 'days.stations' data
        filtered_weather_data = {
            'address': weather_data['address'],
            'days': [
                {k: v for k, v in day.items() if k != 'stations'}
                for day in weather_data['days']
            ],
        }

        # Create a dictionary with match_id and weather_data
        data_to_write = {
            'match_id': match_id,
            'weather_data': filtered_weather_data,
        }

        # One JSON document per line (newline-delimited JSON)
        weather_data_json = json.dumps(data_to_write)

        match_weather_file.write(weather_data_json + "\n")

        print(data_to_write)

In [None]:
# Load the newline-delimited JSON weather file into the BigQuery
# extract_layer weather table, replacing whatever the table held before.

# Set the dataset and table name
dataset_name = 'extract_layer'
table_name = 'api_visual_crossing_weather'
# Build the reference explicitly; Client.dataset() is deprecated
table_ref = bigquery.DatasetReference(client.project, dataset_name).table(table_name)

# Load the final data file into the BigQuery table
job_config = bigquery.LoadJobConfig(
    source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
    autodetect=True,
)

with open("fa_cup_round_3_weather_data.json", 'rb') as weather_file:
    job = client.load_table_from_file(
        weather_file, table_ref, job_config=job_config
    )

# Wait for the job to complete
job.result()
total_rows = job.output_rows
print(f"Big Query Rows {total_rows} processed into {dataset_name}.{table_name}")

In [None]:
# Load weather data into the BigQuery load_layer weather table:
# delete the rows for matches present in the extract table, then append the
# extract table's rows to the load table.

# Set the target load dataset and table names
load_dataset_name = 'load_layer'
load_table_name = 'all_api_visual_crossing_weather'
# Build the reference explicitly; Client.dataset() is deprecated
table_ref = bigquery.DatasetReference(client.project, load_dataset_name).table(load_table_name)

# Delete rows already inserted
delete_query = f"DELETE FROM `{load_dataset_name}.{load_table_name}` WHERE match_id IN (SELECT DISTINCT match_id FROM `{dataset_name}.{table_name}`)"
delete_job = client.query(delete_query)
delete_result = delete_job.result()
total_rows_deleted = delete_result.num_dml_affected_rows
print(f"Big Query target table {load_dataset_name}.{load_table_name} rows deleted: {total_rows_deleted}")

# Define the load job configuration
job_config = bigquery.QueryJobConfig(
    destination=table_ref,
    write_disposition=bigquery.WriteDisposition.WRITE_APPEND,
    schema_update_options=[
        bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION,
    ],
)

# Define the SQL query
insert_query = f"SELECT * FROM `{dataset_name}.{table_name}`"

# Run the load job and wait for the job to complete
load_job = client.query(insert_query, job_config=job_config)

try:
    load_job_result = load_job.result()
except Exception as e:
    print(f"Error: {e}")
    # Report the errors of the job that actually failed; `errors` is None
    # when the job recorded none, so fall back to an empty list
    for error in (load_job.errors or []):
        print(f"Error details: {error}")
else:
    total_rows = load_job_result.total_rows
    # NOTE(review): the inserted count is taken from `job`, the extract-layer
    # load job created in the previous cell, not from this query job --
    # confirm this is the intended source for the figure.
    loaded_rows = job.output_rows
    print(f"Big Query target table {load_dataset_name}.{load_table_name} rows inserted: {loaded_rows}")
    print(f"Big Query target table {load_dataset_name}.{load_table_name} final row count: {total_rows} ")
