# **Weather Data**

Imports

In [1]:
import datetime
import json
import math
import os
import re
import urllib.request

import numpy as np
import pandas as pd
from tqdm.notebook import trange

### Prepare Matches Dataframe 


Extract the date-time and coordinates of each match so that the related weather conditions can be looked up later.

In [2]:
# Load the cleaned matches table and keep league fixtures only (league_is_cup == 0)
matches_raw = pd.read_csv('../../Data/From_Preparation/match_cleaned_v00.csv', low_memory=False)
matches = (
    matches_raw
    .loc[matches_raw['league_is_cup'] == 0]
    # The fixture id becomes the index so it can be attached to each weather record
    .set_index('id')
    .rename(columns={'time_starting_at_date_time': 'time'})
)
# Convert kick-off times to datetime. `infer_datetime_format` is intentionally not
# passed: it is deprecated in pandas 2.x, where format inference is the default.
matches['time'] = pd.to_datetime(matches['time'])
# Keep only the needed columns, ordered chronologically
matches = matches[['time', 'venue_coordinates']].sort_values(by='time')
# Only matches after 2016-07-01 are used; .copy() gives an independent frame so
# the column assignments below never write into a view of the larger table
matches = matches.loc[matches['time'] > '2016-07-01', :].copy()

# Use venue_coordinates to create a latitude and a longitude feature.
# The pattern captures the two comma-separated parts between parentheses; it is a
# raw string so that "\(" is a regex escape rather than an invalid Python escape.
coordinates = matches['venue_coordinates'].str.extract(r'\((.*),(.*)\)').astype(float)
# Fail fast (as the row-wise parsing did) but say which fixtures are affected
unparsed = coordinates.isna().any(axis=1).to_numpy()
if unparsed.any():
    raise ValueError(
        'Could not parse venue_coordinates for {} match(es), e.g. ids {}'.format(
            int(unparsed.sum()), list(matches.index[unparsed][:10])
        )
    )
# Assign by position: `coordinates` has exactly the same row order as `matches`
matches['latitude'] = coordinates[0].to_numpy()
matches['longitude'] = coordinates[1].to_numpy()

### Get Data from Weather API

In [3]:
records = []  # one tuple per (match, returned location); turned into a DataFrame afterwards

# Request pieces that do not change between matches are built once, outside the loop
# API Endpoint + Searched Parameters
weather_api_endpoint = 'https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/weatherdata/history'
query_template = '&contentType=json&aggregateMinutes=30&unitGroup=metric&includeAstronomy=true&include=stats,obs&elements=temp,precip,temp,precip,cloudcover,humidity,pressure,sunset,visibility,winddir,windgust,windspeed&key={}&startDateTime={}&endDateTime={}&locations={},{}'
# The key is read from the environment instead of being written into the notebook.
# VISUAL_CROSSING_API_KEY is a name chosen here; when unset the key is '' as before.
api_key = os.environ.get('VISUAL_CROSSING_API_KEY', '')
# Keys read from each returned observation, in the order they are stored in a record
value_keys = ("temp", "precip", "cloudcover", "humidity", "sealevelpressure",
              "sunset", "visibility", "wdir", "wgust", "wspd")

# Get weather data from the API looping over match observations
for i in trange(len(matches)):
    match_id = matches.index[i]
    kickoff = matches['time'].iloc[i].isoformat()
    latitude = matches['latitude'].iloc[i]
    longitude = matches['longitude'].iloc[i]
    # Start and end are both the kick-off time: a single point in time is requested
    query_params = query_template.format(api_key, kickoff, kickoff, latitude, longitude)
    try:
        # The context manager closes the HTTP response even if reading or decoding fails
        with urllib.request.urlopen(weather_api_endpoint + "?" + query_params) as response:
            payload = json.loads(response.read().decode('utf-8'))
    except Exception as exc:
        # Best effort: report and skip this match. Skipping is essential, otherwise the
        # previous match's response would be parsed again and stored under this match id.
        # The URL is not printed because it contains the API key.
        print("Error reading weather for match {}: {}".format(match_id, exc))
        continue
    # A payload without "locations" (e.g. an error document) has nothing to store
    locations = payload.get("locations") if isinstance(payload, dict) else None
    if not locations:
        print("No weather data returned for match {}".format(match_id))
        continue
    # Obtain & Store Weather data from API into records list
    for location in locations.values():
        values = location.get('values')
        if not values:
            continue  # location returned without observations
        value = values[0]
        records.append(
            (match_id, kickoff, latitude, longitude)
            + tuple(value[key] for key in value_keys)
        )

  0%|          | 0/10536 [00:00<?, ?it/s]

### Create and Store Weather DataFrame

In [None]:
# Weather output directory (relative to this notebook): ../../Data/From_Collection/Weather_output

In [4]:
# Column names of the weather table, in the same order as the fields of each tuple in `records`
labels = [
    'id', 'time', 'lat', 'lon',
    'temp', 'precip', 'cloudcover', 'humidity', 'pressure',
    'sunset', 'visibility', 'winddir', 'windgust', 'windspeed',
]
# Build the table in one step from the accumulated record tuples
weather_output_df = pd.DataFrame.from_records(data=records, columns=labels)
# Persist the table as CSV, leaving out the positional row index
weather_csv_path = '../../Data/From_Collection/Weather_output/weather_output_v02.csv'
weather_output_df.to_csv(weather_csv_path, index=False)
# Show the last rows as a quick check of the collected data
weather_output_df.tail()

Unnamed: 0,id,time,lat,lon,temp,precip,cloudcover,humidity,pressure,sunset,visibility,winddir,windgust,windspeed
10531,18165757,2022-04-11T21:00:00,40.391808,-3.658611,18.5,0.0,50.5,41.27,1004.1,2022-04-11T20:49:26+02:00,11.6,197.0,83.5,27.7
10532,18220188,2022-04-15T19:00:00,44.101711,9.808218,20.5,0.0,,46.11,1023.0,2022-04-15T20:04:47+02:00,10.0,169.0,6.4,1.5
10533,18220185,2022-04-15T21:00:00,45.478025,9.124206,17.3,0.0,0.0,49.63,1018.2,2022-04-15T20:09:36+02:00,10.0,35.0,16.1,8.1
10534,18165743,2022-04-15T21:00:00,43.301376,-1.973602,14.1,0.0,100.0,85.54,1024.6,2022-04-15T20:50:47+02:00,11.0,304.0,19.0,7.7
10535,18157344,2022-04-15T21:00:00,48.107458,-1.712839,18.9,0.0,90.1,55.3,1025.3,2022-04-15T20:57:16+02:00,33.8,31.0,25.6,10.7
