# **Weather Data**

In [None]:
import pandas as pd
import math
import datetime
import urllib.request
import json
import numpy as np
import re
from tqdm.notebook import trange

In [None]:
# Load the fixtures table. low_memory=False asks pandas to parse the file in a
# single pass rather than in chunks, which avoids mixed-type column inference.
df = pd.read_csv('input_weather.csv', low_memory=False)
# Use the fixture id as the index (reassignment instead of inplace=True).
df = df.set_index('id')

#### Change df types ####
# Numeric columns listed here keep their float dtype; every other numeric
# column is rounded and stored as nullable integer ('Int64') below.
float_columns = [
    'weather_report_pressure',
    'weather_report_temperature_celcius_temp',
    'weather_report_wind_degree',
    'weather_windspeed(m/s)',
    'home_passes_percentage',
    'away_passes_percentage',
]
# Force these three columns to float64 before the numeric selection
# (presumably so they are always picked up by select_dtypes -- confirm against
# the raw CSV). Clouds and humidity are not in float_columns, so they are
# rounded to Int64 afterwards; wind speed stays float.
weather_measure_columns = ['weather_windspeed(m/s)', 'weather_clouds(%)', 'weather_humidity(%)']
df[weather_measure_columns] = df[weather_measure_columns].astype('float64')

# Select the numeric columns once, then drop the ones that must remain floats.
numeric_part = df.select_dtypes(np.number)
m = numeric_part.loc[:, ~numeric_part.columns.isin(float_columns)]
df[m.columns] = m.round().astype('Int64')

# Convert the kick-off timestamp to datetime. `infer_datetime_format` is
# deprecated since pandas 2.0 (format inference is the default there), so it
# is no longer passed.
df['time_starting_at_date_time'] = pd.to_datetime(df['time_starting_at_date_time'])
# Order fixtures chronologically and shorten the timestamp column name.
df = df.sort_values(by='time_starting_at_date_time')
df = df.rename(columns={'time_starting_at_date_time': 'time'})

In [None]:
# Keep only the two columns needed for the weather lookup. The explicit
# .copy() makes the result an independent frame, so adding columns to it later
# does not trigger pandas' SettingWithCopyWarning.
df = df[['time', 'venue_coordinates']].copy()

In [None]:
# Split 'venue_coordinates' strings of the form "(<lat>,<lon>)" into two float
# columns. The pattern is a raw string (the non-raw '\(' is an invalid escape
# sequence) and is applied once per row via the vectorised str.extract instead
# of twice through a row-wise apply. Rows whose value is missing or does not
# match the pattern yield NaN in both columns.
coordinate_parts = df['venue_coordinates'].str.extract(r'\((.*),(.*)\)').astype('float64')
df['latitude'] = coordinate_parts[0]   # first capture group
df['longitude'] = coordinate_parts[1]  # second capture group

In [None]:
# Preview the first five rows of the working frame.
df.head(n=5)

In [None]:
# Accumulator for the weather rows; each entry appended later is one tuple.
records = []
# Column names for the output table, one per field of a record tuple.
labels = [
    'id', 'time', 'lat', 'lon',
    'temp', 'precip', 'cloudcover', 'humidity', 'pressure',
    'sunset', 'winddir', 'windgust', 'windspeed',
]

In [None]:
# Fetch historical weather for one batch of fixtures: positional rows
# 2490..3489 of df (1000 rows; the earlier "SECOND 500" label did not match
# the range). One request is made per fixture, using the fixture time as both
# start and end of the query window, and the first returned observation of
# each location is appended to `records`.
weather_api_endpoint = 'https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/weatherdata/history'
# Loop-invariant query template; the placeholders are key, start, end, lat, lon.
query_template = '&contentType=json&aggregateMinutes=15&unitGroup=metric&includeAstronomy=true&include=stats,obs&elements=temp,precip,temp,precip,cloudcover,humidity,pressure,sunset,winddir,windgust,windspeed&key={}&startDateTime={}&endDateTime={}&locations={},{}'
# NOTE(review): the API key is hard-coded in the notebook; consider loading it
# from an environment variable or a config file kept out of version control.
api_key = 'DZH75DKGAFFV2JCKN8CUJK7NN'

# Cap the upper bound at len(df) so a shorter table ends the batch cleanly
# instead of raising IndexError from .iloc.
for i in trange(2490, min(3490, len(df))):
    fixture_time = df['time'].iloc[i].isoformat()
    latitude = df['latitude'].iloc[i]
    longitude = df['longitude'].iloc[i]

    query_params = query_template.format(api_key, fixture_time, fixture_time, latitude, longitude)
    request_url = weather_api_endpoint + "?" + query_params

    try:
        # The context manager closes the HTTP response even if read() fails.
        with urllib.request.urlopen(request_url) as response:
            data = response.read()
    except Exception:
        print("Error reading from {}".format(request_url))
        # Skip this fixture. Falling through would parse `data` left over from
        # the previous iteration (recording another fixture's weather under
        # this id) or raise NameError if no request has succeeded yet.
        continue

    locations = json.loads(data.decode('utf-8'))["locations"]
    for locationid in locations:
        # Only the first observation of each location is kept.
        value = locations[locationid]['values'][0]
        records.append((
            df.index[i],
            fixture_time,
            latitude,
            longitude,
            value["temp"],
            value["precip"],
            value["cloudcover"],
            value["humidity"],
            value["sealevelpressure"],
            value["sunset"],
            value["wdir"],
            value["wgust"],
            value["wspd"],
        ))

In [None]:
# Turn the collected record tuples into a table whose columns are named by
# `labels`.
output_df = pd.DataFrame.from_records(data=records, columns=labels)
# Show the last five rows as a quick look at the most recently added records.
output_df.tail(n=5)

In [None]:
# Write the weather table to disk; index=False leaves the row index out of the file.
output_df.to_csv(path_or_buf='weather_out_complete.csv', index=False)