# Be able to investigate client requirements for data analysis
# 2.4 Quantitative data analysis - Fetching Match Data

* mean
* median
* standard deviation
* range

## Set up

In [1]:
try:
    import micropip
    await micropip.install(["pyoliteutils", "textblob"])
except:
    pass

In [2]:
from pyoliteutils import *
import pandas as pd

## Stadium Data

In [3]:
# Load the stadium list from the project's data folder and display it.
# NOTE(review): the displayed frame has an "Unnamed: 0" column, which suggests the
# CSV has an unlabeled first column (possibly a saved index) - confirm whether
# that column is wanted downstream.
stadiums = pd.read_csv("../data/eagles/stadiums.csv")
stadiums

Unnamed: 0,Stadium,Postcode
0,OWLERTON,S6 2DE
1,MILLMOOR,S60 1HB
2,HEADINGLEY,LS6 3BR
3,BRAMALL LANE,S2 4QX
4,HILLSBOROUGH,S6 1SW
5,OAKWELL,S71 1ET
6,BELLE VUE,DN4 5DX
7,SALTERGATE,S40 4SX
8,TATTERSFIELD,DN4 5JW
9,THRUM HALL,HX1 4LG


## Adding the Latitude & Longitude

In [4]:
import postcodes_io_api

class Api2(postcodes_io_api.Api):
    """Extends the postcodes_io_api client to include the place query available online."""

    def places_query(self, place):
        """
        This method returns data for a place
        * **:param place** - place name to search for i.e. 'Sheffield'
        * **:return** - the parsed JSON response; callers in this notebook read
          its "status" and "result" entries
        ```
          data = api.places_query('Sheffield')
        ```
        """
        # Local import so the class stays self-contained within this cell.
        from urllib.parse import quote

        # Percent-encode the search text so characters such as spaces, '&' or '#'
        # stay inside the `q` value instead of corrupting the query string.
        # str() keeps non-string input (e.g. NaN) formatting as it did before.
        url = '/places?q={place}'.format(place=quote(str(place), safe=''))
        response = self._make_request('GET', url)
        data = self._parse_json_data(response.content.decode('utf-8'))
        return data

# Single shared client instance; the lookup helpers in this cell call it as `api`.
api  = Api2()

def get_place(place):
    """
    Look up the coordinates of a named place.

    * **:param place** - place name to search for i.e. 'Sheffield'
    * **:return** - `(latitude, longitude)` taken from the first match, or
      `(None, None)` when the request fails or nothing matches
    """
    data = api.places_query(place)

    # A non-200 (or missing) status means the lookup failed
    if data.get("status") != 200:
        return None, None

    # "result" may be absent, null or an empty list when nothing matched
    matches = data.get("result") or []
    if not matches:
        return None, None

    # Gets the data from the first item in the returned list
    first_match = matches[0]
    if "latitude" not in first_match:
        return None, None
    return first_match["latitude"], first_match.get("longitude")

def _outcode_of(postcode):
    """Return the outward part of a postcode, e.g. 'S6' for 'S6 2DE' or 'S62DE'."""
    text = str(postcode).strip()
    parts = text.split()
    if len(parts) > 1:
        return parts[0]
    # No space: the inward part of a full UK postcode is always 3 characters and
    # an outcode is at most 4, so anything longer must be a full postcode.
    if len(text) > 4:
        return text[:-3]
    return text


def get_latlong(postcode):
    """
    Look up the coordinates of a postcode.

    The full postcode is tried first; if that lookup does not succeed, the
    outcode (first part of the postcode) is looked up instead.

    * **:param postcode** - postcode to look up i.e. 'S6 2DE' (an outcode such
      as 'S6' also works via the fallback)
    * **:return** - `(latitude, longitude)`, or `(None, None)` when neither
      lookup succeeds or the response carries no coordinates
    """
    # Look up postcode
    data = api.get_postcode(postcode)
    if data.get("status") != 200:
        # If the postcode lookup fails, try looking it up as just the first bit
        # (outcode). The outcode is extracted here because passing the full
        # postcode to the outcode endpoint cannot match.
        data = api.get_outcode(_outcode_of(postcode))

    if data.get("status") != 200:
        return None, None

    # "result" may be absent or null; treat that the same as "no coordinates"
    result = data.get("result") or {}
    if "latitude" not in result:
        return None, None
    return result["latitude"], result.get("longitude")
    
def get_latlongs(df):
    """
    Add "Latitude" and "Longitude" columns looked up from each row's "Postcode".

    The frame is modified in place and also returned. Nothing is done when the
    frame already has a "Latitude" column or has no "Postcode" column. Any error
    raised during the conversion is printed instead of propagated.

    * **:param df** - DataFrame expected to hold a "Postcode" column
    * **:return** - the same DataFrame object
    """
    already_converted = "Latitude" in df
    if already_converted or "Postcode" not in df:
        return df

    def lookup(row):
        # One postcode lookup per row; the returned pair becomes two columns
        return get_latlong(row["Postcode"])

    try:
        coordinates = df.apply(lookup, axis=1, result_type="expand")
        df[["Latitude", "Longitude"]] = coordinates
    except Exception as error:
        print("Postcode Conversion failed : " + str(error))
    return df

def get_places(df, field_name):
    """
    Add "<field_name> Latitude" and "<field_name> Longitude" columns by looking
    up the place named in each row's `field_name` column.

    The frame is modified in place and also returned. Nothing is done when the
    "<field_name> Latitude" column already exists or `field_name` is not a
    column. Any error raised during the conversion is printed instead of
    propagated.

    * **:param df** - DataFrame holding the place names
    * **:param field_name** - name of the column that contains the place names
    * **:return** - the same DataFrame object
    """
    latitude_column = field_name + " Latitude"
    longitude_column = field_name + " Longitude"

    # Guard on the column this function creates. Checking plain "Latitude" would
    # skip frames that already carry postcode coordinates and would repeat every
    # lookup when the cell is re-run.
    if latitude_column in df or field_name not in df:
        return df

    try:
        df[[latitude_column, longitude_column]] = df.apply(
            lambda row: get_place(row[field_name]), axis=1, result_type="expand"
        )
    except Exception as e:
        print('Place  Conversion failed : ' + str(e))
    return df

In [5]:
# Add "Latitude"/"Longitude" columns from each stadium's postcode.
# get_latlongs modifies the frame in place and returns the same object, so the
# reassignment keeps the name `stadiums` pointing at the enriched frame.
stadiums = get_latlongs(stadiums)
stadiums

Unnamed: 0,Stadium,Postcode,Latitude,Longitude
0,OWLERTON,S6 2DE,53.406031,-1.493303
1,MILLMOOR,S60 1HB,53.428758,-1.369397
2,HEADINGLEY,LS6 3BR,53.816081,-1.580617
3,BRAMALL LANE,S2 4QX,53.371341,-1.469862
4,HILLSBOROUGH,S6 1SW,53.410844,-1.500859
5,OAKWELL,S71 1ET,53.552266,-1.468631
6,BELLE VUE,DN4 5DX,53.517626,-1.10875
7,SALTERGATE,S40 4SX,53.238963,-1.434745
8,TATTERSFIELD,DN4 5JW,53.509088,-1.113845
9,THRUM HALL,HX1 4LG,53.721864,-1.884001


## Calculating Distance from Sheffield

In [6]:
#https://towardsdatascience.com/calculating-distance-between-two-geolocations-in-python-26ad3afe287b
import haversine as hs

# Reference point: (latitude, longitude) of Sheffield from the place lookup
sheffield_latlong = get_place("Sheffield")
if None in sheffield_latlong:
    # get_place returns (None, None) when the lookup fails; stop here with a
    # clear message instead of an obscure error inside the distance calculation
    raise ValueError("Could not find the latitude/longitude of Sheffield")

# Great-circle distance in miles from Sheffield to each stadium. Rows without
# coordinates (failed postcode lookup) get NaN instead of raising. The lambda
# returns a scalar, so no result_type is needed.
stadiums['Miles from Sheffield'] = stadiums.apply(
    lambda row: (
        float("nan")
        if pd.isna(row["Latitude"]) or pd.isna(row["Longitude"])
        else hs.haversine(
            sheffield_latlong, (row["Latitude"], row["Longitude"]), unit=hs.Unit.MILES
        )
    ),
    axis=1,
)
stadiums

Unnamed: 0,Stadium,Postcode,Latitude,Longitude,Miles from Sheffield
0,OWLERTON,S6 2DE,53.406031,-1.493303,1.918934
1,MILLMOOR,S60 1HB,53.428758,-1.369397,5.185172
2,HEADINGLEY,LS6 3BR,53.816081,-1.580617,30.307328
3,BRAMALL LANE,S2 4QX,53.371341,-1.469862,0.773637
4,HILLSBOROUGH,S6 1SW,53.410844,-1.500859,2.367659
5,OAKWELL,S71 1ET,53.552266,-1.468631,11.729022
6,BELLE VUE,DN4 5DX,53.517626,-1.10875,17.50178
7,SALTERGATE,S40 4SX,53.238963,-1.434745,10.015773
8,TATTERSFIELD,DN4 5JW,53.509088,-1.113845,17.01545
9,THRUM HALL,HX1 4LG,53.721864,-1.884001,28.993035


In [7]:
# Save for later: write the stadium table, including the columns added above,
# to a new CSV. index=False leaves the DataFrame index out of the file.
stadiums.to_csv("../data/eagles/stadiums_with_latlong.csv", index=False)