# Be able to investigate client requirements for data analysis
# 2.4 Quantitative data analysis

* mean
* median
* standard deviation
* range

## Set up

In [1]:
try:
    import micropip
    await micropip.install(["pyoliteutils", "textblob"])
except:
    pass

In [2]:
from pyoliteutils import *
import pandas as pd

## Other useful Opponent / Match Data?

### Location of opponents, and distance to Sheffield

To be able to put data on a map we will need latitude and longitude.

#### Possible Data Sources : 
- https://api.postcodes.io/places?q=[query]
- Nominatim / GeoPy
    - https://geopy.readthedocs.io/en/stable/
    - https://nominatim.org/release-docs/latest/api/Search/ 
    - https://medium.com/@gopesh3652/geocoding-with-python-using-nominatim-a-beginners-guide-220b250ca48d 


## Stadium Data

In [3]:
# Load the stadium names and postcodes.
stadiums = pd.read_csv(filepath_or_buffer="../data/eagles/stadiums.csv")
stadiums

Unnamed: 0,Stadium,Postcode
0,OWLERTON,S6 2DE
1,MILLMOOR,S60 1HB
2,HEADINGLEY,LS6 3BR
3,BRAMALL LANE,S2 4QX
4,HILLSBOROUGH,S6 1SW
5,OAKWELL,S71 1ET
6,BELLE VUE,DN4 5DX
7,SALTERGATE,S40 4SX
8,TATTERSFIELD,DN4 5JW
9,THRUM HALL,HX1 4LG


## Adding the Latitude & Longitude

In [14]:
import postcodes_io_api
class Api2(postcodes_io_api.Api):
    """postcodes_io_api client extended with a free-text place search."""

    def places_query(self, place):
        """
        This method queries the `/places` endpoint for a place name
        * **:param place** - place name to search for i.e. 'Sheffield'
        * **::return** - the response body as parsed by `_parse_json_data`
        ```
          data = api.places_query('Sheffield')

        ```
        """
        # Imported locally so this cell does not depend on another cell's imports.
        from urllib.parse import quote

        # Percent-encode the search text so that names containing spaces,
        # '&', '#', '?' etc. are sent as one `q` value instead of corrupting
        # the query string. str() keeps non-string inputs working as before.
        url = '/places?q={place}'.format(place=quote(str(place), safe=''))
        response = self._make_request('GET', url)
        data = self._parse_json_data(response.content.decode('utf-8'))
        return data

# Single shared client used by get_place() and get_latlong().
api = Api2()

def get_place(place):
    """
    Look up the coordinates of a named place.

    Calls `api.places_query(place)` and uses the first entry of the
    response's "result" list.

    * **:param place** - place name to search for i.e. 'Sheffield'
    * **::return** - `(latitude, longitude)`; `(None, None)` when the status
      is not 200, the result list is empty, or the first entry has no
      "latitude" key
    """
    response = api.places_query(place)

    if response["status"] != 200:
        return None, None

    matches = response["result"]
    if not len(matches):
        return None, None

    # Only the first match is considered.
    best_match = matches[0]
    if "latitude" not in best_match:
        return None, None

    return best_match["latitude"], best_match["longitude"]

def get_latlong(postcode):
    """
    Look up the coordinates of a postcode.

    Tries `api.get_postcode(postcode)` first; if that response's status is
    not 200, the same string is retried with `api.get_outcode(postcode)`.

    * **:param postcode** - postcode (or outcode) to look up
    * **::return** - `(latitude, longitude)`; `(None, None)` when neither
      lookup returns status 200 or the result has no "latitude" key
    """
    response = api.get_postcode(postcode)

    # Fall back to the outcode lookup with the same input.
    if response["status"] != 200:
        response = api.get_outcode(postcode)

    if response["status"] != 200:
        return None, None

    details = response["result"]
    if "latitude" not in details:
        return None, None

    return details["latitude"], details["longitude"]
    
def get_latlongs(df):
    """
    Add "Latitude" and "Longitude" columns derived from the "Postcode" column.

    The frame is modified in place and also returned. Nothing is done when
    the frame already has a "Latitude" column or has no "Postcode" column.
    Any exception raised during the lookup is printed rather than raised.

    * **:param df** - DataFrame to extend
    * **::return** - the same DataFrame object
    """
    needs_lookup = ("Postcode" in df) and ("Latitude" not in df)
    if not needs_lookup:
        return df

    def lookup_row(row):
        return get_latlong(row["Postcode"])

    try:
        coordinates = df.apply(lookup_row, axis=1, result_type="expand")
        df[["Latitude", "Longitude"]] = coordinates
    except Exception as e:
        print('Postcode Conversion failed : '+ str(e))
    return df

def get_places(df, field_name):
    """
    Add "<field_name> Latitude" and "<field_name> Longitude" columns by
    looking up each value of the `field_name` column with `get_place`.

    The frame is modified in place and also returned. Nothing is done when
    the target latitude column already exists (so re-running is cheap) or
    when `field_name` is not a column. Any exception raised during the
    lookup is printed rather than raised.

    * **:param df** - DataFrame to extend
    * **:param field_name** - name of the column holding place names
    * **::return** - the same DataFrame object
    """
    lat_column = "{} Latitude".format(field_name)
    lon_column = "{} Longitude".format(field_name)

    # Guard on the column this function actually creates; checking a plain
    # "Latitude" column never matched what is written below.
    if (lat_column not in df) and (field_name in df):
        # Each distinct name is looked up once; repeated names reuse the result.
        looked_up = {}

        def lookup_row(row):
            place = row[field_name]
            if place not in looked_up:
                looked_up[place] = get_place(place)
            return looked_up[place]

        try:
            df[[lat_column, lon_column]] = df.apply(
                lookup_row, axis=1, result_type="expand"
            )
        except Exception as e:
            print('Place  Conversion failed : '+ str(e))
    return df

In [15]:
# Look up coordinates for every stadium postcode.
stadiums = get_latlongs(df=stadiums)
stadiums

Unnamed: 0,Stadium,Postcode,Latitude,Longitude,Miles from Sheffield
0,OWLERTON,S6 2DE,53.406031,-1.493303,1.918934
1,MILLMOOR,S60 1HB,53.428758,-1.369397,5.185172
2,HEADINGLEY,LS6 3BR,53.816081,-1.580617,30.307328
3,BRAMALL LANE,S2 4QX,53.371341,-1.469862,0.773637
4,HILLSBOROUGH,S6 1SW,53.410844,-1.500859,2.367659
5,OAKWELL,S71 1ET,53.552266,-1.468631,11.729022
6,BELLE VUE,DN4 5DX,53.517626,-1.10875,17.50178
7,SALTERGATE,S40 4SX,53.238963,-1.434745,10.015773
8,TATTERSFIELD,DN4 5JW,53.509088,-1.113845,17.01545
9,THRUM HALL,HX1 4LG,53.721864,-1.884001,28.993035


In [18]:
# Haversine distance in miles from Sheffield to each stadium.
# Reference: https://towardsdatascience.com/calculating-distance-between-two-geolocations-in-python-26ad3afe287b
import haversine as hs

sheffield_latlong = get_place("Sheffield")

stadiums['Miles from Sheffield'] = stadiums.apply(
    lambda stadium: hs.haversine(
        sheffield_latlong,
        (stadium["Latitude"], stadium["Longitude"]),
        unit=hs.Unit.MILES,
    ),
    axis=1,
    result_type="expand",
)
stadiums

Unnamed: 0,Stadium,Postcode,Latitude,Longitude,Miles from Sheffield
0,OWLERTON,S6 2DE,53.406031,-1.493303,1.918934
1,MILLMOOR,S60 1HB,53.428758,-1.369397,5.185172
2,HEADINGLEY,LS6 3BR,53.816081,-1.580617,30.307328
3,BRAMALL LANE,S2 4QX,53.371341,-1.469862,0.773637
4,HILLSBOROUGH,S6 1SW,53.410844,-1.500859,2.367659
5,OAKWELL,S71 1ET,53.552266,-1.468631,11.729022
6,BELLE VUE,DN4 5DX,53.517626,-1.10875,17.50178
7,SALTERGATE,S40 4SX,53.238963,-1.434745,10.015773
8,TATTERSFIELD,DN4 5JW,53.509088,-1.113845,17.01545
9,THRUM HALL,HX1 4LG,53.721864,-1.884001,28.993035


In [19]:
# Save the geocoded stadiums for later use (without the index column).
stadiums.to_csv(path_or_buf="../data/eagles/stadiums_with_latlong.csv", index=False)

## Crowd / Attendance Data

In [20]:
# Load the attendance records.
# Date handling reference: https://datascienceparichay.com/article/pandas-extract-year-from-datetime-column/
crowds = pd.read_csv(filepath_or_buffer="../data/eagles/crowds.csv")
crowds

Unnamed: 0,Date,Opponents,Attendance
0,02/09/1984,Rochdale,1425
1,09/09/1984,Fulham,1145
2,23/09/1984,Salford,1159
3,14/10/1984,Runcorn,1076
4,28/10/1984,Bridgend,826
...,...,...,...
583,24/07/2022,Workington,644
584,07/08/2022,Whitehaven,727
585,12/08/2022,Halifax,569
586,02/09/2022,Featherstone,912


## Add the distance to Sheffield

In [21]:
# Look up coordinates for each opponent by name.
crowds = get_places(df=crowds, field_name="Opponents")
crowds

Unnamed: 0,Date,Opponents,Attendance,Opponents Latitude,Opponents Longitude
0,02/09/1984,Rochdale,1425,53.616965,-2.156708
1,09/09/1984,Fulham,1145,51.471626,-0.211248
2,23/09/1984,Salford,1159,53.710701,-2.098607
3,14/10/1984,Runcorn,1076,53.341713,-2.731298
4,28/10/1984,Bridgend,826,56.295700,-2.978223
...,...,...,...,...,...
583,24/07/2022,Workington,644,54.643374,-3.549369
584,07/08/2022,Whitehaven,727,54.550728,-3.587463
585,12/08/2022,Halifax,569,53.722830,-1.860176
586,02/09/2022,Featherstone,912,53.674631,-1.359178


In [None]:
# Distance in miles from Sheffield to each opponent, using the coordinates that
# get_places() added to `crowds`. The stadium distances are already calculated
# in the stadium section, so only `crowds` is handled here.
def miles_from_sheffield(latitude, longitude):
    """Return the haversine distance in miles from Sheffield, or None when a coordinate is missing."""
    if pd.isna(latitude) or pd.isna(longitude):
        # get_place() returns None for names it could not resolve.
        return None
    return hs.haversine(sheffield_latlong, (latitude, longitude), unit=hs.Unit.MILES)

if ("Opponents Latitude" in crowds) and ("Opponents Longitude" in crowds):
    crowds['Miles from Sheffield'] = crowds.apply(
        lambda row: miles_from_sheffield(row["Opponents Latitude"], row["Opponents Longitude"]),
        axis=1,
    )
else:
    # The place lookup failed earlier; keep the notebook running without the column.
    print('Distance not calculated : opponent coordinates are missing')

crowds

## Get Usable Date Information

In [None]:
# Parse the day/month/year strings, then derive calendar fields from the parsed
# dates. "Year" and "Month" are the keys used for the weather merge further down.
crowds['Date'] = pd.to_datetime(crowds['Date'], format="%d/%m/%Y")
match_date = crowds['Date'].dt
crowds['Year'] = match_date.year
crowds['Day'] = match_date.day_name()
crowds['Month'] = match_date.month
crowds['Month Name'] = match_date.month_name()
crowds

## Saving for later

In [None]:
# NOTE(review): this is the only save in the notebook that writes the index
# column (the others pass index=False) - confirm whether that is intended.
crowds.to_csv(path_or_buf="../data/eagles/crowds_clean.csv", index=True)

### Weather in Sheffield

Daily weather at the actual stadium location would be more precise, but monthly data from Sheffield's weather station is easier to start with.

#### Possible Data Sources : 

- https://www.metoffice.gov.uk/research/climate/maps-and-data/historic-station-data saved as [sheffield_monthly_weather.csv](../data/eagles/sheffield_monthly_weather.csv)
- https://api.openweathermap.org/data/3.0/onecall/timemachine?lat=39.099724&lon=-94.578331&dt=1643803200&appid={YOUR_API_KEY} (keep the real API key out of the notebook, e.g. in an environment variable)

In [None]:
# Load the monthly weather records for Sheffield.
sheffield_monthly_weather = pd.read_csv(
    filepath_or_buffer="../data/eagles/sheffield_monthly_weather.csv"
)
sheffield_monthly_weather

In [None]:
# Give the weather columns readable names; "Year" and "Month" match the
# column names added to `crowds` so the two frames can be merged on them.
sheffield_monthly_weather = sheffield_monthly_weather.rename(
    columns={
        'yyyy': "Year",
        'mm': "Month",
        'tmax degC': "Max Temperature C",
        'tmin degC': "Min Temperature C",
        'rain mm': "Rain mm",
    }
)
sheffield_monthly_weather

In [None]:
# Left merge keeps every match, including those with no weather record for
# their year and month.
crowds_with_monthly_weather = crowds.merge(
    sheffield_monthly_weather,
    how="left",
    on=["Year", "Month"],
)
crowds_with_monthly_weather.to_csv(
    "../data/eagles/crowds_with_monthly_weather.csv",
    index=False,
)
crowds_with_monthly_weather

In [None]:
 crowds_with_monthly_weather[crowds_with_monthly_weather.isna().any(axis=1)]

## Survey Data

In [None]:
# Load the survey responses.
questionnaire = pd.read_csv(filepath_or_buffer="../data/eagles/questionnaire.csv")
questionnaire

In [None]:

# Shorten the postcode question to "Postcode", the column name get_latlongs() looks for.
questionnaire = questionnaire.rename(
    columns={
        'Could you provide your postcode? This will help us understand where people are responding from. ': "Postcode",
    }
)

questionnaire


In [None]:


# Select the columns used for the quantitative analysis, by position.
# .copy() makes this an independent frame: get_latlongs() adds columns to it
# in the next step, which on a bare column subset raises pandas'
# SettingWithCopyWarning.
questionnaire_quantitive = questionnaire[
    questionnaire.columns[[1, 2, 3, 14, 15, 16, 17]]
].copy()
questionnaire_quantitive

In [None]:
# Look up coordinates for each respondent's postcode.
questionnaire_quantitive = get_latlongs(df=questionnaire_quantitive)
questionnaire_quantitive

In [None]:
# Save the geocoded survey subset (without the index column).
questionnaire_quantitive.to_csv(
    path_or_buf="../data/eagles/questionnaire_quantitive.csv",
    index=False,
)