# Be able to investigate client requirements for data analysis
# 2.4 Quantitative data analysis

* mean
* median
* standard deviation
* range

## Set up

In [1]:
try:
    import micropip
    await micropip.install(["pyoliteutils", "textblob"])
except:
    pass

In [2]:
from pyoliteutils import *
import pandas as pd

## Crowd / Attendance Data

In [3]:
# Load the raw match attendance table (date, opponents, attendance per match).
crowds = pd.read_csv("../data/eagles/crowds.csv")
# Reference for extracting date parts from a datetime column:
## https://datascienceparichay.com/article/pandas-extract-year-from-datetime-column/
crowds

Unnamed: 0,Date,Opponents,Attendance
0,02/09/1984,Rochdale,1425
1,09/09/1984,Fulham,1145
2,23/09/1984,Salford,1159
3,14/10/1984,Runcorn,1076
4,28/10/1984,Bridgend,826
...,...,...,...
583,24/07/2022,Workington,644
584,07/08/2022,Whitehaven,727
585,12/08/2022,Halifax,569
586,02/09/2022,Featherstone,912


## Get Usable Date Information

In [4]:
# Parse the day/month/year strings into real datetimes, then derive the
# calendar fields (year, weekday name, month number, month name) from them.
crowds['Date'] = pd.to_datetime(crowds['Date'], format="%d/%m/%Y")
date_parts = crowds['Date'].dt
crowds['Year'] = date_parts.year
crowds['Day'] = date_parts.day_name()
crowds['Month'] = date_parts.month
crowds['Month Name'] = date_parts.month_name()
crowds

Unnamed: 0,Date,Opponents,Attendance,Year,Day,Month,Month Name
0,1984-09-02,Rochdale,1425,1984,Sunday,9,September
1,1984-09-09,Fulham,1145,1984,Sunday,9,September
2,1984-09-23,Salford,1159,1984,Sunday,9,September
3,1984-10-14,Runcorn,1076,1984,Sunday,10,October
4,1984-10-28,Bridgend,826,1984,Sunday,10,October
...,...,...,...,...,...,...,...
583,2022-07-24,Workington,644,2022,Sunday,7,July
584,2022-08-07,Whitehaven,727,2022,Sunday,8,August
585,2022-08-12,Halifax,569,2022,Friday,8,August
586,2022-09-02,Featherstone,912,2022,Friday,9,September


## Saving for later

In [5]:
# Persist the cleaned table for later use. index=False keeps the automatic row
# index out of the file (as the other CSV exports in this notebook do), so
# reloading it does not produce a spurious "Unnamed: 0" column.
crowds.to_csv("../data/eagles/crowds_clean.csv", index=False)

## Other useful Opponent / Match Data?

### Location of opponents, and distance to Sheffield

#### Possible Data Sources : 
- https://api.postcodes.io/places?q=[query]
- Nominatim / GeoPy
    - https://geopy.readthedocs.io/en/stable/
    - https://nominatim.org/release-docs/latest/api/Search/ 
    - https://medium.com/@gopesh3652/geocoding-with-python-using-nominatim-a-beginners-guide-220b250ca48d 



### Weather in Sheffield

We could use daily weather at each match's stadium location, but monthly data from Sheffield's weather station is easier to start with.

#### Possible Data Sources : 

- https://www.metoffice.gov.uk/research/climate/maps-and-data/historic-station-data saved as [sheffield_monthly_weather.csv](../data/eagles/sheffield_monthly_weather.csv)
- https://api.openweathermap.org/data/3.0/onecall/timemachine?lat=39.099724&lon=-94.578331&dt=1643803200&appid={API_KEY} (substitute your own API key; do not commit real keys)

In [6]:
# Load the monthly weather history for Sheffield (one row per year/month).
# NOTE(review): the displayed rows contain non-numeric markers ("---", "*",
# "# Provisional"), so some columns are probably read as strings — confirm
# before doing arithmetic on them.
sheffield_monthly_weather = pd.read_csv("../data/eagles/sheffield_monthly_weather.csv")
sheffield_monthly_weather

Unnamed: 0,yyyy,mm,tmax degC,tmin degC,af days,rain mm,sun hours
0,1883,1,6.3,1.7,6,122.1,---
1,1883,2,8,2.8,2,69.8,---
2,1883,3,4.8,-1.6,23,29.6,---
3,1883,4,12.2,3.8,2,74,---
4,1883,5,14.7,6.2,0,31.2,---
...,...,...,...,...,...,...,...
1689,2023,10,14.9,8.9,0,203.2,77.5# Provisional
1690,2023,11,9.4,5,2,108.2,68.1# Provisional
1691,2023,12,9.1,* 4.7,5,174.3,* 31.5# Provisional
1692,2024,1,7,2.6,6,72.4,54.9# Provisional


In [7]:
# Give the weather columns readable names. "Year" and "Month" deliberately
# match the crowd table's column names so the two tables can be joined on them.
weather_column_names = {
    'yyyy': "Year",
    'mm': "Month",
    'tmax degC': "Max Temperature C",
    'tmin degC': "Min Temperature C",
    'rain mm': "Rain mm",
}
sheffield_monthly_weather.rename(columns=weather_column_names, inplace=True)
sheffield_monthly_weather

Unnamed: 0,Year,Month,Max Temperature C,Min Temperature C,af days,Rain mm,sun hours
0,1883,1,6.3,1.7,6,122.1,---
1,1883,2,8,2.8,2,69.8,---
2,1883,3,4.8,-1.6,23,29.6,---
3,1883,4,12.2,3.8,2,74,---
4,1883,5,14.7,6.2,0,31.2,---
...,...,...,...,...,...,...,...
1689,2023,10,14.9,8.9,0,203.2,77.5# Provisional
1690,2023,11,9.4,5,2,108.2,68.1# Provisional
1691,2023,12,9.1,* 4.7,5,174.3,* 31.5# Provisional
1692,2024,1,7,2.6,6,72.4,54.9# Provisional


In [8]:
# Attach the monthly weather figures to every match. A left join on year and
# month keeps each match row even when no weather row exists for that month
# (a positional concat would not line the rows up by date).
join_keys = ["Year", "Month"]
result = crowds.merge(sheffield_monthly_weather, how="left", on=join_keys)
result.to_csv("../data/eagles/crowds_with_monthly_weather.csv", index=False)
result

Unnamed: 0,Date,Opponents,Attendance,Year,Day,Month,Month Name,Max Temperature C,Min Temperature C,af days,Rain mm,sun hours
0,1984-09-02,Rochdale,1425,1984,Sunday,9,September,16.7,10.4,0,117.6,86.1
1,1984-09-09,Fulham,1145,1984,Sunday,9,September,16.7,10.4,0,117.6,86.1
2,1984-09-23,Salford,1159,1984,Sunday,9,September,16.7,10.4,0,117.6,86.1
3,1984-10-14,Runcorn,1076,1984,Sunday,10,October,14.3,7.7,0,82.7,75.6
4,1984-10-28,Bridgend,826,1984,Sunday,10,October,14.3,7.7,0,82.7,75.6
...,...,...,...,...,...,...,...,...,...,...,...,...
583,2022-07-24,Workington,644,2022,Sunday,7,July,23.2,14.5,0,23.8,187.8*
584,2022-08-07,Whitehaven,727,2022,Sunday,8,August,23.9,13.6,0,56.4,234.5*
585,2022-08-12,Halifax,569,2022,Friday,8,August,23.9,13.6,0,56.4,234.5*
586,2022-09-02,Featherstone,912,2022,Friday,9,September,18.4,11.1,0,50,124.8*


## Stadium Data

In [None]:
# Load the stadium table.
# NOTE(review): the geocoding step further down only acts on frames that have
# a "Postcode" column — confirm this file provides one.
stadiums = pd.read_csv("../data/eagles/stadiums.csv")
stadiums

## Adding the Location

In [None]:
import postcodes_io_api
# Shared postcodes.io client, used by get_latlong() for every lookup.
api  = postcodes_io_api.Api()

def get_latlong(postcode):
    """Look up the latitude and longitude for a postcode via postcodes.io.

    The value is first tried as a full postcode; if that lookup does not
    return status 200 it is retried as an outcode.

    Args:
        postcode: Full postcode or outcode as a string. Missing values
            (None, NaN or any other non-string) and blank strings are treated
            as "no postcode" and are never sent to the API. Surrounding
            whitespace is removed before the lookup.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``(None, None)`` when the
        postcode is missing or neither lookup yields coordinates.
    """
    # CSV/survey data frequently has empty postcode cells (NaN after
    # read_csv); skip them instead of spending two network requests on each.
    if not isinstance(postcode, str) or not postcode.strip():
        return None, None
    postcode = postcode.strip()

    data = api.get_postcode(postcode)
    if data.get("status") != 200:
        # Not a valid full postcode: fall back to an outcode lookup.
        data = api.get_outcode(postcode)

    if data.get("status") != 200:
        return None, None

    # A successful response may still carry a null result or lack the
    # coordinate fields, so read them defensively.
    result = data.get("result") or {}
    return result.get("latitude"), result.get("longitude")
    

def get_latlongs(df):
    """Add "Latitude" and "Longitude" columns derived from "Postcode".

    The frame is modified in place and also returned. Nothing happens when
    the frame already has a "Latitude" column or has no "Postcode" column.
    If the conversion raises, the error is printed and the frame is returned
    as it was.

    Args:
        df: DataFrame to geocode.

    Returns:
        The same DataFrame object that was passed in.
    """
    if "Latitude" in df or "Postcode" not in df:
        return df

    def _row_coordinates(row):
        # One (latitude, longitude) pair per row.
        return get_latlong(row["Postcode"])

    try:
        coordinates = df.apply(_row_coordinates, axis=1, result_type="expand")
        df[["Latitude", "Longitude"]] = coordinates
    except Exception as error:
        print('Postcode Conversion failed : ' + str(error))
    return df

In [None]:
# Add "Latitude"/"Longitude" columns to the stadium table from its postcodes
# (get_latlongs leaves the table unchanged if it has no "Postcode" column).
stadiums = get_latlongs(stadiums)
stadiums


In [None]:
# Save the geocoded stadium table; index=False omits the row index column.
stadiums.to_csv("../data/eagles/stadiums_with_latlong.csv", index=False)

## Survey Data

In [None]:
# Load the survey responses export.
questionnaire = pd.read_csv("../data/eagles/questionnaire.csv")
questionnaire

In [None]:

# The survey export uses the full question text as the column header; shorten
# it to "Postcode", the column name get_latlongs() looks for.
postcode_question = 'Could you provide your postcode? This will help us understand where people are responding from. '
questionnaire.rename(columns={postcode_question: "Postcode"}, inplace=True)

questionnaire


In [None]:


# Keep a subset of the survey columns, chosen by their position in the export.
quantitative_positions = [1, 2, 3, 14, 15, 16, 17]
# .copy() makes the subset an independent table. get_latlongs() later adds
# columns to it, and doing that on a plain column selection triggers pandas'
# SettingWithCopyWarning.
questionnaire_quantitive = questionnaire[
    [questionnaire.columns[position] for position in quantitative_positions]
].copy()
questionnaire_quantitive

In [None]:
# Geocode the respondents' postcodes. This only has an effect if a "Postcode"
# column is among the selected columns; otherwise the table is returned as is.
questionnaire_quantitive = get_latlongs(questionnaire_quantitive)
questionnaire_quantitive

In [None]:
# Save the selected survey columns; index=False omits the row index column.
questionnaire_quantitive.to_csv("../data/eagles/questionnaire_quantitive.csv", index=False)