In [39]:
import pandas as pd
from pathlib import Path
import plotly.express as px

In [40]:
def calculate_accuracy(df, user_prediction_cols, week = None):

    def correct_pred(col1, col2):
        if col1 == col2:
            return 1
        else:
            return 0

    # find the number of matches that were predicted that week
    no_matches = df.shape[0]
    # get the name of the predictors
    predictors = []
    for col in user_prediction_cols:
        predictors.append(col.strip(" Prediction"))
        df[col.strip(" Prediction")] = df.apply(
            lambda x: correct_pred(x[col], x['FTR']), axis = 1)
    summed_filtered_df = df.sum()
    summed_filtered_df = summed_filtered_df[predictors]
    summed_results_df = pd.DataFrame(
        {'Predictor': summed_filtered_df.index, 'Correct Predictions': summed_filtered_df.values})
    if week:
        summed_results_df["Week"] = week
    summed_results_df['Accuracy of Predictions (%)'] = (summed_results_df['Correct Predictions'] / no_matches) * 100
    return summed_results_df


def calculate_accuracy_transform(df, mode):
    # get the columns of the dataframe
    input_df_columns = df.columns.to_list()
    # get just the prediction columns
    user_prediction_cols = []
    for column in input_df_columns:
        if "Prediction" in column:
            user_prediction_cols.append(column)

    if mode == "weekly":
        # define the empty transformed (and output) df
        accuracy_transform_df = pd.DataFrame(
            columns = ['Predictor', 'Correct Predictions', 'Week', 'Accuracy of Predictions (%)'])

        weeks_present = set(df["Week"].to_list())

        #  filter for each week
        for week in weeks_present:
            # filter to only show one week
            filtered_df = df[df["Week"] == week].reset_index()
            summed_results_df = calculate_accuracy(filtered_df, user_prediction_cols, week = week)
            accuracy_transform_df = pd.concat([accuracy_transform_df, summed_results_df], ignore_index = True)

    elif mode == "overall":
        accuracy_transform_df = pd.DataFrame(
            columns = ['Predictor', 'Correct Predictions', 'Accuracy of Predictions (%)'])
        summed_results_df = calculate_accuracy(df, user_prediction_cols)
        accuracy_transform_df = pd.concat([accuracy_transform_df, summed_results_df], ignore_index = True)

    else:
        raise Exception

    return accuracy_transform_df


In [16]:
path = str(Path().absolute().parent)
data_dir = path + "/data"
aggregated_results_loc = data_dir + "/aggregated_results/20_21/predictions_and_results_log.csv"
aggregated_results = pd.read_csv(aggregated_results_loc)
display(aggregated_results)

Unnamed: 0,Date,Time,Week,HomeTeam,AwayTeam,APPLE Prediction,DD Prediction,JR Prediction,LC Prediction,MW Prediction,FTR
0,09/12/2020,12:30:00,1,Fulham,Arsenal,A,A,A,A,A,A
1,09/12/2020,15:00:00,1,Crystal Palace,Southampton,A,D,D,H,A,H
2,09/12/2020,20:00:00,1,West Ham,Newcastle,H,A,A,H,A,A
3,09/12/2020,17:30:00,1,Liverpool,Leeds,H,H,H,H,H,H
4,13/09/2020,14:00:00,1,West Brom,Leicester,H,D,A,A,A,H
5,13/09/2020,16:30:00,1,Tottenham,Everton,H,A,H,H,D,A
6,14/09/2020,20:00:00,1,Brighton,Chelsea,A,D,A,A,A,A
7,19/09/2020,12:30:00,2,Everton,West Brom,H,H,H,H,H,H
8,19/09/2020,15:00:00,2,Leeds,Fulham,H,H,H,H,H,H
9,19/09/2020,17:30:00,2,Man United,Crystal Palace,H,H,H,H,H,A


In [43]:
top_6_teams = ["Arsenal", "Liverpool", "Chelsea", "Man City", "Man United", "Tottenham"]
newly_promoted_teams = ["Leeds", 'Fulham', "West Brom"]

In [46]:
f_df = aggregated_results[(aggregated_results["AwayTeam"].isin(top_6_teams))| (aggregated_results["HomeTeam"].isin(top_6_teams))]
display(f_df)

Unnamed: 0,Date,Time,Week,HomeTeam,AwayTeam,APPLE Prediction,DD Prediction,JR Prediction,LC Prediction,MW Prediction,FTR
0,09/12/2020,12:30:00,1,Fulham,Arsenal,A,A,A,A,A,A
3,09/12/2020,17:30:00,1,Liverpool,Leeds,H,H,H,H,H,H
5,13/09/2020,16:30:00,1,Tottenham,Everton,H,A,H,H,D,A
6,14/09/2020,20:00:00,1,Brighton,Chelsea,A,D,A,A,A,A
9,19/09/2020,17:30:00,2,Man United,Crystal Palace,H,H,H,H,H,A
10,19/09/2020,20:00:00,2,Arsenal,West Ham,H,H,H,H,H,H
11,20/09/2020,12:00:00,2,Southampton,Tottenham,A,D,A,A,A,A
13,20/09/2020,16:30:00,2,Chelsea,Liverpool,A,A,A,A,A,A
16,21/09/2020,20:15:00,2,Wolves,Man City,A,D,A,A,A,A


In [47]:
test_res = calculate_accuracy_transform(f_df, mode = "overall")
display(test_res)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,Predictor,Correct Predictions,Accuracy of Predictions (%)
0,APPLE,7,77.7778
1,DD,5,55.5556
2,JR,7,77.7778
3,LC,7,77.7778
4,MW,7,77.7778
