# In [None]:
#!pip install streamlit
import streamlit as st
import pandas as pd
from scipy.stats import zscore

# Load the per-player table that every later step of the app works from.
# NOTE(review): this is a /content/drive path while the weights CSV further
# down is read from a Windows drive path -- confirm both resolve on the
# machine that runs the app.
grouped_df = pd.read_csv("/content/drive/MyDrive/dissertation/grouped_player_df3.csv")

# Distinct team names offered in the selector. Missing team values are
# dropped first: `unique()` would otherwise keep NaN as an option, and
# choosing it produces an empty selection because NaN never compares equal.
unique_values = grouped_df["team"].dropna().unique()

# Title for the app
st.title("Player rating and recommendation")

# Dropdown widget to pick the team to analyse
selected_value = st.selectbox("Select a team", unique_values)

st.write("Selected team:", selected_value)

# Rows belonging to the chosen team only
team_df = grouped_df[grouped_df["team"] == selected_value]


#preprocessing dataset
def preprocess(df):
    """Clean a player table before it is rated.

    Rows whose ``position`` is ``"GK"`` are removed, ``nationality`` is
    reduced to its second space-separated token, missing values are replaced
    with 0, and only players with at least 90 ``minutes`` are kept.

    Args:
        df: DataFrame with at least ``position``, ``nationality`` and
            ``minutes`` columns. It is not modified.

    Returns:
        A new DataFrame containing the cleaned, filtered rows.
    """
    # Filter on an explicit copy so the column assignment below never writes
    # into (or warns about) the caller's frame.
    df = df[df['position'] != "GK"].copy()

    # Keep the second token of the nationality string (assumed to look like
    # "code NAME" -- TODO confirm against the data). `.str[1]` gives NaN for
    # values without a second token instead of raising KeyError when no row
    # contains a space at all.
    df['nationality'] = df['nationality'].str.split(' ').str[1]

    # Missing values become 0; for `minutes` this means such players are
    # removed by the threshold below.
    df = df.fillna(0)

    # Keep only players with at least 90 minutes played.
    return df[df['minutes'] >= 90]

def alter_pos(df):
    """Return ``df`` without the rows whose ``position`` is exactly ``"DF,GK"``.

    The input frame is left untouched; a new frame is returned.
    """
    is_df_gk = df['position'] == "DF,GK"
    labels_to_remove = df.loc[is_df_gk].index
    return df.drop(index=labels_to_remove)

def analysis(df, weights):
    """Convert raw statistics into weighted z-scores plus an overall rating.

    Each column of ``df`` is standardised with ``scipy.stats.zscore``, scaled
    by its share of the total weight and by 10, and the scaled columns are
    summed per row into a ``rating`` column.

    Args:
        df: Numeric DataFrame, one row per player and one column per metric.
            It is not modified.
        weights: Mapping of column name to importance weight. Every column of
            ``df`` must have an entry.

    Returns:
        A new DataFrame with the same index and columns as ``df`` holding the
        weighted z-scores, plus a trailing ``rating`` column.

    Raises:
        KeyError: If a column of ``df`` has no entry in ``weights``.
    """
    # `fillna` without `inplace` returns a new frame, so the caller's data is
    # left exactly as it was passed in.
    filled = df.fillna(0)

    # A column with zero variance has an undefined z-score (NaN). Treat it as
    # "no deviation from the mean" so no NaN ends up in the returned feature
    # matrix.
    z_scores_attackers_df = filled.apply(zscore).fillna(0)

    # Normalise the weights so that they sum to 1.
    total_weight = sum(weights.values())
    weights_percentage = {k: v / total_weight for k, v in weights.items()}

    # Direct dict lookup keeps the KeyError for a column without a weight.
    col_weights = [weights_percentage[col] for col in df.columns]
    z_scores_attackers_df = z_scores_attackers_df.mul(col_weights, axis='columns') * 10

    # Overall rating: row-wise sum of the weighted z-scores.
    z_scores_attackers_df['rating'] = z_scores_attackers_df.sum(axis=1)
    return z_scores_attackers_df


def underperformer(rating_df, grouped_df):
    """Pick the last row of ``rating_df`` and fetch that player's source row.

    ``rating_df`` is expected to be sorted by rating in descending order by
    the caller, so its last row is the lowest-rated player.

    Args:
        rating_df: Rating table whose index labels match ``grouped_df``.
            A ``Row_Labels`` column holding the index is added to it in place.
        grouped_df: Full player table to read the player's details from.

    Returns:
        Tuple ``(row, up_player_index)``: the player's row from ``grouped_df``
        and the integer index label of that player.
    """
    # Store the index labels as a column. This side effect is intentional:
    # the script later reads the row back from `rating_df`, and
    # `knn_modelling` drops the last element of that row.
    rating_df['Row_Labels'] = rating_df.index
    underperforming_player = rating_df.iloc[-1]

    up_player_index = int(underperforming_player['Row_Labels'])
    st.write("index is", up_player_index)

    # `Row_Labels` holds index *labels*, so look the player up by label.
    # Positional `iloc` only gives the same row for a default RangeIndex.
    row = grouped_df.loc[up_player_index]
    st.write(row)
    return row, up_player_index


def knn_modelling(X_train, z_player_data, n_neighbors=10):
    """Find the rows of ``X_train`` closest to one player's feature vector.

    Args:
        X_train: Feature matrix (one row per player) to search in.
        z_player_data: The query player's values. Its last element is
            discarded before the lookup (the script passes a row that ends
            with the ``Row_Labels`` helper column).
        n_neighbors: Maximum number of neighbours to return. Capped at the
            number of rows in ``X_train``.

    Returns:
        Tuple ``(distances, indices)`` as returned by
        ``NearestNeighbors.kneighbors``; ``indices`` are positions in
        ``X_train``.
    """
    import numpy as np
    from sklearn.neighbors import NearestNeighbors

    # Asking for more neighbours than there are training rows is an error in
    # scikit-learn, so never request more than are available.
    k = min(n_neighbors, len(X_train))
    nn = NearestNeighbors(n_neighbors=k, metric='minkowski', p=2)
    nn.fit(X_train)

    # Single query row; the width follows the data instead of being fixed at
    # 35 so a changed set of weight columns does not break the reshape.
    test = np.array(z_player_data[:-1]).reshape(1, -1)

    st.write("test data is", test)
    distances, indices = nn.kneighbors(test)
    return distances, indices

# ---------------------------------------------------------------------
# Main flow: rate the selected team, pick its lowest-rated player and
# look up the most similar players in the full data set.
st.write("grouped",grouped_df.shape)
st.write("grouped",team_df.shape)

# Clean the rows of the selected team only; keep a copy for display.
processed_team_df = preprocess(team_df)
modified_team_df = alter_pos(processed_team_df)
player_df = modified_team_df.copy()

# Per-column importance weights.
# NOTE(review): Windows drive path here, whereas the player CSV is read from
# a /content/drive path -- confirm both exist where the app runs.
corr_df = pd.read_csv(r"D:\dissertation\Streamlit\corr_attackers_goal.csv")

selected_columns = ['column','weight']
weights_df = pd.DataFrame(corr_df[selected_columns])

# column name -> weight
weight_dict = {
    col: weight
    for col, weight in zip(weights_df['column'], weights_df['weight'])
}

key_columns = list(weight_dict)
value_columns = list(weight_dict.values())

# Rate the team's players on the weighted columns.
rating_data = modified_team_df[key_columns]
rating_df = analysis(rating_data,weight_dict)

# Attach the rating to the display copy, then order best-to-worst.
player_df['rating']=rating_df['rating']
rating_df = rating_df.sort_values(by='rating', ascending=False)

display_df = player_df.copy().sort_values(by='rating',ascending=False)
st.write(display_df)

# The last row of the sorted ratings is the player to replace.
player_to_be_replaced,index = underperformer(rating_df,grouped_df)
z_player_data = rating_df.loc[index]
st.write("data used in model",player_to_be_replaced)

# Same cleaning and rating, this time over the entire player table.
processed_grouped_df = preprocess(grouped_df)
modified_grouped_df = alter_pos(processed_grouped_df)
rating_grouped_data = modified_grouped_df[key_columns]
rating_grouped_df = analysis(rating_grouped_data,weight_dict)

# NOTE(review): the query row was standardised within the team while the
# training matrix is standardised over all players -- confirm intended.
distances,indices = knn_modelling(rating_grouped_df,z_player_data)


st.write("Indices of Nearest Neighbors:")
st.write(indices)

st.write("\nDistances to Nearest Neighbors:")
st.write(distances)

# Neighbour indices are positions in the training matrix, which shares its
# row order with modified_grouped_df.
train_indices_predicted = indices[0]
list_display = [modified_grouped_df.iloc[ind] for ind in train_indices_predicted]

disp_df = pd.DataFrame(list_display)
st.write(disp_df)

