<a href="https://colab.research.google.com/github/YB73/Football_Player_Prediction/blob/main/FootballMLModels(1).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive (Colab only) so files under /content/drive are readable;
# the dataset is later loaded from /content/drive/MyDrive/Football.csv.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import copy
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge, Lasso, LinearRegression
from sklearn.tree import DecisionTreeRegressor  
from sklearn.model_selection import KFold, cross_val_score
from sklearn.metrics import r2_score, accuracy_score, mean_squared_error
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
import seaborn as sns
import warnings
import pandas as pd
%matplotlib inline
warnings.filterwarnings('ignore')

In [None]:
# Apply matplotlib's 'bmh' style sheet to figures created from here on.
plt.style.use('bmh')

In [None]:
# Load the raw player table from the mounted Drive and preview the first rows.
# `data` is the raw frame that the cleaning and preprocessing cells below work on.
data=pd.read_csv("/content/drive/MyDrive/Football.csv")
data.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3.0,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2.0,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2.0,Czech Republic,0,6,1,1,0
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1.0,England,0,4,1,1,0
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2.0,France,0,4,1,1,0


In [None]:
# Count missing values per column to find features that need imputation.
data.isnull().sum()

name            0
club            0
age             0
position        0
position_cat    0
market_value    0
page_views      0
fpl_value       0
fpl_sel         0
fpl_points      0
region          1
nationality     0
new_foreign     0
age_cat         0
club_id         0
big_club        0
new_signing     0
dtype: int64

In [None]:
# Distinct region codes, ordered from most to least frequent.
data['region'].value_counts().index.tolist()


[2.0, 1.0, 4.0, 3.0]

In [None]:
# Most frequent region code; used as the fill value for the missing entry.
data['region'].mode()

0    2.0
dtype: float64

In [None]:
# Replace null region values with the column's mode, computed from the data
# instead of hard-coded so the cell stays correct if the dataset changes.
data['region'] = data['region'].fillna(data['region'].mode()[0])

In [None]:
# Preview the frame again after the region imputation.
data.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3.0,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2.0,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2.0,Czech Republic,0,6,1,1,0
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1.0,England,0,4,1,1,0
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2.0,France,0,4,1,1,0


In [None]:
# Re-count missing values to confirm the imputation left no nulls.
data.isnull().sum()

name            0
club            0
age             0
position        0
position_cat    0
market_value    0
page_views      0
fpl_value       0
fpl_sel         0
fpl_points      0
region          0
nationality     0
new_foreign     0
age_cat         0
club_id         0
big_club        0
new_signing     0
dtype: int64

In [None]:
# Skewness of the numeric features. numeric_only=True is required because the
# frame still holds string columns (name, club, position, ...), which make
# DataFrame.skew() raise on recent pandas versions.
data.skew(numeric_only=True)

age             0.135533
position_cat    0.182976
market_value    2.162548
page_views      3.244946
fpl_value       2.136684
fpl_points      0.825879
region          0.850956
new_foreign     5.100749
age_cat         0.172058
club_id         0.015017
big_club        0.856601
new_signing     2.019198
dtype: float64

In [None]:
class DataPreprocessing:
    """Turn the raw player frame into a fully numeric modelling frame.

    The pipeline run by ``getProcessedData`` is:

    1. drop the columns listed in ``cols_to_be_dropped``;
    2. replace the integer ``position_cat`` / ``region`` codes with text labels;
    3. convert the percentage strings in ``fpl_sel`` to floats;
    4. one-hot encode ``cols_to_be_encoded`` (first level of each dropped);
    5. natural-log transform ``page_views``.

    ``getProcessedData`` always restarts from the frame given to the
    constructor, so it can be called repeatedly and returns the same result.
    """

    # Code -> label lookups. Any code that is not listed falls back to the
    # final category ('Goalkeeper' / 'Rest of World').
    _POSITION_LABELS = {1: 'Attackers', 2: 'Midfielders', 3: 'Defenders'}
    _REGION_LABELS = {1: 'England', 2: 'EU', 3: 'Americans'}

    def __init__(self,data):
        """Store the raw frame and the column lists used by the pipeline.

        Args:
            data: pandas DataFrame holding the raw player table.
        """
        # Untouched reference to the input so getProcessedData can restart from it.
        self._raw_data = data
        self.data = data
        self.cols_to_be_dropped = ['name','club_id','age_cat','nationality']
        self.cols_to_be_encoded = ['club','position','position_cat','region']

    def addPosCat(self,inputCat):
        """Return the text label for a ``position_cat`` code.

        Codes 1, 2 and 3 map to 'Attackers', 'Midfielders' and 'Defenders';
        every other value maps to 'Goalkeeper'.
        """
        return self._POSITION_LABELS.get(inputCat, 'Goalkeeper')

    def addRegion(self, inpregion):
        """Return the text label for a ``region`` code.

        Codes 1, 2 and 3 map to 'England', 'EU' and 'Americans'; every other
        value (including NaN) maps to 'Rest of World'.
        """
        return self._REGION_LABELS.get(inpregion, 'Rest of World')

    def columnTypeConversion(self):
        """Convert ``fpl_sel`` percentage strings (e.g. '17.10%') to floats.

        Only a trailing '%' is stripped, so values that are already plain
        numbers keep all of their digits and the method can be re-applied.
        """
        self.data['fpl_sel'] = (
            self.data['fpl_sel']
            .astype(str)
            .str.strip()
            .str.rstrip('%')
            .astype('float')
        )

    def logTransformation(self):
        """Replace ``page_views`` with its natural logarithm to reduce skew.

        NOTE(review): assumes page_views is strictly positive; np.log yields
        -inf / NaN otherwise - confirm against the dataset.
        """
        self.data['page_views'] = self.data['page_views'].apply(np.log)

    def dataEncoding(self):
        """One-hot encode the categorical columns with pandas dummies.

        ``drop_first=True`` removes the first level of every encoded column.
        """
        self.data = pd.get_dummies(self.data, columns = self.cols_to_be_encoded, drop_first = True)

    def getProcessedData(self):
        """Run the whole pipeline and return the processed DataFrame.

        The result is also stored on ``self.data``. The frame passed to the
        constructor is not modified by this method: ``drop`` returns a new
        frame and every later step works on that new frame.
        """
        # Restart from the raw input so a second call does not fail on
        # columns that an earlier call already dropped or encoded.
        self.data = self._raw_data.drop(self.cols_to_be_dropped, axis = 1)
        self.data['position_cat'] = self.data['position_cat'].apply(self.addPosCat)
        self.data['region'] = self.data['region'].apply(self.addRegion)
        self.columnTypeConversion()
        self.dataEncoding()
        self.logTransformation()
        return self.data

In [None]:
# Run the preprocessing pipeline on the cleaned frame; `encoded_data` is the
# processed frame that is split into features and target below.
data_obj = DataPreprocessing(data) 
encoded_data = data_obj.getProcessedData()

In [None]:
# Preview the processed frame (dummy columns, float fpl_sel, logged page_views).
encoded_data.head()


Unnamed: 0,age,market_value,page_views,fpl_value,fpl_sel,fpl_points,new_foreign,big_club,new_signing,club_Bournemouth,club_Brighton+and+Hove,club_Burnley,club_Chelsea,club_Crystal+Palace,club_Everton,club_Huddersfield,club_Leicester+City,club_Liverpool,club_Manchester+City,club_Manchester+United,club_Newcastle+United,club_Southampton,club_Stoke+City,club_Swansea,club_Tottenham,club_Watford,club_West+Brom,club_West+Ham,position_CB,position_CF,position_CM,position_DM,position_GK,position_LB,position_LM,position_LW,position_RB,position_RM,position_RW,position_SS,position_cat_Defenders,position_cat_Goalkeeper,position_cat_Midfielders,region_EU,region_England,region_Rest of World
0,28,65.0,8.373092,12.0,17.1,264,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
1,28,50.0,8.388223,9.5,5.6,167,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
2,35,7.0,7.332369,5.5,5.9,134,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0
3,28,20.0,7.780303,7.5,1.5,122,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0
4,31,22.0,6.81564,6.0,0.7,121,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0


In [None]:
output_var = 'market_value'
# Fixed seed so the split, and every score computed from it, is reproducible
# after a kernel restart.
SPLIT_SEED = 0

# Features are every column except the target; the target is kept as a
# one-column DataFrame.
X = encoded_data.drop(columns=[output_var])
y = encoded_data[[output_var]]
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.20, random_state = SPLIT_SEED
)

# Discard the shuffled original row labels so each part is indexed 0..n-1.
x_train = x_train.reset_index(drop = True)
x_test = x_test.reset_index(drop = True)
y_train = y_train.reset_index(drop = True)
y_test = y_test.reset_index(drop = True)

In [None]:
# Shapes of the four split parts, in the order: x_train, x_test, y_train, y_test.
for split_part in (x_train, x_test, y_train, y_test):
    print(split_part.shape)

(368, 45)
(93, 45)
(368, 1)
(93, 1)


In [None]:
# min max scaling on input data
# The scaler is fitted on the training features only and then applied to the
# test features, so no test-set statistics are used during fitting.
# x_train / x_test are rebound to the scaled arrays returned by the scaler.
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [None]:
# Model 1 - Linear regression
# Ordinary least squares fitted on the scaled training features.
lin_model = LinearRegression()
lin_model.fit(x_train, y_train)

# Both metrics below are computed on the training data itself.
train_pred = lin_model.predict(x_train)
acc_train = lin_model.score(x_train, y_train)
mse_train = mean_squared_error(y_train, train_pred)

print(f"R^2 score of training data - {acc_train!s}")
print(f"Root Mean Square Error on training data - {mse_train**(0.5)!s}")


R^2 score of training data - 0.814830133908506
Root Mean Square Error on training data - 5.266451210853647


In [None]:
def KFoldVerify(model, X, Y, n_splits=5, scoring='r2', random_state=1):
    """Return the mean cross-validated score of ``model`` on ``(X, Y)``.

    Args:
        model: estimator passed to ``cross_val_score``.
        X: feature matrix.
        Y: target values.
        n_splits: number of shuffled K-fold splits (default 5).
        scoring: scorer name handed to ``cross_val_score`` (default 'r2').
        random_state: seed for the fold shuffling (default 1), so repeated
            calls use the same folds.

    Returns:
        The mean of the per-fold scores.
    """
    cv = KFold(n_splits=n_splits, random_state=random_state, shuffle=True)
    fold_scores = cross_val_score(model, X, Y, cv=cv, scoring=scoring)
    return fold_scores.mean()

In [None]:
# Lasso Regressor
# L1-regularised linear model: cross-validate first, then fit on the full
# training split and report the training-set metrics.
lasso_model = Lasso(alpha=0.1, max_iter = 10000)
cross_acc_train = KFoldVerify(lasso_model, x_train, y_train)
lasso_model.fit(x_train, y_train)

train_pred = lasso_model.predict(x_train)
acc_train = lasso_model.score(x_train, y_train)
mse_train = mean_squared_error(y_train, train_pred)

print(f"Cross Validation score - {cross_acc_train!s}\n")
print(f"R^2 score of training data - {acc_train!s}")
print(f"Root Mean Square Error of training data - {mse_train**(0.5)!s}")

Cross Validation score - 0.7359163156474716

R^2 score of training data - 0.7806060507987487
Root Mean Square Error of training data - 5.732515422233922


In [None]:
# Ridge Regressor
# L2-regularised linear model: cross-validate first, then fit on the full
# training split and report the training-set metrics.
ridge_model = Ridge(alpha=0.1,max_iter=10000)
cross_acc_train = KFoldVerify(ridge_model, x_train, y_train)
ridge_model.fit(x_train, y_train)

train_pred = ridge_model.predict(x_train)
acc_train = ridge_model.score(x_train, y_train)
mse_train = mean_squared_error(y_train, train_pred)

print(f"Cross Validation score - {cross_acc_train!s}\n")
print(f"R^2 score of training data - {acc_train!s}")
print(f"Root Mean Square Error of training data - {mse_train**(0.5)!s}")

Cross Validation score - 0.7460964179639564

R^2 score of training data - 0.8146402275054694
Root Mean Square Error of training data - 5.269151100770489


In [None]:
# support vector regressor
# Cubic polynomial kernel with a very large C and a tiny epsilon tube.
# NOTE(review): in the run saved with this notebook the training R^2 is ~1.0
# while the cross-validated R^2 is ~0.55, which suggests these settings
# overfit - consider tuning C / epsilon.
SVR_regr = SVR(C = 10000.0, epsilon=0.001, kernel='poly', degree=3)
cross_acc_train = KFoldVerify(SVR_regr, x_train, y_train)
SVR_regr.fit(x_train, y_train)

train_pred = SVR_regr.predict(x_train)
acc_train = SVR_regr.score(x_train, y_train)
mse_train = mean_squared_error(y_train, train_pred)

print(f"Cross Validation score - {cross_acc_train!s}\n")
print(f"R^2 score of training data - {acc_train!s}")
print(f"Root Mean Square Error of training data - {mse_train**(0.5)!s}")

Cross Validation score - 0.545933827784743

R^2 score of training data - 0.9999999922680117
Root Mean Square Error of training data - 0.0010761641024528922


In [None]:
# nearest neighbour regressor
# Each prediction uses the 10 nearest training points under the Minkowski
# metric: cross-validate first, then fit and report training-set metrics.
K_N_N = KNeighborsRegressor(n_neighbors=10, metric='minkowski')
cross_acc_train = KFoldVerify(K_N_N, x_train, y_train)
K_N_N.fit(x_train, y_train)

train_pred = K_N_N.predict(x_train)
acc_train = K_N_N.score(x_train, y_train)
mse_train = mean_squared_error(y_train, train_pred)

print(f"Cross Validation score - {cross_acc_train!s}\n")
print(f"R^2 score of training data - {acc_train!s}")
print(f"Root Mean Square Error of training data - {mse_train**(0.5)!s}")

Cross Validation score - 0.4962030109429455

R^2 score of training data - 0.6182027635314457
Root Mean Square Error of training data - 7.56221985197957


In [None]:


# Decision Tree regressor
# Single tree capped at depth 5 with a fixed seed: cross-validate first, then
# fit on the full training split and report the training-set metrics.
tree_model = DecisionTreeRegressor(random_state = 0,max_depth=5)
cross_acc_train = KFoldVerify(tree_model, x_train, y_train)
tree_model.fit(x_train, y_train)

train_pred = tree_model.predict(x_train)
acc_train = tree_model.score(x_train, y_train)
mse_train = mean_squared_error(y_train, train_pred)

print(f"Cross Validation score - {cross_acc_train!s}\n")
print(f"R^2 score of training data - {acc_train!s}")
print(f"Root Mean Square Error of training data - {mse_train**(0.5)!s}")

Cross Validation score - 0.5583514617301549

R^2 score of training data - 0.8664581197052755
Root Mean Square Error of training data - 4.472410437795188


In [None]:
# fitting a random forest regressor
# Ensemble of depth-5 trees with a fixed seed: cross-validate first, then fit
# on the full training split and report the training-set metrics.
Forest_regr = RandomForestRegressor(max_depth=5, random_state=0)
cross_acc_train = KFoldVerify(Forest_regr, x_train, y_train)
Forest_regr.fit(x_train, y_train)

train_pred = Forest_regr.predict(x_train)
acc_train = Forest_regr.score(x_train, y_train)
mse_train = mean_squared_error(y_train, train_pred)

print(f"Cross Validation score - {cross_acc_train!s}\n")
print(f"R^2 score of training data - {acc_train!s}")
print(f"Root Mean Square Error of training data - {mse_train**(0.5)!s}")

Cross Validation score - 0.7473329373935097

R^2 score of training data - 0.9070702344004276
Root Mean Square Error of training data - 3.7308718316498406


In [None]:
#  gradient boosting regression
# 'loss' is deliberately left at its default (squared error). The former
# explicit value 'ls' was removed in scikit-learn 1.2, while the default is the
# same loss on both old and new releases, so omitting the key works everywhere.
# random_state is fixed for reproducibility, as in the tree and forest cells.
params = {'n_estimators': 300,
          'max_depth': 2,
          'min_samples_split': 9,
          'learning_rate': 0.1,
          'random_state': 0}
gbr_model = GradientBoostingRegressor(**params)
cross_acc_train = KFoldVerify(gbr_model, x_train, y_train)
gbr_model.fit(x_train, y_train)

acc_train = gbr_model.score(x_train, y_train)
print("Cross Validation score - " + str(cross_acc_train))
print()


# Training-set metrics (same data the model was fitted on).
train_pred = gbr_model.predict(x_train)
mse_train = mean_squared_error(y_train,train_pred)
print("R^2 score of training data - " + str(acc_train))
print("Root Mean Square Error of training data - " + str(mse_train**(0.5)))

Cross Validation score - 0.8108321223996636

R^2 score of training data - 0.9687399771012036
Root Mean Square Error of training data - 2.1638520080838846


In [None]:
pip install Flask



In [None]:
pip install flask_ngrok

Collecting flask_ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl (3.1 kB)
Installing collected packages: flask-ngrok
Successfully installed flask-ngrok-0.0.25


In [None]:
from flask_ngrok import run_with_ngrok
from flask import Flask

In [None]:
from flask import Flask, render_template, request, Response, abort, redirect, url_for

# NOTE(review): the template folder used to be '/content/drive' (the Drive
# mount root) and the recorded run failed inside render_template. The CSV lives
# under MyDrive, so the templates are presumably there too - confirm the real
# location of assignment.html / data.html.
TEMPLATE_DIR = '/content/drive/MyDrive'
DATASET_PATH = '/content/drive/MyDrive/Football.csv'

# Form fields posted by the prediction form, in exactly the column order the
# serving model is trained on.
FORM_FEATURES = ["age", "position_cat", "page_views", "fpl_value", "fpl_points",
                 "region", "new_foreign", "age_cat", "club_id", "big_club",
                 "new_signing"]

app = Flask(__name__, template_folder=TEMPLATE_DIR)

# All names below are prefixed with `app_` so this cell no longer overwrites the
# x_train / x_test / y_train / y_test / y globals of the analysis cells above.
app_players = pd.read_csv(DATASET_PATH)
# The raw CSV has a missing `region` value and SVR cannot be fitted on NaN, so
# impute it with the column mode (same rule as the analysis section).
app_players['region'] = app_players['region'].fillna(app_players['region'].mode()[0])

app_x = app_players[FORM_FEATURES].to_numpy(dtype=float)
app_y = app_players["market_value"].to_numpy()
app_x_train, app_x_test, app_y_train, app_y_test = train_test_split(
    app_x, app_y, test_size = 0.20, random_state = 0
)

# Fit the serving model once at start-up instead of on every request.
app_model = SVR()
app_model.fit(app_x_train, app_y_train)
print("Held-out R^2 of the serving model - " + str(app_model.score(app_x_test, app_y_test)))

run_with_ngrok(app)

@app.route("/" , methods=['GET','POST'])
def index():
    """Render the input form."""
    return render_template('assignment.html')

# The endpoint keeps the name "data" (so url_for('data') still resolves), but
# the view function is named differently so that defining it does not replace
# the global DataFrame `data` used by the analysis cells.
@app.route("/data", methods=['GET','POST'], endpoint="data")
def predict_market_value():
    """Predict a market value from the posted player attributes.

    A GET request has nothing to predict from and is redirected to the form.
    A missing form field is answered with HTTP 400 by Flask itself; a
    non-numeric value is rejected with HTTP 400 here. The extra 'excellent'
    field that was previously read but never used is no longer required.
    """
    if request.method != "POST":
        return redirect(url_for('index'))
    try:
        # Form values arrive as strings; the model needs numbers.
        features = [float(request.form[field]) for field in FORM_FEATURES]
    except ValueError:
        abort(400, description="All player attributes must be numeric.")
    # predict() returns a one-element array, passed to the template unchanged.
    market_value = app_model.predict([features])
    return render_template('data.html',market_value=market_value )

if __name__ == "__main__":
    app.run()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)


 * Running on http://ab20-34-80-214-255.ngrok.io
 * Traffic stats available on http://127.0.0.1:4040


[2021-10-26 16:34:37,777] ERROR in app: Exception on / [GET]
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/flask/app.py", line 2447, in wsgi_app
    response = self.full_dispatch_request()
  File "/usr/local/lib/python3.7/dist-packages/flask/app.py", line 1952, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "/usr/local/lib/python3.7/dist-packages/flask/app.py", line 1821, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "/usr/local/lib/python3.7/dist-packages/flask/_compat.py", line 39, in reraise
    raise value
  File "/usr/local/lib/python3.7/dist-packages/flask/app.py", line 1950, in full_dispatch_request
    rv = self.dispatch_request()
  File "/usr/local/lib/python3.7/dist-packages/flask/app.py", line 1936, in dispatch_request
    return self.view_functions[rule.endpoint](**req.view_args)
  File "<ipython-input-33-4372622279a4>", line 17, in index
    return render_template('assignment.html')
  File "