<a href="https://colab.research.google.com/github/Yael313255748/LinearRegression/blob/main/FIFA20_LINEAR_REGRESSION.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from tensorflow.keras import layers,losses,optimizers,Sequential,constraints
from sklearn.metrics import r2_score , mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Location of the FIFA 20 players table.
# NOTE(review): the path points into a Colab Google Drive mount - presumably the
# Drive has to be mounted before this cell runs; confirm.
DATA_PATH='/content/drive/MyDrive/datasets/FIFA/players_20.csv'
df=pd.read_csv(DATA_PATH)
df.head()

In [None]:
# Overview of the raw table: column names, dtypes and non-null counts.
df.info()

In [None]:
# Overview restricted to the integer-typed columns.
# np.integer matches every integer width; comparing dtypes with the builtin
# `int` only matches NumPy's default integer type, whose width can be platform
# dependent.
df.select_dtypes(include=np.integer).info()

In [None]:
# Columns removed from the modelling table even though they pass the dtype filter below.
column=[
    'age','height_cm','weight_kg','sofifa_id','potential',
    'value_eur','wage_eur','international_reputation','weak_foot','skill_moves',
]
# Keep every column whose dtype is neither object nor float, then drop the ones listed above.
df2=df.loc[:, ~((df.dtypes==object) | (df.dtypes==float))].drop(columns=column)
df2.head()

In [None]:
# Number of missing values in each column of the modelling table.
df2.isna().sum()

In [None]:
# Feature matrix: every df2 column except the target, converted to float.
X=df2.loc[:, df2.columns!='overall'].to_numpy(dtype=float)
# Target selected with double brackets so it stays two-dimensional (one column).
y=df2[['overall']].to_numpy(dtype=float)
print(X[:5,:5])
print(y[:5])

In [None]:
# Min-max scale every feature column of X (MinMaxScaler with default settings).
# NOTE(review): the scaler is fitted on the full matrix before the train/test
# split below, so test rows influence the scaling range - confirm this is acceptable.
scaler=MinMaxScaler()
X=scaler.fit_transform(X)
print(X[:5,:5])

In [None]:
# 80/20 split with a fixed seed; compute_coefs_metrics_pred repeats this exact
# call internally, so the shapes printed next match what the model is trained on.
X_train, X_test, y_train, y_test = train_test_split(X,y,train_size=0.8,random_state=100)

In [None]:
# Shapes of the train and test feature matrices, one per line.
print(X_train.shape, X_test.shape, sep='\n')

In [None]:
# Training configuration shared by every model built in this notebook.
loss=losses.MeanSquaredError()
# Used as a template only: create_lm() compiles each model with its own clone.
optimizer=optimizers.SGD(learning_rate=0.02)
epochs=40
batch_size=32
def create_lm(n_features=None):
  """Build and compile a single-unit linear regression model.

  The model is one Dense(1) layer with a linear activation. Both the kernel
  and the bias carry a NonNeg constraint, so the fitted coefficients and the
  intercept cannot become negative.

  Args:
    n_features: width of the input vectors. Defaults to the column count of
      the notebook-level feature matrix ``X``.

  Returns:
    A compiled Keras ``Sequential`` model using the module-level ``loss`` and
    a fresh optimizer configured like the module-level ``optimizer``.
  """
  if n_features is None:
    n_features=X.shape[1]
  lm=Sequential()
  lm.add(layers.Dense(1,activation='linear',input_shape=(n_features,),kernel_constraint=constraints.NonNeg(),bias_constraint=constraints.NonNeg()))
  # This function is called once per fitted model. Sharing one optimizer
  # instance between models shares its internal state (e.g. the step counter),
  # and recent Keras releases reject an optimizer that was already built for
  # another model's variables. Cloning from the config keeps the
  # hyper-parameters in one place while giving every model its own instance.
  fresh_optimizer=type(optimizer).from_config(optimizer.get_config())
  lm.compile(fresh_optimizer,loss)
  return lm

In [None]:
def compute_coefs_metrics_pred(X,y,names_X):
  """Train a linear model on an 80/20 split and collect its diagnostics.

  Args:
    X: feature matrix.
    y: target values aligned with the rows of X.
    names_X: index labels for the coefficient table, intercept label first,
      followed by one label per feature.

  Returns:
    Tuple ``(lm, met, coefs, predictions)``:
      lm: the fitted model returned by ``create_lm``.
      met: dict with keys 'MSE' and 'R2' (each a dict with 'train'/'test'
        values) and 'STR' (a formatted summary rounded to 3 decimals).
      coefs: single-column DataFrame ('Coefs') holding the bias followed by
        the kernel weights, indexed by names_X.
      predictions: model output for every row of X (train and test rows).
  """
  X_tr, X_te, y_tr, y_te = train_test_split(X,y,train_size=0.8,random_state=100)
  lm=create_lm()
  lm.fit(X_tr,y_tr,epochs=epochs,batch_size=batch_size)

  pred_tr=lm.predict(X_tr)
  pred_te=lm.predict(X_te)
  predictions=lm.predict(X)

  mse={'train':mean_squared_error(y_tr,pred_tr),'test':mean_squared_error(y_te,pred_te)}
  r2={'train':r2_score(y_tr,pred_tr),'test':r2_score(y_te,pred_te)}

  summary='Mean Squared Error\n\nTrain: {} Test: {}\n\nR2 Score\n\nTrain: {} Test: {}'.format(
      round(mse['train'],3),round(mse['test'],3),round(r2['train'],3),round(r2['test'],3))
  met={'MSE':mse,'R2':r2,'STR':summary}

  # get_weights() yields the kernel first and the bias second; the table lists
  # the bias (intercept) on top of the per-feature weights.
  kernel,bias=lm.get_weights()
  stacked=np.vstack((bias.reshape(1,1),kernel.reshape(-1,1)))
  coefs=pd.DataFrame(stacked,index=names_X,columns=['Coefs'])

  return lm, met, coefs, predictions

In [None]:
# Row labels for the coefficient table: the intercept first, then the feature
# columns in their df2 order.
var_names=['Intercept']+[name for name in df2.columns if name!='overall']

In [None]:
# Fit the model on the full (scaled) data set; `predictions` covers every row of X.
lm,met,coefs,predictions=compute_coefs_metrics_pred(X,y,var_names)

In [None]:
# Formatted train/test MSE and R2 summary of the model fitted above.
print(met['STR'])

In [None]:
# Intercept and per-feature weights of the model fitted above.
coefs

In [None]:
# Store the rounded model output on the player table and show it next to the true rating.
df['overall_prediction']=predictions.round()
df.loc[:, ['short_name','overall','overall_prediction']].head(10)

In [None]:
# Same comparison restricted to players whose nationality is Mexico.
df.loc[df['nationality']=='Mexico', ['short_name','overall','overall_prediction']].head(10)

In [None]:
# Fit a PCA with default settings on the scaled features, then project X onto
# the fitted components.
pca=PCA().fit(X)
CPX=pca.transform(X)

In [None]:
# Players in the plane of the first two principal components.
# Uses the explicit figure/axes interface and ends with plt.show(); the former
# argument-less plt.plot() call drew nothing.
fig,ax=plt.subplots(figsize=(10,8))
ax.scatter(CPX[:,0],CPX[:,1])
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
ax.set_title('Players on the first two principal components')
plt.show()

In [None]:
# Partition the players into four clusters in the scaled feature space.
# n_init is pinned explicitly: its default differs between scikit-learn
# releases (10 in older versions, 'auto' in newer ones), which would otherwise
# change the clustering obtained for the same random_state.
kmean=KMeans(n_clusters=4,n_init=10,random_state=1)
kmean.fit(X)
clusters=kmean.predict(X)

In [None]:
# Same projection as before, coloured by K-means cluster label.
# Uses the explicit figure/axes interface and ends with plt.show(); the former
# argument-less plt.plot() call drew nothing.
fig,ax=plt.subplots(figsize=(10,8))
# The labels are flattened so the colour array is one-dimensional even when
# `clusters` has already been reshaped into a column by a later cell.
ax.scatter(CPX[:,0],CPX[:,1],c=np.ravel(clusters))
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
ax.set_title('K-means clusters on the first two principal components')
plt.show()

In [None]:
# Reshape the labels into a single column and store them on the player table;
# fit_models later filters rows through df['clusters'].
clusters=clusters.reshape(-1,1)
df['clusters']=clusters
df[['short_name','clusters']].head(20)

In [None]:
# Distinct cluster labels, sorted ascending by np.unique.
Clust=np.unique(clusters)
def fit_models(C):
  """Fit one linear model per cluster label.

  For each label in C the matching rows of df2 are min-max scaled on their
  own, handed to compute_coefs_metrics_pred, and the rounded predictions are
  attached to a per-cluster copy of the player names.

  Args:
    C: iterable of cluster labels to look up in df['clusters'].

  Returns:
    Tuple ``(models, met, df_, coefs)`` of lists ordered like C:
      models: fitted model per cluster.
      met: metrics dict per cluster.
      df_: DataFrame per cluster with 'short_name', 'nationality', 'overall'
        and the rounded 'overall_predict'.
      coefs: coefficient table per cluster.
  """
  models=[]
  met=[]
  coefs=[]
  df_=[]
  for i in C:
    in_cluster=df['clusters']==i
    data=df2[in_cluster]

    X_cluster=np.array(data.drop('overall',axis=1),dtype=float)
    y_cluster=np.array(data[['overall']],dtype=float)

    # A scaler local to this cluster: refitting the notebook-level `scaler`
    # here would overwrite the ranges it learned on the full data set and
    # leave it fitted on whichever cluster was processed last.
    X_cluster=MinMaxScaler().fit_transform(X_cluster)

    model , metric, coef, prediction=compute_coefs_metrics_pred(X_cluster,y_cluster,var_names)
    print('---------------------------------------------------------------')
    models.append(model)
    met.append(metric)
    coefs.append(coef)

    # .copy() yields an independent frame, so adding a column neither triggers
    # SettingWithCopyWarning nor touches df.
    data=df.loc[in_cluster,['short_name','nationality','overall']].copy()
    # Predictions are flattened so a plain 1-D column is assigned.
    data['overall_predict']=np.round(prediction).ravel()
    df_.append(data)
  return models,met,df_,coefs

In [None]:
# Fit one model per cluster label.
# NOTE(review): this rebinds `met` and `coefs` from the single-model objects
# created earlier to lists, so the earlier cells that display them no longer
# work if re-run after this one.
models,met,data_frames,coefs=fit_models(Clust)

In [None]:
# Metric summary of every per-cluster model; `met` is ordered like the labels
# passed to fit_models, so the position doubles as the printed cluster number.
for j,i in enumerate(met):
  print('Cluster {}'.format(j))
  print(i['STR'],'\n-------------------------------------------')

In [None]:
# First rows of the first per-cluster result table (true vs. predicted overall).
data_frames[0].head(10)

In [None]:
# First rows of the second per-cluster result table (true vs. predicted overall).
data_frames[1].head(10)

In [None]:
# First rows of the third per-cluster result table (true vs. predicted overall).
data_frames[2].head(10)

In [None]:
# First rows of the fourth per-cluster result table (true vs. predicted overall).
data_frames[3].head(10)

In [None]:
# Coefficient table of the first per-cluster model (`coefs` is a list at this point).
coefs[0]

In [None]:
# Mexican players in the fourth per-cluster result table.
data_frames[3].query('nationality=="Mexico"').head(10)

In [None]:
# Mexican players in the first per-cluster result table.
data_frames[0].query('nationality=="Mexico"').head(10)

In [None]:
# Mexican players in the third per-cluster result table.
data_frames[2].query('nationality=="Mexico"').head(10)

In [None]:
# Mexican players in the second per-cluster result table.
data_frames[1].query('nationality=="Mexico"').head(10)

Unnamed: 0,short_name,nationality,overall,overall_predict
1067,C. Salcedo,Mexico,77,76.0
1356,N. Araújo,Mexico,76,77.0
2698,C. Montes,Mexico,73,73.0
3797,J. Pereira,Mexico,72,72.0
4173,A. Briseño,Mexico,71,71.0
4387,L. Quintana,Mexico,71,71.0
4476,J. Domínguez,Mexico,71,71.0
5173,H. Mier,Mexico,70,71.0
5646,E. Tercero,Mexico,70,70.0
6304,I. González,Mexico,69,68.0
