### Modelo de Regresión Support Vector Machine

##### Paso 1: importar los paquetes necesarios

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import r2_score, mean_squared_error
from scipy import stats
from scipy.stats import pearsonr
from sklearn.ensemble import RandomForestClassifier

In [2]:
# Leer en los datos 
data = pd.read_csv('players_15.csv')

# Seleccionar el subconjunto de datos
dataset = data[["short_name", "overall", "pace", "shooting", "passing", "dribbling", "defending", "physic"]]

# Reemplazar los valores nan por 0
dataset = dataset.replace(np.nan, 0)
dataset.head()

Unnamed: 0,short_name,overall,pace,shooting,passing,dribbling,defending,physic
0,L. Messi,93,93.0,89.0,86.0,96.0,27.0,63.0
1,Cristiano Ronaldo,92,93.0,93.0,81.0,91.0,32.0,79.0
2,A. Robben,90,93.0,86.0,83.0,92.0,32.0,64.0
3,Z. Ibrahimović,90,76.0,91.0,81.0,86.0,34.0,86.0
4,M. Neuer,90,0.0,0.0,0.0,0.0,0.0,0.0


In [3]:
# Visualizamos la correlacion de los datos
dataset.corr()

Unnamed: 0,overall,pace,shooting,passing,dribbling,defending,physic
overall,1.0,0.146646,0.335387,0.360834,0.30759,0.221482,0.24085
pace,0.146646,1.0,0.803523,0.844271,0.9099,0.569941,0.794396
shooting,0.335387,0.803523,1.0,0.870799,0.914318,0.378075,0.713964
passing,0.360834,0.844271,0.870799,1.0,0.951239,0.658769,0.816445
dribbling,0.30759,0.9099,0.914318,0.951239,1.0,0.555199,0.789923
defending,0.221482,0.569941,0.378075,0.658769,0.555199,1.0,0.820978
physic,0.24085,0.794396,0.713964,0.816445,0.789923,0.820978,1.0


In [4]:
# definir variables de predicción y respuesta
x = dataset[["pace", "shooting", "passing", "dribbling", "defending", "physic"]]
y = dataset["overall"]

In [5]:
# Separar los datos de entrenamiento y prueba
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3,random_state=42) # 70% entrenamiento y 30% prueba

In [6]:
# Iniciar el modelo
forest = RandomForestClassifier()
forest.fit(x_train, y_train)

RandomForestClassifier()

In [7]:
# Generar una prediccion
y_pred = forest.predict(x_test)

In [8]:
# r2_score
r = r2_score(y_test, y_pred)
print("Valor R^2: %.3f"%r)

Valor R^2: 0.802


In [9]:
# mean squared error
m = mean_squared_error(y_test, y_pred)
print("Valor RME: %.3f"%m)

Valor RME: 10.256


In [10]:
# p_value
r, p = stats.ttest_ind(y_test, y_pred)
print("Valor P_value: %.3f"%p)

Valor P_value: 0.319


In [11]:
# Pearson
r, p = stats.pearsonr(y_test, y_pred)
print("Factor de Pearson: %.3f"%r)

Factor de Pearson: 0.896


In [12]:
# Presicion del modelo
print("Precision del modelo: ", forest.score(x_test, y_test))

Precision del modelo:  0.225
