# Loteca

In [300]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

### Ler arquivo

In [301]:
df = pd.read_csv("BRA.csv")

### Nomes dos times

In [302]:
vocab = df['Home'].unique()

### Transformar em índices

In [305]:
word2idx = {word: i for i, word in enumerate(vocab)}

### Função para indexar os jogos

In [306]:
def text_to_vector2(text): 
    return word2idx.get(text, None)


### Indexar os resultados

In [307]:
X = []
y = []

for _, row in df.iterrows():
    X.append([text_to_vector2(row['Home']), text_to_vector2(row['Away'])])    

    n = -row['HG'] + row['AG']
    
    if np.isnan(n):
        n = 0
    
    y.append(n)


### Separar os datasets de treinamento e teste

In [308]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, shuffle=True)

### Suport vector machine

In [309]:
from sklearn.svm import SVR
model = SVR(gamma='scale', C=1.0, epsilon=0.2)

### Random forest

In [310]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression
model = RandomForestRegressor(max_depth=2, random_state=0, n_estimators=100)

### Treinar o modelo

In [311]:
model.fit(X_train, y_train)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=2,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None,
           oob_score=False, random_state=0, verbose=0, warm_start=False)

### Avaliar precisão

In [321]:
res = []
for j in range(len(X_test)):
    r = model.predict([X_test[j]])
    res.append((r - y_test[j]))
print(round((1 - abs(np.mean(res))) * 100,0), '%')

95.0 %


### Lista de times

In [313]:
vocab

array(['Palmeiras', 'Sport Recife', 'Figueirense', 'Botafogo RJ',
       'Corinthians', 'Internacional', 'Ponte Preta', 'Bahia', 'Cruzeiro',
       'Vasco', 'Atletico GO', 'Flamengo RJ', 'Portuguesa', 'Nautico',
       'Atletico-MG', 'Coritiba', 'Santos', 'Sao Paulo', 'Fluminense',
       'Gremio', 'Vitoria', 'Criciuma', 'Atletico-PR', 'Goias',
       'Chapecoense-SC', 'Avai', 'Joinville', 'Santa Cruz', 'America MG',
       'Parana', 'Ceara'], dtype=object)

### Realizar previsão

In [329]:
p = model.predict([[word2idx.get('Joinville', None), word2idx.get('Santa Cruz', None)]])

if p < 0:
    print('Time da casa vence', round(abs(p[0])*100,0),' %')
else:
    print('Visitante vence', round(abs(p[0])*100,0),' %')

Time da casa vence 73.0  %
