In [24]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load both CSV tables (paths are relative to the working directory):
# `df` is the table the model is fitted on, `test` is the one predicted on later.
df, test = (pd.read_csv(csv_path) for csv_path in ('dpcmajor.csv', 'testdota.csv'))

In [25]:
# Preprocessing: replace the first column of each table with integer codes
# and call it 'TeamId'. The encoder is re-fit for every table, so the codes
# are only meaningful within a single table (it ends up fitted on `test`).
label_encoder = LabelEncoder()
for frame in (df, test):
    first_col = frame.columns[0]
    frame[first_col] = label_encoder.fit_transform(frame[first_col])
    frame.rename(columns={first_col: 'TeamId'}, inplace=True)

# Lookup tables: TeamId plus the second column of each table.
dicionario, dicionario_test = df.iloc[:, :2], test.iloc[:, :2]

# Working copies without that second column.
df_relevant = df.drop(columns=df.columns[1])
test_relevant = test.drop(columns=test.columns[1])


In [26]:
non_relevant_variables = ['Winrate', 'Overall', 'In Wins', 'In Losses', 'Shift']


def _prepare_model_frame(frame):
    """Return a new frame reduced to the columns used for modelling.

    Steps, applied identically to the training and the test table:
      1. drop the columns listed in `non_relevant_variables`;
      2. add `winrate` = Wins / (Wins + Losses);
      3. drop Wins, Losses, As Radiant and As Dire;
      4. convert Kills, Deaths and Assists from decimal-comma text to float.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table that still contains every column named above.

    Returns
    -------
    pandas.DataFrame
        A new frame; `frame` itself is not modified.

    Raises
    ------
    KeyError
        If one of the expected columns is missing (for example when this
        cell is executed a second time on an already-prepared frame).
    """
    prepared = frame.drop(columns=non_relevant_variables)
    prepared['winrate'] = prepared['Wins'] / (prepared['Wins'] + prepared['Losses'])
    prepared = prepared.drop(columns=['Wins', 'Losses', 'As Radiant', 'As Dire'])

    for column in ('Kills', 'Deaths', 'Assists'):
        values = prepared[column]
        # `.str` only exists on text columns; if read_csv already parsed the
        # column as numeric (no decimal commas in that file) skip the replace
        # instead of crashing with AttributeError.
        if not pd.api.types.is_numeric_dtype(values):
            values = values.str.replace(',', '.', regex=False)
        prepared[column] = values.astype(float)
    return prepared


# One code path for both tables so train and test preprocessing cannot drift.
df_relevant = _prepare_model_frame(df_relevant)
test_relevant = _prepare_model_frame(test_relevant)

In [27]:
# Two-step model: standardize the features, then fit a linear regression.
pipeline_steps = [
    ('preprocessing', StandardScaler()),
    ('regression', LinearRegression()),
]
pipeline = Pipeline(pipeline_steps)

In [28]:
# Prepare the data for training: feature matrix, target, and an 80/20
# hold-out split with a fixed seed so the split is reproducible.
feature_columns = ['Kills', 'Deaths', 'Assists', 'Denies', 'GPM', 'XPM', 'Last Hits']
X, y = df_relevant[feature_columns], df_relevant['winrate']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [29]:
# Fit every pipeline step on the training split only.
pipeline.fit(X=X_train, y=y_train)

In [30]:
# Mean squared error of the fitted pipeline on the held-out split.
y_pred = pipeline.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
mse

0.004424997993731984

In [31]:
# Predict for the test table using exactly the columns the model was fitted
# on. Taking them from `X.columns` instead of repeating the literal list keeps
# a single source of truth, so the prediction features cannot drift from the
# training features if that list is edited.
X_new = test_relevant[X.columns]
y_new = pipeline.predict(X_new)

In [32]:
# Pair each prediction with its TeamId, then attach the lookup columns
# (TeamId + the second column of the test table) from `dicionario_test`.
base_submission = pd.DataFrame({'TeamId': test_relevant['TeamId'], 'Winrate': y_new})
base_submission = pd.merge(base_submission, dicionario_test, how='inner', on='TeamId')
base_submission = base_submission.sort_values('Winrate', ascending=False)

# `DataFrame.drop` returns a new frame; calling it without using the result
# (as this cell previously did) left TeamId in the written file. Drop the
# column only for the CSV: `base_submission` itself keeps TeamId because the
# comparison cell below merges on it.
base_submission.drop(columns='TeamId').to_csv('previsão.csv', index=False)

In [33]:
# Name of the second column of the test table (the one kept in the lookup
# table next to TeamId).
team_name_col = test.columns[1]

# NOTE: 'Winrate' is still percent text here (it is parsed below), so this
# ordering is lexicographic; it does not affect the merged result, which is
# re-sorted on the numeric value in the next cell.
sorted_test = test[[team_name_col, 'Winrate', 'TeamId']].sort_values('Winrate', ascending=False)

# Both sides carry 'Winrate' and the team-name column, so the merge suffixes
# them with '_prediction' (left) and '_real' (right).
comparison = base_submission.merge(sorted_test, on='TeamId', suffixes=('_prediction', '_real'))

# Convert text such as '73,68%' into a fraction (0.7368).
comparison['Winrate_real'] = (
    comparison['Winrate_real']
    .str.replace(',', '.', regex=False)
    .str.replace('%', '', regex=False)
    .astype(float)
    / 100
)

# Drop the join key and the duplicated team-name column by name rather than
# by position, so a change in column order cannot remove the wrong column.
comparison = comparison.drop(columns=['TeamId', f'{team_name_col}_real'])

In [37]:
# Signed error: predicted win rate minus the real one.
erro = comparison['Winrate_prediction'] - comparison['Winrate_real']
comparison = comparison.assign(erro=erro)

# On the first run the team-name column sits at position 1 and is renamed to
# 'Team'. Guarding the rename makes the cell safe to re-run: without it, a
# second execution would rename 'Winrate_prediction' (now at position 1)
# and the column selection below would fail.
if 'Team' not in comparison.columns:
    comparison = comparison.rename(columns={comparison.columns[1]: 'Team'})

comparison = (
    comparison[['Team', 'Winrate_prediction', 'Winrate_real', 'erro']]
    .sort_values('Winrate_real', ascending=False)
)
comparison

Unnamed: 0,Team,Winrate_prediction,Winrate_real,erro
0,Quest Esports,0.668839,0.75,-0.081161
2,Team Liquid,0.648991,0.7368,-0.087809
1,BetBoom Team,0.655457,0.6667,-0.011243
3,Gaimin Gladiators,0.639887,0.6316,0.008287
4,Team Aster,0.596381,0.625,-0.028619
8,PSG.LGD,0.560192,0.5789,-0.018708
9,beastcoast,0.535103,0.5789,-0.043797
5,Team Spirit,0.596043,0.5625,0.033543
6,Tundra Esports,0.594477,0.5556,0.038877
7,9Pandas,0.569159,0.5,0.069159
