In [1]:
%run ../../Setup.ipynb
import ast
import json
import pandas as pd
import numpy as np
from bdcc.database.connection import database_connector as connector
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

In [2]:
def write_match(match):
    """
    Insert a match into the database unless it is already stored.

    params: *match: dict of match data; must contain the key 'match_id'
    return: HTTP-Status Code - 201 if the match was created,
            409 if an entry for this match_id already exists
    """
    db_con = connector.DatabaseConnector()
    db_con.connect()
    try:
        # Only truthy entries returned by the lookup count as an existing match.
        match_found = any(db_con.get(id=match['match_id']))

        if match_found:
            http_status = 409
        else:
            db_con.create(match)  # create new entry in database
            http_status = 201
    finally:
        # Release the connection even if the lookup or the insert raises.
        db_con.disconnect()

    return http_status

In [3]:
def delete_match(match_id):
    """
    Delete a match from the database if it exists.

    params: *match_id: id of the match to be deleted
    return: HTTP-Status Code - 205 if the match was found and removed,
            204 if no stored match has this match_id
    """
    db_con = connector.DatabaseConnector()
    db_con.connect()
    try:
        # Check whether the match to delete exists in the database. any()
        # stops at the first hit, so the entries are not copied into a list.
        match_exists = any(
            entry['match_id'] == match_id for entry in db_con.get()
        )

        if match_exists:
            db_con.remove(match_id)
            http_status = 205
        else:
            http_status = 204
    finally:
        # Release the connection even if the lookup or the removal raises.
        db_con.disconnect()

    return http_status

In [4]:
def train_model():
    """Placeholder for model training; not implemented yet, returns None."""
    return None

In [5]:
def predict():
    """Placeholder for prediction; not implemented yet, returns None."""
    return None

In [2]:
http_status = 200  # not read in this cell; kept in case another cell relies on it
db_con = connector.DatabaseConnector()
db_con.connect()

try:
    # Copy every stored match into a list so the connection can be closed
    # before the analysis starts.
    matches = list(db_con.get())
finally:
    # Always release the connection, even if the fetch fails.
    db_con.disconnect()

<bdcc.database.connection.database_connector.DatabaseConnector at 0x7f6f7c4dfc10>

In [21]:
matches[:2]  # preview only; displaying the full list floods the notebook output

[{'_id': ObjectId('60d33ba83420b8edaaa9cba8'),
  'match_id': 6049289816,
  'radiant_win': True,
  'players': [{'pings': 2,
    'assists': 3,
    'deaths': 1,
    'kills': 21,
    'win': True},
   {'pings': 6, 'assists': 10, 'deaths': 1, 'kills': 0, 'win': True},
   {'pings': 11, 'assists': 7, 'deaths': 4, 'kills': 2, 'win': True},
   {'pings': 0, 'assists': 6, 'deaths': 0, 'kills': 9, 'win': True},
   {'pings': 2, 'assists': 2, 'deaths': 2, 'kills': 1, 'win': True},
   {'pings': 4, 'assists': 1, 'deaths': 14, 'kills': 1, 'win': False},
   {'pings': 12, 'assists': 1, 'deaths': 11, 'kills': 1, 'win': False},
   {'pings': 9, 'assists': 1, 'deaths': 3, 'kills': 0, 'win': False},
   {'pings': 1, 'assists': 1, 'deaths': 2, 'kills': 2, 'win': False},
   {'pings': 1, 'assists': 3, 'deaths': 5, 'kills': 1, 'win': False}]},
 {'_id': ObjectId('60d33ba83420b8edaaa9cbaa'),
  'match_id': 6049281711,
  'radiant_win': True,
  'players': [{'pings': 16,
    'assists': 9,
    'deaths': 4,
    'kills': 1,

In [3]:
# user story 1: total pings per side of each match, labelled with the outcome

test_list = []

for match_entry in matches:
    players = match_entry['players']
    # Sum the pings separately for the winning and the losing players.
    total_pings_win = sum(player['pings'] for player in players if player['win'])
    total_pings_loss = sum(player['pings'] for player in players if not player['win'])
    # Two rows per match: [pings of winners, 1] and [pings of losers, 0].
    test_list.append([total_pings_win, 1])
    test_list.append([total_pings_loss, 0])

dataframe = pd.DataFrame(test_list, columns=['ttlPings', 'win'])

In [4]:
# Drop every row that contains a missing value, then display the frame.
dataframe = dataframe.dropna(axis=0, how='any')
dataframe

Unnamed: 0,ttlPings,win
0,21,1
1,27,0
2,64,1
3,57,0
4,9,1
...,...,...
327,106,0
328,50,1
329,65,0
330,34,1


In [5]:
# user story 1
y_data_labels = dataframe['win']
x_data_features = dataframe['ttlPings']

In [6]:
# Turn the feature values into a 2-D array with exactly one column.
x_data_features = np.reshape(np.array(x_data_features), (-1, 1))

In [7]:
# Hold out 20 % of the samples for testing; the seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x_data_features,
    y_data_labels,
    test_size=0.2,
    random_state=10,
)

In [8]:
# Candidate classifiers, each paired with the label used in the printed report.
# Every estimator that accepts random_state gets a fixed seed so reruns are
# reproducible; this includes DecisionTreeClassifier. KNeighborsClassifier and
# GaussianNB have no random_state parameter.
models = [
    # max_iter is raised so that training reaches convergence
    ("MLPClassifier", MLPClassifier(random_state=10, max_iter=1000)),
    ("RandomForestClassifier", RandomForestClassifier(random_state=10)),
    ("SVM", svm.SVC(random_state=10)),
    ("SGDClassifier", SGDClassifier(random_state=10)),
    ("DecisionTreeClassifier", DecisionTreeClassifier(random_state=10)),
    ("KNeighborsClassifier", KNeighborsClassifier()),
    ("GaussianNB", GaussianNB()),
]

# Keep the clf1..clf7 names bound to the individual estimators.
clf1, clf2, clf3, clf4, clf5, clf6, clf7 = (model for _, model in models)

for label, model in models:
    model.fit(x_train, y_train)
    # score() reports the mean accuracy on the held-out test split
    print(f"Gesamtgenauigkeit {label}: ", model.score(x_test, y_test))
    if model is clf1:
        print("Proba: ", clf1.predict_proba(x_test))

Gesamtgenauigkeit MLPClassifier:  0.5522388059701493
Proba:  [[0.50788646 0.49211354]
 [0.48204039 0.51795961]
 [0.48505799 0.51494201]
 [0.49059293 0.50940707]
 [0.49864751 0.50135249]
 [0.49965446 0.50034554]
 [0.50519256 0.49480744]
 [0.50418568 0.49581432]
 [0.49562673 0.50437327]
 [0.49613018 0.50386982]
 [0.48958637 0.51041363]
 [0.45896509 0.54103491]
 [0.50519256 0.49480744]
 [0.50267532 0.49732468]
 [0.50951887 0.49048113]
 [0.50811657 0.49188343]
 [0.45297026 0.54702974]
 [0.51264869 0.48735131]
 [0.49663363 0.50336637]
 [0.46346911 0.53653089]
 [0.48958637 0.51041363]
 [0.49764056 0.50235944]
 [0.4745026  0.5254974 ]
 [0.46396994 0.53603006]
 [0.4951233  0.5048767 ]
 [0.48254324 0.51745676]
 [0.50217185 0.49782815]
 [0.50834667 0.49165333]
 [0.4951233  0.5048767 ]
 [0.47751652 0.52248348]
 [0.48807668 0.51192332]
 [0.51321921 0.48678079]
 [0.4951233  0.5048767 ]
 [0.49613018 0.50386982]
 [0.46547286 0.53452714]
 [0.50217185 0.49782815]
 [0.48807668 0.51192332]
 [0.50857677 0

In [9]:
# user story 2+3
filter_keys = ['assists', 'deaths', 'kills', 'pings', 'win']
player_kda = []

for j in range(len(matches)):
    for i in range(len(matches[j]['players'])):
        player_kda.append({key:value for key, value in matches[j]['players'][i].items() if key in filter_keys})

dataframe_kda = pd.DataFrame(player_kda)

In [10]:
# Drop every row that contains a missing value, then display the frame.
dataframe_kda = dataframe_kda.dropna(axis=0, how='any')
dataframe_kda

Unnamed: 0,pings,assists,deaths,kills,win
0,2,3,1,21,True
1,6,10,1,0,True
2,11,7,4,2,True
3,0,6,0,9,True
4,2,2,2,1,True
...,...,...,...,...,...
1655,5,11,3,4,True
1656,1,7,6,12,True
1657,2,13,4,4,True
1658,5,9,1,19,True


In [27]:
# user story 2+3 training
y_data_labels = dataframe_kda['win']
x_data_features = dataframe_kda.drop(['win'], axis=1)

x_train, x_test, y_train, y_test = train_test_split(x_data_features, y_data_labels, test_size=0.2, random_state = 50)

In [28]:
# Candidate classifiers, each paired with the label used in the printed report.
# Every estimator that accepts random_state gets a fixed seed so reruns are
# reproducible; this includes DecisionTreeClassifier. KNeighborsClassifier and
# GaussianNB have no random_state parameter.
models = [
    # max_iter is raised so that training reaches convergence
    ("MLPClassifier", MLPClassifier(random_state=10, max_iter=1000)),
    ("RandomForestClassifier", RandomForestClassifier(random_state=10)),
    ("SVM", svm.SVC(random_state=10)),
    ("SGDClassifier", SGDClassifier(random_state=10)),
    ("DecisionTreeClassifier", DecisionTreeClassifier(random_state=10)),
    ("KNeighborsClassifier", KNeighborsClassifier()),
    ("GaussianNB", GaussianNB()),
]

# Keep the clf1..clf7 names bound to the individual estimators.
clf1, clf2, clf3, clf4, clf5, clf6, clf7 = (model for _, model in models)

for label, model in models:
    model.fit(x_train, y_train)
    # score() reports the mean accuracy on the held-out test split
    print(f"Gesamtgenauigkeit {label}: ", model.score(x_test, y_test))

Gesamtgenauigkeit MLPClassifier:  0.9367469879518072
Gesamtgenauigkeit RandomForestClassifier:  0.9307228915662651
Gesamtgenauigkeit SVM:  0.9307228915662651
Gesamtgenauigkeit SGDClassifier:  0.9126506024096386
Gesamtgenauigkeit DecisionTreeClassifier:  0.9006024096385542
Gesamtgenauigkeit KNeighborsClassifier:  0.9066265060240963
Gesamtgenauigkeit GaussianNB:  0.9307228915662651


In [13]:
# Print the class probabilities clf1 predicts for x_data_features.
print("Proba: ", clf1.predict_proba(X=x_data_features))

Proba:  [[3.53979018e-05 9.99964602e-01]
 [4.42480041e-02 9.55751996e-01]
 [5.70288411e-01 4.29711589e-01]
 ...
 [4.29996348e-02 9.57000365e-01]
 [4.24465409e-06 9.99995755e-01]
 [2.12700736e-04 9.99787299e-01]]


In [14]:
# Keep clf1's predicted class probabilities for inspection in later cells.
olol = clf1.predict_proba(X=x_data_features)

In [15]:
# Predicted class probabilities for the sample at position 7 of olol.
olol[7]

array([0.95279286, 0.04720714])

In [16]:
# Positional lookup, so this is the same row as position 7 of the prediction
# array; a label lookup would drift once dropna() removes an earlier row.
dataframe_kda.iloc[7]

pings          9
assists        1
deaths         3
kills          0
win        False
Name: 7, dtype: object