## LOL ARAM Project Data Pull

I primarily used the [Cassiopeia](https://github.com/meraki-analytics/cassiopeia) package to pull data from the Riot API.

In [16]:
import random
import cassiopeia as cass
import pandas as pd

# Set API KEY
# Prefer the RIOT_API_KEY environment variable so the real key never has to be
# saved inside the notebook; the placeholder below is only used as a fallback.
import os

api_key = os.environ.get("RIOT_API_KEY", 'INSERT API KEY HERE')

cass.set_riot_api_key(api_key)
cass.print_calls(False)

Test whether the key works.  Note that "summoner" is what League of Legends calls its players.

In [17]:
# Every lookup below is made against the "NA" region.
cass.set_default_region("NA")

# Fetch one summoner by name and print a short summary to confirm the key works.
summoner = cass.get_summoner(name="4real")
print(
    "{name} is a level {level} summoner on the {region} server.".format(
        name=summoner.name,
        level=summoner.level,
        region=summoner.region,
    )
)

Making call: https://na1.api.riotgames.com/lol/summoner/v4/summoners/by-name/4real
4real is a level 74 summoner on the Region.north_america server.


## Get champion names and create a vectorizer

In [18]:
# Load the Data Dragon champion list (champion.json, from the LOL website).
champions = pd.read_json("champion.json")

# Every entry of the "data" column holds one champion's record; copy its
# key value and display name into their own columns.
champions["key"] = [entry["key"] for entry in champions["data"]]
champions["name"] = [entry["name"] for entry in champions["data"]]

champions[["key", "name"]]

Unnamed: 0,key,name
Aatrox,266,Aatrox
Ahri,103,Ahri
Akali,84,Akali
Alistar,12,Alistar
Amumu,32,Amumu
Anivia,34,Anivia
Annie,1,Annie
Ashe,22,Ashe
AurelionSol,136,Aurelion Sol
Azir,268,Azir


Using sklearn's `CountVectorizer`, we want to convert a list of champion names into a sparse vector where each element indicates whether the corresponding champion is present.

For example:

$$ \text{["Annie", "Ahri", "Zed", "Ashe", "Ryze"]} \rightarrow \begin{bmatrix} 1 & 0 & \cdots & 1 & 0 \end{bmatrix} $$

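$$ \text{Red Team: ["Annie", "Ahri", "Zed", "Ashe", "Ryze"]}, \ \text{Blue Team: ["Nasus", "Vayne", "Kayle", "Zyra", "Galio"]} \rightarrow \begin{bmatrix} -1 & 1 & \cdots & 1 & 0 \end{bmatrix} $$

(The entries shown are illustrative: $+1$ marks a blue-team champion, $-1$ a red-team champion, and $0$ a champion that is not in the match.)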

In [15]:
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np

# Custom token pattern: prevents one-character key values from being removed.
cv = CountVectorizer(token_pattern=r"(?u)\b\w+\b")

# Learn the vocabulary from the champions' key values.
cv.fit(champions["key"].values)

In [None]:
# Export the count vectorizer as a pickle

import pickle

# A context manager guarantees the file is flushed and closed after the dump,
# instead of leaving the handle open for the garbage collector.
with open("countvectorizer.p", "wb") as cv_file:
    pickle.dump(cv, cv_file)

## Get Data!

The process:

1. We want to get a list of ARAM match IDs
2. Iterate through the match IDs, extract the red and blue champions as lists, convert the champion lists into a sparse vector, and compile the results into a dataframe

The following code was adapted from the examples in the [Cassiopeia repo](https://github.com/meraki-analytics/cassiopeia/blob/master/examples/match_collection.py).  This will give a list of ARAM match IDs that we will iterate through later.

### Step 1

In [None]:
import random
from sortedcontainers import SortedList
import arrow

from cassiopeia.core import Summoner, MatchHistory, Match
from cassiopeia import Queue, Patch

# Restrict a summoner's match history to ARAM games played during one patch.
def filter_match_history(summoner, patch):
    """Return the summoner's ARAM match history for the given patch.

    Args:
        summoner: the summoner whose history is requested.
        patch: object exposing ``start`` and ``end``; when ``end`` is None
            the current time is used as the end of the window instead.

    Returns:
        A ``MatchHistory`` limited to ``Queue.aram`` between ``patch.start``
        and the resolved end time.
    """
    patch_end = patch.end
    window_end = arrow.now() if patch_end is None else patch_end
    return MatchHistory(
        summoner=summoner,
        queues={Queue.aram},
        begin_time=patch.start,
        end_time=window_end,
    )


# Initial summoner
initial_summoner_name = "4real"
region = "NA"

# create the Cassiopeia summoner object and the patch whose time window we crawl
summoner = Summoner(name=initial_summoner_name, region=region)
patch = Patch.from_str("9.19", region=region)

# create sorted lists for player IDs (we start with the initial summoner)
unpulled_summoner_ids = SortedList([summoner.id])
pulled_summoner_ids = SortedList()

# create sorted lists for ARAM match IDs
unpulled_match_ids = SortedList()
pulled_match_ids = SortedList() # This is the list of interest

# number of matches you want pulled
num_matches = 100000


# Finally, crawl through the summoners and collect match IDs
while unpulled_summoner_ids and len(pulled_match_ids) < num_matches:
    # Get a random summoner from our list of unpulled summoners and pull their match history
    new_summoner_id = random.choice(unpulled_summoner_ids)
    new_summoner = Summoner(id=new_summoner_id, region=region)

    # Get the ARAM matches from the random summoner
    matches = filter_match_history(new_summoner, patch)

    # SortedList keeps duplicate values, and the same match can show up in the
    # history of more than one crawled summoner. Only queue IDs that are neither
    # already pulled nor already waiting, otherwise the same match would be
    # pulled (and later turned into a dataset row) more than once.
    candidate_match_ids = {match.id for match in matches}
    unpulled_match_ids.update([
        match_id
        for match_id in candidate_match_ids
        if match_id not in pulled_match_ids and match_id not in unpulled_match_ids
    ])

    # After we pull the match IDs from the summoner, we mark the summoner as done
    unpulled_summoner_ids.remove(new_summoner_id)
    pulled_summoner_ids.add(new_summoner_id)

    while unpulled_match_ids and len(pulled_match_ids) < num_matches:
        # Get a random match from our list of matches
        new_match_id = random.choice(unpulled_match_ids)
        new_match = Match(id=new_match_id, region=region)
        for participant in new_match.participants:
            if participant.summoner.id not in pulled_summoner_ids and participant.summoner.id not in unpulled_summoner_ids:
                unpulled_summoner_ids.add(participant.summoner.id)
        # The above lines will trigger the match to load its data by iterating over all the participants.
        # If you have a database in your datapipeline, the match will automatically be stored in it.
        unpulled_match_ids.remove(new_match_id)
        pulled_match_ids.add(new_match_id)

        # Check progress
        if len(pulled_match_ids) % 100 == 0:
            print(len(pulled_match_ids))
             


In [None]:
# Save into pickle
import pickle

# Use a context manager so the file is flushed and closed once the dump finishes.
with open("pulled_match_ids.p", "wb") as match_ids_file:
    pickle.dump(pulled_match_ids, match_ids_file)

Now that we have a list of match IDs, we iterate through them and extract the champions on each team and the target variable (whether the blue team won or not).

### Step 2

In [None]:
# Open the pulled match IDs.
# NOTE: pickle.load can execute arbitrary code, so only load pickle files you created yourself.
with open("pulled_match_ids.p", "rb") as match_ids_file:
    match_ids = pickle.load(match_ids_file)

# CountVectorizer.get_feature_names() was removed in scikit-learn 1.2; use its
# replacement when it exists and fall back to the old name on older installs.
if hasattr(cv, "get_feature_names_out"):
    feature_keys = cv.get_feature_names_out()
else:
    feature_keys = cv.get_feature_names()

# Column labels: match ID, target, then one column per vectorizer feature, labelled
# with the entry of the champions index whose key equals that feature.
final_columns = ["MatchID","BlueWin"] + [(champions.index)[champions.key == str(i)][0] for i in feature_keys]

# Collect one row per match and build the dataframe once at the end;
# appending to a DataFrame row by row gets slower as the frame grows.
match_rows = []

for ids in match_ids:

    current_match = Match(id=ids, region="NA") # get match object
    participants = current_match.participants  # get champions in the match
    result = int(current_match.blue_team.win)  # get result of the match (1 if blue won)

    blue_team = []
    red_team = []
    for p in participants:

        # list of champions (by their key value, not their actual names)
        if p.team.side.name == 'blue':
            blue_team.append(str(p.champion.id))
        else:
            red_team.append(str(p.champion.id))

    # to get +1 for blue team and -1 for red team, we subtract the two vectors
    X = (cv.transform([",".join(blue_team)]) - cv.transform([",".join(red_team)])).toarray()

    # Join ID, result, and the match vector (as a single row)
    match_rows.append(np.append(np.array([ids,result]),X))

# create the dataframe from all collected rows (index is 0..n-1, one row per match)
final_df = pd.DataFrame(match_rows, columns=final_columns)

# Save the finished dataframe as a pickle, closing the file afterwards
with open("final_df.p", "wb") as final_df_file:
    pickle.dump(final_df, final_df_file)