In [None]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import math
import json
import collections
import re
from pprint import pprint
import datetime
from heapq import nlargest

In [None]:
def clean_data(data):
    """Convert a list of raw timeline events into a DataFrame.

    Parameters
    ----------
    data : iterable of dict
        Events as loaded from the timeline JSON files. Every event must
        provide the keys 'roundIdx', 'date', 'type' and 'data'
        (a missing key raises KeyError).

    Returns
    -------
    pandas.DataFrame
        One row per event, in input order, with the columns matchIdx,
        roundIdx, date, type, data, teams and highlight_score.
        matchIdx and highlight_score start at 0; teams is never populated
        here and is therefore left empty (NaN).
    """
    column_names = ['matchIdx', 'roundIdx', 'date', 'type', 'data', 'teams', 'highlight_score']
    # Build all rows first and create the frame once: DataFrame.set_value was
    # removed from pandas, and cell-by-cell growth is needlessly slow.
    records = [
        {
            'matchIdx': 0,
            'roundIdx': event['roundIdx'],
            'date': event['date'],
            'type': event['type'],
            'data': event['data'],
            'highlight_score': 0,
        }
        for event in data
    ]
    return pd.DataFrame(records, columns=column_names)

In [None]:
num_of_files = 11
# Files whose 1-based number is below this cutoff go to the training set,
# so with a cutoff of 7 the files 1-6 are used for training.
train_test_index_cutoff = 7
jsonfp = []
train = []
test = []
for i in range(1, num_of_files + 1):
    jsonfp.append('train_set/timelines/%s.json' % str(i))
    # The path appended above is always the last entry of jsonfp.
    with open(jsonfp[-1]) as f:
        if 0 <= i < train_test_index_cutoff:
            train = train + json.load(f)
            print('train: train_set/timelines/%s.json' % str(i))
        elif i != 10:
            # File 10 is opened but loaded into neither set.
            test = test + json.load(f)

In [None]:
# Build one event DataFrame per split; the prints are progress markers.
train_df = clean_data(train)
print('Done1')
test_df = clean_data(test)
print('Done2')

In [None]:
def startTimeRound(df):
    """Return the timestamp of the first 'round_start' event.

    Parameters
    ----------
    df : pandas.DataFrame
        Event frame with string columns 'type' and 'date'.

    Returns
    -------
    str
        The event's 'date' value with everything from the first '+'
        onwards removed.

    Raises
    ------
    ValueError
        If the frame contains no 'round_start' event.
    """
    # Select by column instead of df.iloc[index]: the iterrows index is a
    # label, not a position, so the two disagree on any non-default index.
    round_starts = df.loc[df["type"] == 'round_start', "date"]
    if round_starts.empty:
        raise ValueError("no 'round_start' event found in the data frame")
    return round_starts.iloc[0].split('+')[0]

def startTimeOverall(df):
    """Return the 'date' of the first row, cut off at the first '+'."""
    first_event = df.iloc[0]
    raw_date = first_event["date"]
    return raw_date.split('+', 1)[0]

def endTime(df):
    """Return the 'date' of the last row, cut off at the first '+'."""
    last_event = df.iloc[-1]
    raw_date = last_event["date"]
    return raw_date.split('+', 1)[0]

In [None]:
def calculate_gaussian_distro_val(variance, second, meanSecond):
    """Evaluate the normal probability density at ``second``.

    Computes ``exp(-(second - meanSecond)**2 / (2 * variance)) /
    sqrt(2 * pi * variance)``, i.e. ``variance`` is sigma squared.

    The previous expression ended in ``/ 2*variance**2``, which Python
    parses as ``(... / 2) * variance**2``, and it also applied the factor
    1/2 twice; the result was not a normal density.

    Parameters
    ----------
    variance : float
        Variance of the distribution; must be positive.
    second : float
        Point at which the density is evaluated.
    meanSecond : float
        Mean of the distribution.

    Returns
    -------
    float

    Raises
    ------
    ValueError
        If ``variance`` is not positive.
    """
    if variance <= 0:
        raise ValueError("variance must be positive")
    deviation = second - meanSecond
    normaliser = 1.0 / math.sqrt(2 * math.pi * variance)
    return normaliser * math.exp(-(deviation ** 2) / (2.0 * variance))

In [None]:
def create_time_dict_of_zeros(t1, t2, delta):
    """Create a dict with a zero entry for every step from t1 up to t2.

    Parameters
    ----------
    t1 : start value (inclusive), e.g. a datetime.
    t2 : end value (inclusive when it falls exactly on a step).
    delta : step added to t1 repeatedly, e.g. a timedelta; must be positive.

    Returns
    -------
    dict
        Keys t1, t1 + delta, ... not exceeding t2, each mapped to 0.
        Empty when t1 > t2.

    Raises
    ------
    ValueError
        If ``delta`` is zero or negative (the loop would never terminate).
    """
    zero_step = delta - delta  # the zero of delta's own type
    if delta <= zero_step:
        raise ValueError("delta must be positive")
    zeros = {}  # avoid shadowing the builtin ``dict``
    current = t1
    while current <= t2:
        zeros[current] = 0
        current = current + delta
    return zeros

In [None]:
def plot_curve_from_dict(dict):
    """Draw the mapping's values against its keys as a line plot.

    The (key, value) pairs are sorted before plotting and the figure is
    shown immediately on a 10x10 canvas.
    """
    ordered_pairs = sorted(dict.items())
    # Transpose the list of pairs into one tuple of keys and one of values.
    keys, values = zip(*ordered_pairs)
    plt.figure(figsize=(10, 10))
    plt.plot(keys, values)
    plt.show()

In [None]:
# Base highlight weight of every timeline event type.
weights_dict = {
    'assist': 0.5,
    'bomb_defuse': 1,
    'bomb_defuse_begin': 0.9,
    'bomb_plant': 0.85,
    'kill': 0.75,
    'purchase': 0,
    'round_end': 0,
    'round_start': 0,
    'suicide': 0.3,
    'throw': 0.15,
    'round_backup': 0,
    'round_stop': 0,
}

In [None]:
# Event types whose base weight reaches this threshold count as important.
importance_thresh = 0.5
important_events = [
    event_name
    for event_name, weight in weights_dict.items()
    if weight >= importance_thresh
]
# Score multipliers applied while scoring kills and assists.
player_streak_multiplier = 0.33
headshot_multiplier = 1.25

In [None]:
# TODO(review): the original note here read "empty streak dict on round end";
# streaks are currently only cleared when a new match starts.
def _parse_event_second(raw_date):
    """Parse an event timestamp string, truncated to whole seconds.

    Everything from the first '+' or '.' onwards is discarded before parsing.
    """
    trimmed = raw_date.split('+')[0].split('.')[0]
    return datetime.datetime.strptime(trimmed, "%Y-%m-%dT%H:%M:%S")


def calculate_score_dictionary(df, score_thresh=None):
    """Score every event and accumulate the scores on a per-second timeline.

    For each non-purchase event the base weight from ``weights_dict`` is
    added to the score already stored for that second. Kills with a
    headshot and kills/assists by a player on a streak are scaled up.
    Events scoring above ``score_thresh`` additionally add a Gaussian bump
    to the neighbouring seconds (5 seconds either side).

    Side effects: ``df`` is modified in place. ``matchIdx`` is written for
    every row, ``highlight_score`` for every non-purchase row (the column
    is cast to float first).

    Parameters
    ----------
    df : pandas.DataFrame
        Event frame with the columns type, date, data, roundIdx, matchIdx
        and highlight_score.
    score_thresh : float, optional
        Score an event must exceed to be spread onto neighbouring seconds.
        When omitted, the module-level ``score_thresh_for_distribution`` is
        used if it exists.

    Returns
    -------
    dict
        datetime (whole seconds) -> accumulated score. Empty for an empty
        frame.
    """
    if len(df) == 0:
        return {}

    if score_thresh is None:
        try:
            score_thresh = score_thresh_for_distribution
        except NameError:
            # NOTE(review): score_thresh_for_distribution is not defined in
            # the visible notebook cells; falling back to importance_thresh
            # is an assumption - confirm the intended value.
            score_thresh = importance_thresh

    second = datetime.timedelta(seconds=1)
    tStart = _parse_event_second(startTimeRound(df))
    tEnd = _parse_event_second(endTime(df))
    tDistributionInterval = datetime.timedelta(seconds=5)
    spread_steps = int(tDistributionInterval.total_seconds())
    variance = 1
    player_streak_dict = {}

    timeline_score_dict = create_time_dict_of_zeros(tStart, tEnd, second)

    # Float scores are written back cell by cell below.
    df["highlight_score"] = df["highlight_score"].astype(float)

    match_count = 0
    for index, row in df.iterrows():
        event_type = row["type"]

        # A round_start with roundIdx 1 marks the beginning of a new match.
        if event_type == "round_start" and row["roundIdx"] == 1:
            match_count += 1
            player_streak_dict.clear()
        # iterrows yields copies, so results must be written through df.at.
        df.at[index, "matchIdx"] = match_count

        if event_type == "purchase":
            continue

        date = _parse_event_second(row["date"])
        # .get: events may fall outside the pre-built timeline (before the
        # first round_start, or when the input is not in chronological
        # order), and unknown event types carry no weight.
        score = timeline_score_dict.get(date, 0) + weights_dict.get(event_type, 0)

        is_combat = event_type in {'kill', 'assist'}
        actor_id = None
        if is_combat:
            event_data = row["data"]
            actor_id = event_data["actor"]['playerId']
            if event_type == 'kill' and event_data.get("headshot"):
                score = score * headshot_multiplier
            if actor_id in player_streak_dict:
                # Same player scored again while their streak was alive.
                player_streak_dict[actor_id] += 1
                score = score + (score * player_streak_dict[actor_id] * player_streak_multiplier)
            else:
                player_streak_dict[actor_id] = 1

        # Every event a player did not take part in shortens their streak;
        # the acting player is identified by id, not by streak length.
        for player_id in list(player_streak_dict):
            if is_combat and player_id == actor_id:
                continue
            player_streak_dict[player_id] -= 1
            if player_streak_dict[player_id] <= 0:
                del player_streak_dict[player_id]

        timeline_score_dict[date] = score
        df.at[index, "highlight_score"] = score

        if score > score_thresh:
            # Spread a Gaussian bump over the seconds before and after the
            # event, stopping at the edge of the timeline. The offset in
            # seconds is used directly: datetime.second wraps at minute
            # boundaries.
            for direction in (-1, 1):
                for step in range(1, spread_steps + 1):
                    neighbour = date + direction * step * second
                    if neighbour not in timeline_score_dict:
                        break
                    timeline_score_dict[neighbour] += calculate_gaussian_distro_val(
                        variance, direction * step, 0)

    return timeline_score_dict

In [None]:
# Score both splits; each result is the per-second score timeline of its frame.
train_dict = calculate_score_dictionary(train_df)
test_dict = calculate_score_dictionary(test_df)

In [None]:
# NOTE(review): create_game_objects is not defined in the cells shown here;
# it has to exist in the kernel already - confirm where it comes from.
game_objects_train = create_game_objects(train_df)
game_objects_test = create_game_objects(test_df)

In [None]:
# Display name for every known team id.
team_names_dict = {
    "95e1ecf1-d6ec-4643-96ef-ef971a9fa4fe": "Cloud9",
    "24a7a69c-4c71-4534-853d-31b6d0be1399": "FaZe Clan",
    "5dd8de73-181c-4e88-bf0f-4d18c9df8109": "Team Liquid",
    "3ff57e88-df4a-4d8e-9d1a-a4480ddbf727": "Ninjas in Pyjamas",
    "2d651b3b-8db9-4bb5-b3e1-c801050fc424": "Fnatic",
}

In [None]:
def calculate_highlights(df, highlight_thresh, events=None):
    """Group the highlight-worthy events of a scored frame by match.

    A row is a highlight when its ``highlight_score`` exceeds
    ``highlight_thresh`` and its ``type`` is one of ``events``.

    Parameters
    ----------
    df : pandas.DataFrame
        Scored event frame with the columns matchIdx, type and
        highlight_score, ordered so that matchIdx never decreases.
    highlight_thresh : float
        Score a row must exceed to count as a highlight.
    events : container of str, optional
        Event types eligible as highlights. Defaults to the module-level
        ``important_events``.

    Returns
    -------
    dict
        Match number -> list of highlight rows (named tuples from
        ``DataFrame.itertuples``). Rows seen before the first match change
        are filed under match 1. Empty for an empty frame.
    """
    if events is None:
        events = important_events

    highlights_dict = {}
    match_highlights = []
    match_count = 1
    saw_rows = False
    for row in df.itertuples(index=True, name='column_names'):
        saw_rows = True
        if row.matchIdx > match_count:
            # A new match begins: file the finished one and follow the
            # row's own match number rather than assuming a step of one.
            highlights_dict[match_count] = match_highlights
            match_highlights = []
            match_count = row.matchIdx
        if row.highlight_score > highlight_thresh and row.type in events:
            match_highlights.append(row)

    # The final match has no successor to trigger the flush above; without
    # this its highlights would be dropped.
    if saw_rows:
        highlights_dict[match_count] = match_highlights
    return highlights_dict

In [None]:
# Only events scoring above this value are kept as highlights.
highlight_thresh = 3
highlights_dict = calculate_highlights(train_df, highlight_thresh)

In [None]:
def get_event_details_on_type(highlight):
    """Return a short phrase describing what happened in a highlight.

    Parameters
    ----------
    highlight : mapping
        Event with a 'type' key and, depending on the type, a 'data'
        mapping holding 'victim' (with 'playerId'), 'weapon' or 'item'.

    Returns
    -------
    str or int
        The phrase for the event type, or 0 for a type without a phrase
        (kept from the original contract).
    """
    event_type = highlight["type"]
    details = 0
    if event_type == "kill":
        details = ("killed " + str(highlight["data"]["victim"]["playerId"])
                   + " with a " + str(highlight["data"]["weapon"]))
    elif event_type == "assist":
        details = "assisted in killing " + str(highlight["data"]["victim"]["playerId"])
    elif event_type == "bomb_plant":
        # The round number is no longer glued onto the phrase ("...bomb7");
        # the summary templates append ", in round N" themselves.
        details = "planted the bomb"
    elif event_type == "bomb_defuse_begin":
        details = "started to defuse the bomb"
    elif event_type == "bomb_defuse":
        details = "successfully defused the bomb, taking it home for the team"
    elif event_type == "throw":
        details = "throwing a " + str(highlight["data"]["item"]) + " and causing all kinds of trouble"
    return details

In [None]:
def get_match_highlight_modifier_string(highlight_count):
    """Pick the adjective phrase for a match from its number of highlights.

    More than 5 highlights -> "a thrilling", more than 3 -> "an exciting",
    anything else -> "a great".
    """
    if highlight_count > 5:
        return "a thrilling"
    if highlight_count > 3:
        return "an exciting"
    return "a great"

In [None]:
def get_round_win_balance_string(t1_score, t2_score):
    """Describe how lopsided a final score is.

    The higher score is compared against multiples of the lower one, so
    the result does not depend on the order of the arguments. (Previously
    one side was tested with ``>=`` and the other with ``>``, and a 0-0
    score was reported as overwhelmingly one-sided.)

    Parameters
    ----------
    t1_score, t2_score : number
        Rounds won by each team.

    Returns
    -------
    list
        ``[description, conjunction]``, e.g. ``["a rather even", "but"]``.
    """
    high = max(t1_score, t2_score)
    low = min(t1_score, t2_score)

    if high == low:
        # Covers 0-0, where every ">= multiple of low" test would pass.
        return ["a rather even", "but"]
    if high >= low * 3:
        return ["an overwhelmingly one-sided", "and"]
    if high >= low * 2:
        return ["a mostly one-sided", "and"]
    if high >= low * 1.5:
        return ["a somewhat one-sided", "but"]
    return ["a rather even", "but"]

In [None]:
def get_empty_summary_string_list(index):
    """Return the unformatted sentence templates of one summary type.

    Type 0 is a list of seven entries, each a one-element list holding a
    ``str.format`` template; type 1 is ``[""]``. Any other index raises
    KeyError. A fresh object is built on every call.
    """
    match_report = [
        ["On {0}, we witnessed {1} match between {2} and {3}."],
        ["It was {0} fight, {1} in the end, {2} won with {3} to {4}."],
        ["There was a total of {0} highlights in the match, that we found interesting."],
        ["The best ones were when {0} {1}, in round {2}. "],
        ["Then {0} {1}, in round {2}."],
        ["And who can forget {0} {1}, in round {2}."],
        ["Check out our highlights video at {0}, and stay tuned for more exciting match summaries."],
    ]
    templates_by_type = {0: match_report, 1: [""]}
    return templates_by_type[index]


In [None]:
def get_match_summary(match_index, n_highlights, highlights_by_match=None, games=None):
    """Build the templated text summary of one match.

    The sentences are defined manually (templates from
    ``get_empty_summary_string_list``) instead of using NLG.

    Parameters
    ----------
    match_index
        Key of the match in both ``highlights_by_match`` and ``games``.
    n_highlights : int
        How many of the highest-scoring highlights to describe. Summary
        type 0 has three highlight sentences, so at most three are used;
        fewer are used when the match has fewer highlights.
    highlights_by_match : mapping, optional
        Match key -> highlights of that match, either a list of events or
        a mapping whose values are the events. Events may be mappings or
        named tuples with the fields date, type, roundIdx, data and
        highlight_score. Defaults to the module-level
        ``matches_highlight_dict``.
    games : optional
        Collection indexed by match key whose items provide
        ``get_team(i).get_team_id()`` and ``get_team(i).get_score()`` for
        i in (0, 1). Defaults to the module-level ``game_objects``.

    Returns
    -------
    list of str
        The formatted sentences, in template order.
    """
    # NOTE(review): neither matches_highlight_dict nor game_objects is
    # defined in the visible notebook cells (the visible names are
    # highlights_dict and game_objects_train) - confirm which objects are
    # meant, or pass them explicitly.
    if highlights_by_match is None:
        highlights_by_match = matches_highlight_dict
    if games is None:
        games = game_objects

    match_events = highlights_by_match[match_index]
    if hasattr(match_events, "values"):
        match_events = list(match_events.values())
    # Named-tuple rows (DataFrame.itertuples) become dicts so every event
    # can be read by key, which get_event_details_on_type also requires.
    events = [e._asdict() if hasattr(e, "_asdict") else e for e in match_events]

    match_highlight_count = len(events)
    match_highlight_modifier = get_match_highlight_modifier_string(match_highlight_count)

    game = games[match_index]
    teams = []
    for team_slot in (0, 1):
        team = game.get_team(team_slot)
        team_id = team.get_team_id()
        # Unknown team ids are shown as-is rather than raising KeyError.
        teams.append((team_names_dict.get(team_id, team_id), team.get_score()))

    # The original picked teams[0] in both branches of the winner and the
    # loser lookup; on a tie the first team is reported as the winner.
    if teams[0][1] >= teams[1][1]:
        winning_team, losing_team = teams[0], teams[1]
    else:
        winning_team, losing_team = teams[1], teams[0]
    round_win_balance = get_round_win_balance_string(teams[0][1], teams[1][1])

    # Rank by highlight score; nlargest returns fewer items when the match
    # has fewer than n_highlights highlights.
    top_highlights = nlargest(n_highlights, events, key=lambda e: e["highlight_score"])
    if top_highlights:
        date = top_highlights[-1]["date"].split('T')[0]
    else:
        date = "an unknown date"

    # presumably every highlight carries data["actor"]["playerId"], as the
    # original code assumed - verify for bomb events.
    highlights = [
        {
            'player': event["data"]["actor"]["playerId"],
            'type': event["type"],
            'round': event["roundIdx"],
            'eventdets': get_event_details_on_type(event),
        }
        for event in top_highlights
    ]
    highlights_link = "{LINK}"

    summary_index = 0
    # Each type-0 template is wrapped in a one-element list; unwrap it so
    # the brackets and quotes do not end up in the text.
    templates = [
        entry[0] if isinstance(entry, (list, tuple)) else entry
        for entry in get_empty_summary_string_list(summary_index)
    ]
    if summary_index != 0:  # Different for every summary type, not applying NLG
        return templates

    summary = [
        templates[0].format(date, match_highlight_modifier, teams[0][0], teams[1][0]),
        templates[1].format(round_win_balance[0], round_win_balance[1],
                            winning_team[0], winning_team[1], losing_team[1]),
        templates[2].format(match_highlight_count),
    ]
    # Templates 3-5 are the three highlight sentences.
    for template, highlight in zip(templates[3:6], highlights):
        summary.append(template.format(highlight['player'], highlight['eventdets'], highlight['round']))
    summary.append(templates[6].format(highlights_link))

    return summary
        

In [None]:
# Summarise match 1 using its top 3 highlights.
get_match_summary(1, 3)