In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import random

In [None]:
# Raise the notebook's DataFrame display limits.
pd.set_option("display.max_columns", 30)
pd.set_option("display.max_rows", 200)

# Player, hero and hero-statistics tables from the data/ directory.
dota_players = pd.read_csv("data/dota_players_final.csv")
dota_heroes = pd.read_csv("data/dota_heroes_new.csv")
hero_stats = pd.read_csv("data/hero_stats.csv")

# Team table; every team_id is shifted up by one immediately after loading.
dota_teams = pd.read_csv("data/dota_teams.csv")
dota_teams.team_id = dota_teams.team_id.add(1)

# Match rows and tournament records, read from the working directory.
dota_games = pd.read_csv("random_matches.csv")
dota_tournaments = pd.read_csv("tournaments_for_insert.csv")

In [None]:
# Reduce dota_games to the unique (team_id, game_id, g_win) combinations.
df_team_wins = dota_games.loc[:, ["team_id", "game_id", "g_win"]].drop_duplicates()

In [None]:
# Keep only the rows whose win flag is set (g_win equal to 1).
df_winners = df_team_wins.loc[df_team_wins["g_win"].eq(1)]

In [None]:
# Number of winning rows per team. The count is reported once for each
# remaining column (game_id and g_win), indexed by team_id.
df_winners_group = df_winners.groupby("team_id").agg("count")

In [None]:
# Display the per-team win counts.
df_winners_group

In [None]:
# Display the tournament table loaded from tournaments_for_insert.csv.
dota_tournaments

In [None]:
# Display the deduplicated team_id / game_id / g_win rows.
df_team_wins

In [None]:
# Number of distinct player_id values in dota_games (a missing value, if any, counts as one).
dota_games["player_id"].nunique(dropna=False)

In [None]:
# Write dota_players back to the CSV path it was loaded from in the data-loading
# cell, omitting the index column.
# NOTE(review): no visible cell modifies dota_players between the load and this
# save, so this looks like a rewrite of the input file with unchanged content —
# confirm it is still needed before re-running.
dota_players.to_csv('data/dota_players_final.csv', index=False)

In [None]:
class SingleGame:
    """Plain record describing one game.

    Attributes mirror the constructor arguments: game_id, game_winner,
    game_duration, game_date and tournament_id.
    """

    def __init__(self, game_id, game_winner, game_duration, game_date, tournament_id):
        """Store the five game fields unchanged on the instance."""
        (
            self.game_id,
            self.game_winner,
            self.game_duration,
            self.game_date,
            self.tournament_id,
        ) = (game_id, game_winner, game_duration, game_date, tournament_id)

    def get_info(self):
        """Return the game fields as a dict keyed by attribute name."""
        field_names = (
            "game_id",
            "game_winner",
            "game_duration",
            "game_date",
            "tournament_id",
        )
        return {field: getattr(self, field) for field in field_names}
    
class Tournament:
    """Plain record describing one tournament.

    Attributes mirror the constructor arguments: t_name, t_date, t_prize and
    t_winner.
    """

    def __init__(self, t_name, t_date, t_prize, t_winner):
        """Store the four tournament fields unchanged on the instance."""
        self.t_name, self.t_date = t_name, t_date
        self.t_prize, self.t_winner = t_prize, t_winner

    def get_info(self):
        """Return the tournament fields as a dict keyed by attribute name."""
        field_names = ("t_name", "t_date", "t_prize", "t_winner")
        return {field: getattr(self, field) for field in field_names}

### Random tournament creator

In [None]:
# Country -> host city used when naming tournaments. Defined once at module
# level so the mapping is not rebuilt on every get_largest_city call.
_COUNTRY_CITIES = {
    'United States': 'New York City',
    'Bulgaria': 'Sofia',
    'Sweden': 'Stockholm',
    'Malaysia': 'Kuala Lumpur',
    'Singapore': 'Singapore',
    'Peru': 'Lima',
    'Brazil': 'São Paulo',
    'Uruguay': 'Montevideo',
    'Bolivia': 'La Paz',
    'Argentina': 'Buenos Aires',
    'South Korea': 'Seoul',
    'Indonesia': 'Jakarta',
    'Philippines': 'Manila',
    'Canada': 'Toronto',
    'China': 'Shanghai',
    'Ukraine': 'Kiev',
    'Venezuela': 'Caracas',
    'Russia': 'Moscow',
    'Moldova': 'Chisinau',
    'Laos': 'Vientiane',
    'Thailand': 'Bangkok',
    'Netherlands': 'Amsterdam',
    'Belgium': 'Brussels',
    'Romania': 'Bucharest',
    'Serbia': 'Belgrade',
    'Kazakhstan': 'Astana',
    'Austria': 'Vienna',
    'Germany': 'Berlin',
    'Belarus': 'Minsk',
    'Australia': 'Sydney',
    'Denmark': 'Copenhagen',
    'Mongolia': 'Ulaanbaatar',
    'Kyrgyzstan': 'Bishkek',
    'Ecuador': 'Quito',
    'Non-representing': 'Non-representing',
    'Bosnia and Herzegovina': 'Sarajevo',
    'Lebanon': 'Beirut',
    'Czechia': 'Prague',
    'Hungary': 'Budapest',
    'Georgia': 'Tbilisi',
    'Greece': 'Athens',
    'Jordan': 'Amman',
    'Pakistan': 'Islamabad',
    'United Arab Emirates': 'Abu Dhabi',
    'Norway': 'Oslo',
    'Israel': 'Jerusalem',
    'Nicaragua': 'Managua',
    'Poland': 'Warsaw',
    'Estonia': 'Tallinn',
    'Mexico': 'Mexico City',
    'United Kingdom': 'London',
    'Slovakia': 'Bratislava',
    'North Macedonia': 'Skopje',
    'Myanmar': 'Naypyidaw',
}


def get_largest_city(country):
    """Return the city associated with ``country`` in the lookup table.

    Parameters
    ----------
    country : str
        Country name exactly as it appears in the table (case-sensitive). The
        placeholder 'Non-representing' maps to itself.

    Returns
    -------
    str
        The mapped city name.

    Raises
    ------
    KeyError
        If ``country`` has no entry; the message names the missing country.

    Notes
    -----
    NOTE(review): a few entries hold the capital rather than the most populous
    city (e.g. Kazakhstan -> Astana, Pakistan -> Islamabad, Myanmar ->
    Naypyidaw). The values are left untouched here because
    create_random_tournaments uses them to build tournament names.
    """
    try:
        return _COUNTRY_CITIES[country]
    except KeyError:
        # Same exception type as a plain dict lookup, with a readable message.
        raise KeyError(f"no city is mapped for country {country!r}") from None

In [None]:
def assemble_team_data(players=None, min_players=5):
    """Group player rows by team and keep the teams with enough players.

    Parameters
    ----------
    players : pandas.DataFrame, optional
        Frame with a "team_name" column. Defaults to the notebook-level
        ``dota_players`` frame, looked up at call time.
    min_players : int, optional
        Minimum number of rows a team needs to be kept (default 5).

    Returns
    -------
    dict
        Maps each qualifying team name to the sub-frame of its rows, with teams
        in order of first appearance and the original row index preserved.
        Rows whose team_name is missing never form a team.
    """
    if players is None:
        players = dota_players
    # One grouping pass instead of re-filtering the whole frame once per team;
    # sort=False keeps teams in first-appearance order.
    return {
        team_name: roster
        for team_name, roster in players.groupby("team_name", sort=False)
        if len(roster) >= min_players
    }


# Flatten the per-team sub-frames back into one frame with a fresh 0..n-1 index.
team_data = assemble_team_data()
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on every
# iteration; a single concat does the same job in one pass. pd.concat raises on
# an empty list, so fall back to an empty frame when no team qualifies.
team_df = (
    pd.concat(list(team_data.values()), ignore_index=True)
    if team_data
    else pd.DataFrame()
)

In [None]:
# Tournament tiers: title -> (selection weight, minimum prize, maximum prize).
# Insertion order fixes the order in which titles and weights are passed to
# random.choices.
_TOURNAMENT_TIERS = {
    "Major": (0.2, 250000, 1000000),
    "Minor": (0.2, 50000, 200000),
    "Open": (0.2, 10000, 40000),
    "Pro Series": (0.2, 5000, 50000),
    "Invitational": (0.1, 50000, 100000),
    "Qualifier": (0.1, 5000, 30000),
}
_PRIZE_STEP = 10000  # prize pools are rounded to a multiple of this amount
_TOURNAMENT_YEARS = (2017, 2022)  # inclusive range of tournament years


def create_random_tournaments(seed=None):
    """Generate one random tournament per distinct country in ``dota_players``.

    Each tournament is named "<city> <title>", where the city comes from
    ``get_largest_city`` and the title is drawn from the weighted tier table.
    It also gets a random date, a prize pool drawn from its tier's range, and a
    winner drawn from ``team_df["team_id"]``.

    Parameters
    ----------
    seed : int, optional
        When given, all draws come from a private ``random.Random(seed)`` so the
        result is reproducible. When omitted, the module-level ``random`` state
        is used, as before.

    Returns
    -------
    pandas.DataFrame
        One row per tournament with columns t_name, t_date, t_prize, t_winner
        (an empty frame when there are no countries).

    Raises
    ------
    KeyError
        Propagated from ``get_largest_city`` for a country it does not know.
    IndexError
        Propagated from the winner draw when ``team_df`` has no rows.
    """
    rng = random if seed is None else random.Random(seed)

    tier_titles = list(_TOURNAMENT_TIERS)
    tier_weights = [tier[0] for tier in _TOURNAMENT_TIERS.values()]

    # The title is kept alongside the name so later steps look the tier up
    # directly instead of substring-matching the full "<city> <title>" string.
    cities = [get_largest_city(country) for country in dota_players["country"].unique()]
    titles = [rng.choices(tier_titles, tier_weights)[0] for _ in cities]
    names = [f"{city} {title}" for city, title in zip(cities, titles)]

    # Every tier uses the same rule: a uniformly random day inside a random year.
    dates = []
    for _ in names:
        year = rng.randint(*_TOURNAMENT_YEARS)
        year_start = pd.Timestamp(year=year, month=1, day=1)
        # Cap the offset at the year's last day; a fixed 0..365 range can land
        # on 1 January of the following year in non-leap years.
        last_offset = 365 if year_start.is_leap_year else 364
        dates.append(year_start + pd.DateOffset(days=rng.randint(0, last_offset)))

    # Prize pool: uniform draw from the tier's range, rounded to _PRIZE_STEP.
    # NOTE(review): for tiers whose minimum is 5000, a draw of exactly 5000
    # rounds to 0 (round(0.5) == 0) — raise the minimum if a zero prize is
    # not acceptable.
    prizes = []
    for title in titles:
        _, prize_low, prize_high = _TOURNAMENT_TIERS[title]
        prizes.append(round(rng.randint(prize_low, prize_high) / _PRIZE_STEP) * _PRIZE_STEP)

    # Draw winners from a plain list: random.choice indexes its argument with
    # seq[i], which on a Series is a label lookup and only works while the
    # index happens to be 0..n-1.
    # NOTE(review): team_df presumably holds one row per player, so a team is
    # drawn in proportion to its number of rows; use .unique() here if every
    # team should be equally likely.
    candidate_ids = team_df["team_id"].tolist()
    winners = [rng.choice(candidate_ids) for _ in names]

    tournaments = [
        Tournament(t_name=name, t_date=date, t_prize=prize, t_winner=winner)
        for name, date, prize, winner in zip(names, dates, prizes, winners)
    ]
    return pd.DataFrame([tournament.get_info() for tournament in tournaments])

In [None]:
# Generate the random tournament table; the result differs between runs because
# no random seed is set in the visible cells.
tourneys = create_random_tournaments()

In [99]:
# Save the team_id / game_id / g_win rows to team_games.csv without the index column.
df_team_wins.to_csv("team_games.csv", index=False)

In [82]:
# Display df_team_wins; the recorded output below has no tournament_id column yet.
df_team_wins

Unnamed: 0,team_id,game_id,g_win
0,69,1,1
5,34,1,0
10,61,2,0
15,42,2,1
20,11,3,0
...,...,...,...
9975,49,998,0
9980,65,999,1
9985,59,999,0
9990,15,1000,0


In [103]:
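# TODO: add a tournament_id column to the df_team_wins dataframe, where tournament_id is the index of the tournament in the tourneys dataframe.
# The tournament_ids should start at 1 and increment by 1 for every 20 games.
# NOTE: this cell contains no code that performs the assignment, so a fresh run leaves df_team_wins without a tournament_id column.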

In [104]:
# Display df_team_wins with its tournament_id column.
# NOTE(review): in the recorded output below tournament_id increases on every
# row, not once per 20 games as the preceding cell's comment describes — confirm
# which behaviour is intended.
df_team_wins

Unnamed: 0,team_id,game_id,g_win,tournament_id
0,69,1,1,1
5,34,1,0,2
10,61,2,0,3
15,42,2,1,4
20,11,3,0,5
...,...,...,...,...
9975,49,998,0,1996
9980,65,999,1,1997
9985,59,999,0,1998
9990,15,1000,0,1999


In [None]:
# Build a dataframe of SingleGame records from the winning rows of df_team_wins
# (one winning row per game). The previous version indexed a "games" column that
# df_team_wins does not have and grew the frame with DataFrame.append, which was
# removed in pandas 2.0.
# df_team_wins only carries team_id / game_id / g_win (plus tournament_id once
# that column has been added), so duration and date are left as None here.
# NOTE(review): fill game_duration / game_date from their real source if one exists.
winning_rows = df_team_wins[df_team_wins["g_win"] == 1]
has_tournament_id = "tournament_id" in winning_rows.columns
single_games = [
    SingleGame(
        game_id=row.game_id,
        game_winner=row.team_id,
        game_duration=None,
        game_date=None,
        tournament_id=row.tournament_id if has_tournament_id else None,
    )
    for row in winning_rows.itertuples(index=False)
]
# Explicit columns keep the frame's schema stable even when there are no games.
df_single_games = pd.DataFrame(
    [game.get_info() for game in single_games],
    columns=["game_id", "game_winner", "game_duration", "game_date", "tournament_id"],
)

In [95]:
# Tally wins per team over the first 5% of the winner rows. value_counts()
# orders teams by descending count, so the leading entry of the resulting
# Series is the team with the most wins in that slice.
start = 0
end = int(0.05 * len(df_team_wins))
team_with_most_wins = df_winners["team_id"].head(int(0.05 * len(df_winners))).value_counts()

In [101]:
# Number of generated tournaments (one per distinct player country).
len(tourneys)

54

In [None]:
# Save the generated tournaments without the index column.
# NOTE(review): the data-loading cell near the top of the notebook reads this
# same file into dota_tournaments, so on a fresh checkout the file must already
# exist before this cell has ever run.
tourneys.to_csv("tournaments_for_insert.csv", index=False)