In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import random

In [21]:
pd.set_option('display.max_columns', 30)
pd.set_option('display.max_rows', 200)
dota_players = pd.read_csv('data/dota_players_final.csv')
dota_teams = pd.read_csv('data/dota_teams.csv')
dota_heroes = pd.read_csv('data/dota_heroes_new.csv')
dota_teams.team_id = dota_teams.team_id + 1
hero_stats = pd.read_csv('data/hero_stats.csv')
dota_games = pd.read_csv('random_matches.csv')

In [25]:
# from dota_games, extract teams, game_id, and win value
df_team_wins = dota_games[['team_id', 'game_id', 'g_win']]
# drop duplicates
df_team_wins = df_team_wins.drop_duplicates()

In [35]:
# extract the winners only
df_winners = df_team_wins[df_team_wins['g_win'] == 1]

In [36]:
df_winners

Unnamed: 0,team_id,game_id,g_win
0,69,1,1
15,42,2,1
25,60,3,1
30,54,4,1
40,78,5,1
...,...,...,...
9955,15,996,1
9965,33,997,1
9970,73,998,1
9980,65,999,1


In [15]:
len(dota_games.player_id.unique())

270

In [6]:
dota_players.to_csv('data/dota_players_final.csv', index=False)

In [None]:
class SingleGame:
    def __init__(self, game_id, game_winner, game_duration, game_date, tournament_id):
        self.game_id = game_id
        self.game_winner = game_winner
        self.game_duration = game_duration
        self.game_date = game_date
        self.tournament_id = tournament_id

    def get_info(self):
        return {
            "game_id": self.game_id,
            "game_winner": self.game_winner,
            "game_duration": self.game_duration,
            "game_date": self.game_date,
            "tournament_id": self.tournament_id,
        }
    
class Tournament:
    def __init__(self, t_name, t_date, t_prize, t_winner):
        self.t_name = t_name
        self.t_date = t_date
        self.t_prize = t_prize
        self.t_winner = t_winner

    def get_info(self):
        return {
            "t_name": self.t_name,
            "t_date": self.t_date,
            "t_prize": self.t_prize,
            "t_winner": self.t_winner,
        }

In [4]:
# generate a list of the largest city in each player country
def get_largest_city(country):
    country_cities = {
        'United States': 'New York City',
 'Bulgaria': 'Sofia',
 'Sweden': 'Stockholm',
 'Malaysia' : 'Kuala Lumpur',
 'Singapore' : 'Singapore',
 'Peru' : 'Lima',
 'Brazil' : 'São Paulo',
 'Uruguay' : 'Montevideo',
 'Bolivia' : 'La Paz',
 'Argentina' : 'Buenos Aires',
 'South Korea' : 'Seoul',
 'Indonesia' : 'Jakarta',
 'Philippines' : 'Manila',
 'Canada' : 'Toronto',
 'China' : 'Shanghai',
 'Ukraine' : 'Kiev',
 'Venezuela' : 'Caracas',
 'Russia' : 'Moscow',
 'Moldova' : 'Chisinau',
 'Laos' : 'Vientiane',
 'Thailand' : 'Bangkok',
 'Netherlands' : 'Amsterdam',
 'Belgium' : 'Brussels',
 'Romania' : 'Bucharest',
 'Serbia' : 'Belgrade',
 'Kazakhstan' : 'Astana',
 'Austria' : 'Vienna',
 'Germany' : 'Berlin',
 'Belarus' : 'Minsk',
 'Australia' : 'Sydney',
 'Denmark' : 'Copenhagen',
 'Mongolia' : 'Ulaanbaatar',
 'Kyrgyzstan' : 'Bishkek',
 'Ecuador' : 'Quito',
 'Non-representing' : 'Non-representing',
 'Bosnia and Herzegovina' : 'Sarajevo',
 'Lebanon' : 'Beirut',
 'Czechia' : 'Prague',
 'Hungary' : 'Budapest',
 'Georgia' : 'Tbilisi',
 'Greece' : 'Athens',
 'Jordan' : 'Amman',
 'Pakistan' : 'Islamabad',
 'United Arab Emirates' : 'Abu Dhabi',
 'Norway' : 'Oslo',
 'Israel' : 'Jerusalem',
 'Nicaragua' : 'Managua',
 'Poland' : 'Warsaw',
 'Estonia' : 'Tallinn',
 'Mexico' : 'Mexico City',
 'United Kingdom' : 'London',
 'Slovakia' : 'Bratislava',
 'North Macedonia' : 'Skopje',
 'Myanmar' : 'Naypyidaw',
    }
    return country_cities[country]

In [14]:
# create random tournaments
def create_random_tournaments():
    # create a list of random tournament names based on the largest cities in each player's country using the dota_players dataframe
    tournaments = []
    tournament_names = []
    title_options = ["Major", "Minor", "Open", "Pro Series", "Invitational", "Qualifier"]
    title_weights = [0.2, 0.2, 0.2, 0.2, 0.1, 0.1]
    for country in dota_players["country"].unique():
        tournament_names.append(
            get_largest_city(country) + " " + random.choices(title_options, title_weights)[0]
        )
    # create a list of random tournament dates
    # the date range between each tournament depends on the tournament title
    tournament_dates = []
    for i in range(0, len(tournament_names)):
        year = random.randint(2017, 2022)
        if "Major" in tournament_names[i]:
            tournament_dates.append(
                pd.to_datetime(
                    str(year) + "-01-01"
                )
                + pd.DateOffset(
                    days=random.randint(0, 365)
                )
            )
        elif "Minor" in tournament_names[i]:
            tournament_dates.append(
                pd.to_datetime(
                    str(year) + "-01-01"
                )
                + pd.DateOffset(
                    days=random.randint(0, 365)
                )
            )
        elif "Open" in tournament_names[i]:
            tournament_dates.append(
                pd.to_datetime(
                    str(year) + "-01-01"
                )
                + pd.DateOffset(
                    days=random.randint(0, 365)
                )
            )
        elif "Pro Series" in tournament_names[i]:
            tournament_dates.append(
                pd.to_datetime(
                    str(year) + "-01-01"
                )
                + pd.DateOffset(
                    days=random.randint(0, 365)
                )
            )
        elif "Invitational" in tournament_names[i]:
            tournament_dates.append(
                pd.to_datetime(
                    str(year) + "-01-01"
                )
                + pd.DateOffset(
                    days=random.randint(0, 365)
                )
            )
        elif "Qualifier" in tournament_names[i]:
            tournament_dates.append(
                pd.to_datetime(
                    str(year) + "-01-01"
                )
                + pd.DateOffset(
                    days=random.randint(0, 365)
                )
            )
    # create a list of random tournament prize pools
    tournament_prize_pools = []
    for i in range(0, len(tournament_names)):
        # round the prize pool to the nearest 10000
        # set the prize pool based on the tournament title
        if "Major" in tournament_names[i]:
            tournament_prize_pools.append(
                round(random.randint(250000, 1000000) / 10000) * 10000
            )
        elif "Minor" in tournament_names[i]:
            tournament_prize_pools.append(
                round(random.randint(50000, 200000) / 10000) * 10000
            )
        elif "Open" in tournament_names[i]:
            tournament_prize_pools.append(
                round(random.randint(10000, 40000) / 10000) * 10000
            )
        elif "Pro Series" in tournament_names[i]:
            tournament_prize_pools.append(
                round(random.randint(5000, 50000) / 10000) * 10000
            )
        elif "Invitational" in tournament_names[i]:
            tournament_prize_pools.append(
                round(random.randint(50000, 100000) / 10000) * 10000
            )
        elif "Qualifier" in tournament_names[i]:
            tournament_prize_pools.append(
                round(random.randint(5000, 30000) / 10000) * 10000
            )            
    # create a list of random tournament winners
    tournament_winners = []
    for i in range(0, len(tournament_names)):
        tournament_winners.append(random.choice(dota_teams["team_id"]))

    # create a list of tournament objects
    for i in range(0, len(tournament_names)):
        tournaments.append(
            Tournament(
                t_name = tournament_names[i],
                t_date = tournament_dates[i],
                t_prize = tournament_prize_pools[i],
                t_winner = tournament_winners[i],
            )
        )
    for tourney in tournaments:
        tournaments[tournaments.index(tourney)] = tourney.get_info()
    return pd.DataFrame(tournaments)

In [15]:
tourneys = create_random_tournaments()

In [16]:
tourneys

Unnamed: 0,t_name,t_date,t_prize,t_winner
0,New York City Major,1966-04-06 00:00:00.020161008,770000,79
1,Sofia Major,1967-02-14 00:00:00.020200110,370000,19
2,Stockholm Open,1969-03-15 00:00:00.020201019,10000,22
3,Kuala Lumpur Qualifier,1969-02-02 00:00:00.020190906,30000,69
4,Singapore Open,1969-02-23 00:00:00.020190224,20000,48
5,Lima Major,1964-01-23 00:00:00.020180204,900000,54
6,São Paulo Qualifier,1969-01-28 00:00:00.020180823,20000,28
7,Montevideo Major,1966-06-22 00:00:00.020160513,820000,49
8,La Paz Qualifier,1969-03-14 00:00:00.020180427,10000,41
9,Buenos Aires Open,1969-11-04 00:00:00.020170407,30000,46


In [13]:
tourneys.to_csv("tournaments_for_insert.csv", index=False)