# Label Majors
In this notebook we want to:
- Label the major tournaments based on https://liquipedia.net/smash/Major_Tournaments/Melee

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
from collections import defaultdict
import matplotlib.pyplot as plt
import datetime 

import sqlite3
import sys
import time
import tqdm
from tqdm.auto import tqdm
import pickle
import joblib
import os
import re
from datetime import datetime

# Prefer the absolute /workspace/data directory when it exists; otherwise fall
# back to the data folder one level above the notebook.
data_path = '/workspace/data/' if os.path.exists('/workspace/data') else '../data/'


## Loading SQLite Database into Pandas DataFrames

The following code connects to an SQLite database (`melee_player_database.db`) and converts each table within the database into a pandas DataFrame. The DataFrames will be stored in a dictionary, where each key corresponds to the table name with `_df` appended, and the values are the respective DataFrames.

### Steps:

1. **Database Connection**: We use the `sqlite3` library to connect to the SQLite database file.
2. **Retrieve Table Names**: A query retrieves all the table names in the database.
3. **Convert Tables to DataFrames**: For each table:
   - The table is loaded into a pandas DataFrame using `pd.read_sql()`.
   - We check each column to see if any data is JSON-formatted (lists or dictionaries). If so, we convert these columns from strings into their corresponding Python objects using `json.loads()`.
4. **Store DataFrames**: The DataFrames are stored in a dictionary, where the key is the table name with a `_df` suffix, and the value is the DataFrame.
5. **Database Connection Closed**: Once all tables are loaded into DataFrames, the database connection is closed.

### Example:
If the database contains a table named `players`, the corresponding DataFrame will be stored in the dictionary with the key `players_df`, and can be accessed as:

```python
players_df = dfs['players_df']
```


In [3]:
def get_table_names(conn):
    """Return the names of all tables in the SQLite database behind ``conn``.

    Parameters
    ----------
    conn : database connection
        An open connection that ``pd.read_sql`` accepts (e.g. ``sqlite3.Connection``).

    Returns
    -------
    list of str
        Every ``name`` in ``sqlite_master`` whose ``type`` is ``'table'``.
    """
    query = "SELECT name FROM sqlite_master WHERE type='table';"
    return pd.read_sql(query, conn)['name'].tolist()


def load_tables_to_dfs(conn):
    """Load every table of the database into a pandas DataFrame.

    Columns whose entries are all strings are decoded with ``json.loads``.
    The decode is all-or-nothing per column: if any single entry is not valid
    JSON the column keeps its raw strings.  Note that *any* valid JSON
    document is decoded, so a column made up solely of strings such as
    ``"17"`` or ``"true"`` becomes integers / booleans, not only lists and
    dictionaries.

    Parameters
    ----------
    conn : database connection
        An open connection that ``pd.read_sql`` accepts (e.g. ``sqlite3.Connection``).

    Returns
    -------
    dict of str -> pandas.DataFrame
        One entry per table, keyed by ``"<table name>_df"``.
    """
    dataframes = {}

    for table in get_table_names(conn):
        # Quote the identifier (doubling any embedded double quotes) so table
        # names containing spaces, punctuation or SQL keywords still load.
        quoted_table = '"' + table.replace('"', '""') + '"'
        df = pd.read_sql(f"SELECT * FROM {quoted_table}", conn)

        for col in df.columns:
            # Only columns made up entirely of strings are JSON candidates.
            if not df[col].apply(lambda x: isinstance(x, str)).all():
                continue
            try:
                parsed = df[col].apply(json.loads)
            except (json.JSONDecodeError, TypeError):
                # At least one entry is not valid JSON: keep the raw strings.
                continue
            df[col] = parsed

        # Store the DataFrame under table name + '_df'
        dataframes[f"{table}_df"] = df

    return dataframes

# Build `dfs`, the dictionary of DataFrames used by the rest of the notebook.
# The cached pickle is preferred when it exists; otherwise the tables are read
# from the SQLite database, at most once per kernel session (`cell_has_run`
# guards against repeating the load and the timestamp conversion on re-runs).
if os.path.exists(data_path + 'dfs_dict.pkl'):
    # NOTE: pickle.load can execute arbitrary code -- only load a dfs_dict.pkl
    # that was produced by this project.
    # NOTE(review): the pickle is assumed to already hold converted start/end
    # timestamps, since no conversion is applied on this path -- confirm.
    with open(data_path + 'dfs_dict.pkl', 'rb') as f:
        dfs = pickle.load(f)
    cell_has_run = True
elif 'cell_has_run' not in globals():
    # The original line read `path = + data_path + ...`; unary plus on a str
    # raises TypeError, so this branch could never run.
    path = data_path + "melee_player_database.db"

    # sqlite3.connect() silently creates an empty database for a missing file,
    # which would only surface later as a confusing KeyError.
    if not os.path.exists(path):
        raise FileNotFoundError(f"SQLite database not found: {path}")

    conn = sqlite3.connect(path)
    try:
        # Convert each table into a DataFrame, keyed by '<table>_df'
        # (e.g. dfs['players_df']).
        dfs = load_tables_to_dfs(conn)
    finally:
        # Release the connection even if loading fails.
        conn.close()

    # start/end are stored as Unix timestamps in seconds.
    dfs['tournament_info_df']['start'] = pd.to_datetime(dfs['tournament_info_df']['start'], unit='s')
    dfs['tournament_info_df']['end'] = pd.to_datetime(dfs['tournament_info_df']['end'], unit='s')

    # Set the flag to indicate that the cell has been run
    cell_has_run = True

### Here we adjust the data types of the dataframes so that they are the correct type. (This will be updated as needed.)

In [4]:
# Missing best_of values are replaced by 0 so the column can be cast to int.
dfs['sets_df']['best_of'] = dfs['sets_df']['best_of'].fillna(0).astype(int) 

In [5]:
# # Save the dictionary of DataFrames as a pickle
# with open(data_path + 'dfs_dict.pkl', 'wb') as f:
#     pickle.dump(dfs, f)

### Here we make dataframes that we will use and print the head.

The integers in 'characters' count the number of games the player has played as each character. (We verify this for Zain below.)

In [6]:
# Bind the players table to a short name and preview its first rows.
players_df = dfs['players_df']
players_df.head()


Unnamed: 0,game,player_id,tag,all_tags,prefixes,social,country,state,region,c_country,c_state,c_region,placings,characters,alias
0,melee,Rishi,Rishi,[Rishi],[],{'twitter': []},,,,,,,[{'key': 'mdva-invitational-2017-(challonge-mi...,,
1,melee,15634,lloD,"[lloD, VGz | lloD, Llod]",[],{'twitter': ['lloD74']},United States,VA,,US,CA,Laurel,[{'key': 'mdva-invitational-2017-(challonge-mi...,"{'melee/peach': 1089, 'melee/falco': 1, 'melee...",
2,melee,6126,Zain,"[Zain, DontTestMe]",[PG],{'twitter': ['PG_Zain']},United States,VA,,US,CA,Los Angeles,[{'key': 'mdva-invitational-2017-(challonge-mi...,"{'melee/marth': 1065, 'melee/pichu': 1, 'melee...",DontTestMe
3,melee,Chu,Chu,[Chu],[],{'twitter': []},,,,,,,[{'key': 'mdva-invitational-2017-(challonge-mi...,,
4,melee,5620,Junebug,"[Junebug, LS | VGz Junebug]",[],{'twitter': ['arJunebug']},United States,VA,,US,VA,Richmond,[{'key': 'mdva-invitational-2017-(challonge-mi...,"{'melee/sheik': 46, 'melee/falco': 4, 'melee/g...",


In [7]:
# Bind the ranking table to a short name and preview its first rows.
ranking_df = dfs['ranking_df']
ranking_df.head()

Unnamed: 0,game,ranking_name,priority,region,seasons,tournaments,icon
0,melee,SSBMRank,0,world,"[2015, 2016, 2017, 2018, 2019]",[],miom


In [8]:
# Bind the per-season ranking table to a short name and preview its first rows.
ranking_seasons_df = dfs['ranking_seasons_df']
ranking_seasons_df.head()

Unnamed: 0,game,ranking_name,season,start,end,total,by_id,by_placing,final,name
0,melee,SSBMRank,2015,1420070400,1451606399,100,"{'6189': 1, '1004': 2, '4465': 3, '1000': 4, '...","{'1': '6189', '2': '1004', '3': '4465', '4': '...",0,
1,melee,SSBMRank,2016,1451606400,1483228799,100,"{'6189': 1, '1004': 2, '1000': 3, '1003': 4, '...","{'1': '6189', '2': '1004', '3': '1000', '4': '...",0,
2,melee,SSBMRank,2017,1483228800,1514764799,100,"{'1004': 1, '6189': 2, '1000': 3, '1003': 4, '...","{'1': '1004', '2': '6189', '3': '1000', '4': '...",0,
3,melee,SSBMRank,2018,1514793600,1546329600,100,"{'1004': 1, '6189': 2, '4465': 3, '15990': 4, ...","{'1': '1004', '2': '6189', '3': '4465', '4': '...",0,
4,melee,SSBMRank,2019,1546329600,1577836800,100,"{'1004': 1, '4465': 2, '1000': 3, '16342': 4, ...","{'1': '1004', '2': '4465', '3': '1000', '4': '...",0,


In [9]:
sets_df = dfs['sets_df']
# Share of sets whose game_data entry is non-empty. The mean of the boolean
# mask is that share, and the '%' format type already appends the percent
# sign, so the message no longer reads "% percent".
print(f"{(sets_df['game_data'].apply(len) > 0).mean():.1%} of sets have some game data")
sets_df.shape



32.9% percent of sets have some game data


(1795681, 14)

In [10]:
# Bind the tournament table to a short name, then print its dimensions and a
# plain-text preview of the first rows.
tournament_info_df = dfs['tournament_info_df']
print(tournament_info_df.shape)
print(tournament_info_df.head())


(39675, 20)
    game                                            key  \
0  melee      mdva-invitational-2017-(challonge-mirror)   
1  melee                                          s@sh7   
2  melee  slippi-champions-league-week-1__melee-singles   
3  melee  slippi-champions-league-week-2__melee-singles   
4  melee  slippi-champions-league-week-3__melee-singles   

                                cleaned_name     source  \
0  MDVA Invitational 2017 (Challonge Mirror)  challonge   
1                                      S@SH7  challonge   
2             Slippi Champions League Week 1    pgstats   
3             Slippi Champions League Week 2    pgstats   
4             Slippi Champions League Week 3    pgstats   

                                tournament_name tournament_event season rank  \
0  https://challonge.com/mdva_invitational_2017                      17        
1                   https://challonge.com/sash7                      17        
2                slippi-champions-leag

In [11]:
# Spot-check one tournament by exact match on its cleaned name.
tournament_info_df[tournament_info_df['cleaned_name']=='DreamHack Denver 2017']

Unnamed: 0,game,key,cleaned_name,source,tournament_name,tournament_event,season,rank,start,end,country,state,city,entrants,placings,losses,bracket_types,online,lat,lng
49,melee,dreamhack-denver-2017-1__super-smash-bros-melee,DreamHack Denver 2017,gg,dreamhack-denver-2017-1,super-smash-bros-melee,17,,2017-10-20 18:00:00,2017-10-23 04:00:00,US,CO,Denver,314,"[[58139, 129], [428866, 193], [523890, 257], [...","{'58139': [{'set_order': 'BH', 'bracket_order'...","b'{""1"": 2, ""2"": 2, ""3"": 2}'",0,39.781011,-104.971494


We copied the information from Liquipedia into a spreadsheet and saved it as a CSV, which we load as a dataframe.

In [12]:
# Read the CSV copied from Liquipedia and drop its first six rows.
# NOTE(review): the reason for skipping six rows is not recorded here -- confirm.
majors_df = pd.read_csv('melee_majors.csv').iloc[6:]
majors_df.shape

(139, 7)

### Clean Up the Tournament Names in Your List
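First, let's clean the tournament names in the list: each entry repeats the series name in front of the tournament name (e.g. "Tipped Off Tipped Off 15"), so we strip the duplicated prefix.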

In [13]:
# Raw tournament names exactly as they appear in the 'Tournament' column.
tournament_list = list(majors_df['Tournament'])

def remove_duplicate_phrases(name):
    """Drop a leading phrase that is immediately repeated in ``name``.

    The name is split on whitespace.  For the shortest prefix of ``n`` words
    that is followed by the same ``n`` words again, the first copy is removed
    and the remaining words are re-joined with single spaces
    (``"Tipped Off Tipped Off 15"`` -> ``"Tipped Off 15"``).  When no such
    repetition exists the input string is returned unchanged.
    """
    tokens = name.split()
    # Length of the shortest doubled prefix, or None when nothing repeats.
    repeat_len = next(
        (n for n in range(1, len(tokens)) if tokens[n:2 * n] == tokens[:n]),
        None,
    )
    if repeat_len is None:
        return name
    return ' '.join(tokens[repeat_len:])

# Strip the repeated series prefix from every entry of the list.
cleaned_tournament_list = list(map(remove_duplicate_phrases, tournament_list))

# Show each raw name next to its cleaned form for a manual sanity check.
print("Cleaned Tournament Names:")
for original, cleaned in zip(tournament_list, cleaned_tournament_list):
    print(f"Original: {original}", f"Cleaned: {cleaned}", "", sep="\n")

Cleaned Tournament Names:
Original: Tipped Off Tipped Off 15
Cleaned: Tipped Off 15

Original: Get On My Level Get On My Level X
Cleaned: Get On My Level X

Original: Pat's House Pat's House 4
Cleaned: Pat's House 4

Original: Battle of BC Battle of BC 6
Cleaned: Battle of BC 6

Original: Collision Collision 2024
Cleaned: Collision 2024

Original: GENESIS GENESIS X
Cleaned: GENESIS X

Original: The Big House The Big House 11
Cleaned: The Big House 11

Original: Riptide Riptide 2023
Cleaned: Riptide 2023

Original: Shine Shine 2023
Cleaned: Shine 2023

Original: Super Smash Con Super Smash Con 2023
Cleaned: Super Smash Con 2023

Original: Get On My Level Get On My Level 2023
Cleaned: Get On My Level 2023

Original: Fête Fête 3: By the Sea
Cleaned: Fête 3: By the Sea

Original: Ludwig Ahgren Championship Series Ludwig Ahgren Championship Series 5
Cleaned: Ludwig Ahgren Championship Series 5

Original: Tipped Off Tipped Off 14
Cleaned: Tipped Off 14

Original: Battle of BC Battle of BC 5


In [14]:

import pandas as pd
import re

def clean_tournament_name(name):
    """Normalise a tournament name for exact-match comparison.

    Every character other than ASCII letters, digits and whitespace is
    removed (so accented letters and punctuation disappear), the result is
    lower-cased, runs of whitespace collapse to a single space, and leading /
    trailing whitespace is stripped.
    """
    alnum_only = re.sub(r'[^a-zA-Z0-9\s]', '', name)
    return re.sub(r'\s+', ' ', alnum_only.lower()).strip()

# Normalise the major names with the same cleaner that is applied to the
# database names below, so both sides compare like-for-like.
major_tournaments_cleaned = list(map(clean_tournament_name, cleaned_tournament_list))

# Add a normalised copy of 'cleaned_name' to the tournament table.
tournament_info_df['cleaned_name_cleaned'] = tournament_info_df['cleaned_name'].apply(clean_tournament_name)

# A row is flagged as a major when its normalised name matches one exactly.
tournament_info_df['major'] = tournament_info_df['cleaned_name_cleaned'].isin(major_tournaments_cleaned)

# Inspect which rows were flagged.
majors_in_df = tournament_info_df.loc[tournament_info_df['major']]
print("Number of majors found:", len(majors_in_df))
print("Majors found:")
print(majors_in_df['cleaned_name'].unique())


Number of majors found: 107
Majors found:
['Summit Champions League Season 2 Week 1'
 'Summit Champions League Season 2 Week 2'
 'Summit Champions League Season 2 Week 3'
 'Summit Champions League Season 2 Week 4' 'Paragon Los Angeles 2015'
 'B.E.A.S.T 5' 'Apex 2015' 'Sandstorm' 'Press Start' 'CEO 2015'
 'Enthusiast Gaming Live Expo' 'DreamHack Winter 2015' 'Shine 2018'
 'Super Smash Con 2018' 'Low Tier City 6' 'CEO 2018' "Smash 'N' Splash 4"
 'Get On My Level 2018' 'Smash Summit 6' 'Full Bloom 4' 'GENESIS 5'
 'Smash Summit 5' 'Canada Cup 2017' 'DreamHack Denver 2017'
 'The Big House 7' 'Shine 2017' 'Super Smash Con 2017'
 'Get On My Level 2017' 'DreamHack Atlanta 2017' "Smash 'N' Splash 3"
 'Royal Flush' 'DreamHack Austin 2017' 'CEO Dreamland' 'Smash Rivalries'
 'Full Bloom 3' 'Smash Summit Spring 2017' 'GENESIS 4'
 "Don't Park on the Grass" 'UGC Smash Open' 'DreamHack Winter 2016'
 'Smash Summit 3' 'Canada Cup 2016' 'The Big House 6'
 'Super Smash Con 2016' 'WTFox 2' 'CEO 2016' "Smas

In [15]:
# Majors from the list whose normalised name has no exact match in the
# tournament table. The table's names are collected into a set once, instead
# of re-scanning the whole column for every major.
known_names = set(tournament_info_df['cleaned_name_cleaned'])
missing_majors = [major for major in major_tournaments_cleaned if major not in known_names]

print(len(missing_majors))
for major in missing_majors:
    print(major)

34
tipped off 15
get on my level x
riptide 2023
get on my level 2023
ludwig ahgren championship series 5
tipped off 14
back in blood major upset
smash summit 14
lost tech city 2022
double down 2022
get on my level 2022
smash summit 13
smash world tour swt 2021 melee championship
swt 2021 na east regional finals
riptide 2021
galint melee open galint melee open spring edition
summit champions league slippi champions league season 1 week 4
summit champions league slippi champions league season 1 week 3
summit champions league slippi champions league season 1 week 2
summit champions league slippi champions league season 1 week 1
get on my level get on my line 2020
gtx gametyrant expo 2018
evolution championship series evo 2018
enthusiast gaming live expo 2018
gtx gametyrant expo 2017
genesis fuse doubles circuit finals
evolution championship series evo 2017
shine 2016
evolution championship series evo 2016
super smash con
evolution championship series evo 2015
wtfox
meleefc fc smash 15xr r

In [16]:
# Independent copy of the rows currently flagged as majors, for inspection.
temp_df = tournament_info_df.loc[tournament_info_df['major'].eq(True)].copy()
temp_df

Unnamed: 0,game,key,cleaned_name,source,tournament_name,tournament_event,season,rank,start,end,...,city,entrants,placings,losses,bracket_types,online,lat,lng,cleaned_name_cleaned,major
6,melee,summit-champions-league-season-2-week-1__melee...,Summit Champions League Season 2 Week 1,pgstats,summit-champions-league-season-2-week-1,melee-singles,21,,2021-04-11 14:00:00,2021-04-11 14:00:00,...,,20,"[[1000, 1], [6126, 2], [1017, 3], [19554, 3], ...",{},b'{}',1,0.000000,0.000000,summit champions league season 2 week 1,True
7,melee,summit-champions-league-season-2-week-2__melee...,Summit Champions League Season 2 Week 2,pgstats,summit-champions-league-season-2-week-2,melee-singles,21,,2021-04-18 14:00:00,2021-04-18 14:00:00,...,,20,"[[6126, 1], [1000, 2], [1017, 3], [1019, 3], [...",{},b'{}',1,0.000000,0.000000,summit champions league season 2 week 2,True
8,melee,summit-champions-league-season-2-week-3__melee...,Summit Champions League Season 2 Week 3,pgstats,summit-champions-league-season-2-week-3,melee-singles,21,,2021-05-02 14:00:00,2021-05-02 14:00:00,...,,20,"[[1028, 1], [6126, 2], [19554, 3], [1000, 3], ...",{},b'{}',1,0.000000,0.000000,summit champions league season 2 week 3,True
9,melee,summit-champions-league-season-2-week-4__melee...,Summit Champions League Season 2 Week 4,pgstats,summit-champions-league-season-2-week-4,melee-singles,21,,2021-05-09 14:00:00,2021-05-09 14:00:00,...,,16,"[[6126, 1], [1000, 2], [1019, 3], [19554, 4], ...",{},b'{}',1,0.000000,0.000000,summit champions league season 2 week 4,True
11,melee,httpsparagonchallongecomla_2015_melee_singles,Paragon Los Angeles 2015,challonge,https://paragon.challonge.com/la_2015_melee_si...,,15,,2015-09-06 23:45:46,2015-09-07 20:33:07,...,,64,"[[1000, 1], [Azen Zagenite, 49], [1032, 17], [...",{},b'{}',0,,,paragon los angeles 2015,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34685,melee,genesis-x__melee-singles,GENESIS X,gg,genesis-x,melee-singles,24,,2024-02-16 18:00:00,2024-02-19 07:59:00,...,San Jose,1543,"[[1704389, 1025], [2701710, 513], [27282, 769]...","{'1704389': [{'opponent': '3722677', 'bracket_...","b'{""1"": 2, ""3"": 2, ""4"": 2, ""2"": 2}'",0,37.329078,-121.888984,genesis x,True
35805,melee,collision-2024-6__giant-melee,Collision 2024,gg,collision-2024-6,giant-melee,24,,2024-03-15 15:00:00,2024-03-18 03:30:00,...,Parsippany-Troy Hills,75,"[[6126, 33], [16105, 25], [6544, 33], [2188548...","{'6126': [{'opponent': '159410', 'bracket_orde...","b'{""1"": 2, ""2"": 2}'",0,40.861483,-74.412315,collision 2024,True
36389,melee,battle-of-bc-6-7__lowtier-bracket-melee,Battle of BC 6,gg,battle-of-bc-6-7,lowtier-bracket-melee,24,,2024-03-29 20:00:00,2024-04-01 05:00:00,...,Vancouver,19,"[[460459, 13], [2245455, 5], [135756, 2], [138...","{'460459': [{'opponent': '1986398', 'bracket_o...","b'{""1"": 2}'",0,49.289116,-123.116891,battle of bc 6,True
36479,melee,battle-of-bc-6-7__melee-singles,Battle of BC 6,gg,battle-of-bc-6-7,melee-singles,24,,2024-03-30 18:00:00,2024-04-01 05:00:00,...,Vancouver,398,"[[19554, 1], [521193, 129], [66043, 33], [1563...","{'19554': [{'opponent': '324561', 'bracket_ord...","b'{""1"": 2, ""2"": 2, ""3"": 2, ""4"": 2}'",0,49.289116,-123.116891,battle of bc 6,True


### Search for missing majors one by one

In [17]:
# temp_df = tournament_info_df.copy()
# # temp_df = tournament_info_df[tournament_info_df['city']=='Los Vagas']
# year = 2015
# temp_df = temp_df[temp_df['start']>=datetime(year,1,1)]
# temp_df = temp_df[temp_df['start']<datetime(year,12,30)]
# # temp_df = temp_df[temp_df['entrants']==16]
# # temp_df.sort_values('entrants',inplace=True)
# print(temp_df.shape)
# temp_df

## Add missing majors

In [18]:
# Index labels of tournament_info_df rows for majors that the exact name match
# did not find. Each trailing comment names the major the row stands for;
# commented-out names without a number have no row listed for them.
missing_majors_5=[
39443, # Tipped Off 15
38456, # Get On My Level X
28389, # Riptide 2023
26646, # Get On My Level 2023
26137, # Ludwig 2023 main event
24918, # Tipped Off 14
22595, # Back in Blood: Major Upset
17129,# Smash Summit 14
15764, # Lost Tech City 2022
12948, # Double Down 2022
12779, # Get On My Level 2022
11293, # Smash Summit 13
7532, # Smash World Tour
6377, # SWT 2021 NA East Regional Finals
5168, # Riptide 2021
1233, # Galint Melee Open: Spring Edition
2, # Slippi Champions League Week 1
3,# Slippi Champions League Week 2
4,# Slippi Champions League Week 3
5,# Slippi Champions League Week 4
667, # Get On My Line 2020
167, # GameTyrant Expo 2018
30, # EVO 2018
41, # Enthusiast Gaming Live Expo 2018
51, # GameTyrant Expo 2017
# Genesis Fuse Doubles Circuit Finals (no row listed)
58, # EVO 2017
# Shine 2016 (no row listed)
26, # EVO 2016
141, # Super Smash Con
25, # EVO 2015
# WTFox (no row listed)
165, # FC Smash 15XR: Return
14 # Paragon 2015
]

# Flag the listed rows as majors, selecting them by index label.
tournament_info_df.loc[missing_majors_5, 'major'] = True


In [None]:
# Rows flagged as majors after the manual additions.
major_tournament_info_df = tournament_info_df.loc[tournament_info_df['major'].eq(True)]
major_tournament_info_df

Unnamed: 0,game,key,cleaned_name,source,tournament_name,tournament_event,season,rank,start,end,...,city,entrants,placings,losses,bracket_types,online,lat,lng,cleaned_name_cleaned,major
15,melee,httpsapex2015meleechallongecomsingles,Apex 2015,challonge,https://apex2015melee.challonge.com/singles,,15,,2015-02-01 04:33:03,2015-02-06 18:58:36,...,,48,"[[1000, 4], [6189, 2], [1002, 1], [1003, 9], [...",{},b'{}',0,,,apex 2015,True
17748,melee,apex-2022__melee-singles-starts-friday,Apex 2022,gg,apex-2022,melee-singles-starts-friday,22,,2022-11-18 15:00:00,2022-11-21 04:59:00,...,Secaucus,461,"[[16256, 65], [665043, 257], [3990, 17], [6982...","{'16256': [{'opponent': '59065', 'bracket_orde...","b'{""1"": 2, ""2"": 2, ""3"": 2, ""4"": 2}'",0,40.787948,-74.042908,apex 2022,True
13,melee,httpsbeastsmashchallongecomb5msb,B.E.A.S.T 5,challonge,https://beastsmash.challonge.com/B5MSB,,15,,2015-01-11 14:16:13,2015-01-13 01:02:28,...,,64,"[[1000, 3], [6189, 2], [4465, 1], [19573, 4], ...",{},b'{}',0,,,beast 5,True
12151,melee,battle-of-bc-4-2__melee-singles-bracket,Battle of BC 4,gg,battle-of-bc-4-2,melee-singles-bracket,22,,2022-06-11 18:00:00,2022-06-13 03:00:00,...,Vancouver,281,"[[62646, 193], [2551741, 97], [2192482, 129], ...","{'62646': [{'opponent': '3299', 'bracket_order...","b'{""1"": 2, ""2"": 2, ""3"": 2, ""4"": 2}'",0,49.266478,-123.249865,battle of bc 4,True
24472,melee,battle-of-bc-5-5__melee-singles,Battle of BC 5,gg,battle-of-bc-5-5,melee-singles,23,,2023-05-20 18:00:00,2023-05-22 04:00:00,...,Vancouver,409,"[[16105, 4], [172183, 257], [2721355, 65], [58...","{'16105': [{'opponent': '1021', 'bracket_order...","b'{""1"": 2, ""2"": 2, ""3"": 2}'",0,49.288695,-123.111191,battle of bc 5,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7532,melee,the-smash-world-tour-championships__melee-cham...,The Smash World Tour Championships Melee Champ...,gg,the-smash-world-tour-championships,melee-championships,21,,2021-12-17 15:00:00,2021-12-20 04:59:00,...,Orlando,40,"[[276942, 33], [11139, 9], [1028, 2], [1003, 1...","{'276942': [{'opponent': '46349', 'bracket_ord...","b'{""1"": 3, ""2"": 2}'",0,28.424695,-81.459138,the smash world tour championships melee champ...,True
24918,melee,tipped-off-14-resurgence__melee-singles,Tipped Off 14: Resurgence,gg,tipped-off-14-resurgence,melee-singles,23,,2023-06-03 15:00:00,2023-06-05 03:58:00,...,Marietta,344,"[[1335864, 49], [675878, 65], [55354, 97], [27...","{'1335864': [{'opponent': '434815', 'bracket_o...","b'{""1"": 2, ""3"": 2, ""4"": 2, ""2"": 2}'",0,33.938010,-84.515963,tipped off 14 resurgence,True
39443,melee,tipped-off-15-connected-1__melee-singles,Tipped Off 15: Connected,gg,tipped-off-15-connected-1,melee-singles,24,,2024-06-15 15:00:00,2024-06-17 03:57:00,...,Marietta,513,"[[6126, 5], [2436958, 257], [2393715, 385], [1...","{'6126': [{'opponent': '1000', 'bracket_order'...","b'{""1"": 2, ""3"": 2, ""4"": 2, ""2"": 2}'",0,33.938010,-84.515963,tipped off 15 connected,True
133,melee,ugc-smash-open__melee-singles,UGC Smash Open,gg,ugc-smash-open,melee-singles,16,,2016-12-02 15:00:00,2016-12-05 05:55:00,...,Collinsville,255,"[[15168, 49], [28391, 49], [242818, 129], [229...","{'15168': [{'set_order': 'AT', 'bracket_order'...","b'{""1"": 2, ""2"": 2, ""3"": 2}'",0,38.681292,-90.017496,ugc smash open,True


## Remove not majors
Going through the list on the website and comparing it to the majors we found, we remove the ones that were mislabelled. That completes the list of majors.

In [None]:
# Index labels of rows that were flagged as majors but are not the main
# singles event; each comment gives the row's tournament key.
not_actually_majors = [
36389,  # battle-of-bc-6-7__lowtier-bracket-melee
16526, # ludwig-smash-invitational__melee-singles-lcq
]
# Clear the major flag on those rows, selecting them by index label.
tournament_info_df.loc[not_actually_majors, 'major'] = False


We seem to be missing 2 majors.

In [23]:
# Final set of rows flagged as majors, after manual additions and removals.
major_tournament_info_df = tournament_info_df.loc[tournament_info_df['major'].eq(True)]
major_tournament_info_df

Unnamed: 0,game,key,cleaned_name,source,tournament_name,tournament_event,season,rank,start,end,...,city,entrants,placings,losses,bracket_types,online,lat,lng,cleaned_name_cleaned,major
2,melee,slippi-champions-league-week-1__melee-singles,Slippi Champions League Week 1,pgstats,slippi-champions-league-week-1,melee-singles,20,,2020-10-11 14:00:00,2020-10-11 14:00:00,...,,20,"[[1000, 1], [6126, 2], [4107, 3], [19554, 3], ...",{},b'{}',1,0.000000,0.000000,slippi champions league week 1,True
3,melee,slippi-champions-league-week-2__melee-singles,Slippi Champions League Week 2,pgstats,slippi-champions-league-week-2,melee-singles,20,,2020-10-18 14:00:00,2020-10-18 14:00:00,...,,20,"[[6126, 1], [4107, 2], [1000, 3], [19554, 3], ...",{},b'{}',1,0.000000,0.000000,slippi champions league week 2,True
4,melee,slippi-champions-league-week-3__melee-singles,Slippi Champions League Week 3,pgstats,slippi-champions-league-week-3,melee-singles,20,,2020-10-25 14:00:00,2020-10-25 14:00:00,...,,20,"[[6126, 1], [3359, 2], [19554, 3], [4107, 3], ...",{},b'{}',1,0.000000,0.000000,slippi champions league week 3,True
5,melee,slippi-champions-league-week-4__melee-singles,Slippi Champions League Week 4,pgstats,slippi-champions-league-week-4,melee-singles,20,,2020-10-31 14:00:00,2020-10-31 14:00:00,...,,20,"[[6126, 1], [3357, 2], [15990, 3], [3359, 3], ...",{},b'{}',1,0.000000,0.000000,slippi champions league week 4,True
6,melee,summit-champions-league-season-2-week-1__melee...,Summit Champions League Season 2 Week 1,pgstats,summit-champions-league-season-2-week-1,melee-singles,21,,2021-04-11 14:00:00,2021-04-11 14:00:00,...,,20,"[[1000, 1], [6126, 2], [1017, 3], [19554, 3], ...",{},b'{}',1,0.000000,0.000000,summit champions league season 2 week 1,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35805,melee,collision-2024-6__giant-melee,Collision 2024,gg,collision-2024-6,giant-melee,24,,2024-03-15 15:00:00,2024-03-18 03:30:00,...,Parsippany-Troy Hills,75,"[[6126, 33], [16105, 25], [6544, 33], [2188548...","{'6126': [{'opponent': '159410', 'bracket_orde...","b'{""1"": 2, ""2"": 2}'",0,40.861483,-74.412315,collision 2024,True
36479,melee,battle-of-bc-6-7__melee-singles,Battle of BC 6,gg,battle-of-bc-6-7,melee-singles,24,,2024-03-30 18:00:00,2024-04-01 05:00:00,...,Vancouver,398,"[[19554, 1], [521193, 129], [66043, 33], [1563...","{'19554': [{'opponent': '324561', 'bracket_ord...","b'{""1"": 2, ""2"": 2, ""3"": 2, ""4"": 2}'",0,49.289116,-123.116891,battle of bc 6,True
37728,melee,pat-s-house-4-2__melee-singles,Pat's House 4,gg,pat-s-house-4-2,melee-singles,24,,2024-04-27 19:00:00,2024-04-29 05:00:00,...,San Diego,307,"[[19554, 3], [6126, 1], [5080, 5], [16105, 7],...","{'19554': [{'opponent': '1000', 'bracket_order...","b'{""2"": 2, ""3"": 2, ""1"": 2}'",0,32.879720,-117.236195,pats house 4,True
38456,melee,get-on-my-level-x-canadian-fighting-game-champ...,Get On My Level X - Canadian Fighting Game Cha...,gg,get-on-my-level-x-canadian-fighting-game-champ...,super-smash-bros-melee-singles,24,,2024-05-18 19:00:00,2024-05-20 03:00:00,...,Toronto,770,"[[2439005, 769], [1277599, 129], [22324, 257],...","{'2439005': [{'opponent': '158055', 'bracket_o...","b'{""1"": 2, ""2"": 2, ""3"": 2, ""4"": 2}'",0,43.651164,-79.384172,get on my level x canadian fighting game champ...,True
