In [4]:
import os

import mysql.connector
import pandas as pd

# Database connection details.
# Each value is read from an environment variable so real credentials and
# hosts do not have to be saved in the notebook. The previous literals are
# kept as fallbacks, so behaviour is unchanged when the variables are unset.
db_config = {
    'user': os.environ.get('LOL_DB_USER', 'lol_user'),
    'password': os.environ.get('LOL_DB_PASSWORD', 'your_password'),
    'host': os.environ.get('LOL_DB_HOST', '35.203.46.85'),
    'database': os.environ.get('LOL_DB_NAME', 'lol_retention'),
}

# Function to pull data from the database and load it into a DataFrame
def load_data_to_dataframe(query):
    """Run ``query`` against the configured MySQL database and return a DataFrame.

    Args:
        query: SQL text passed unchanged to ``cursor.execute``.

    Returns:
        A ``pandas.DataFrame`` holding every fetched row, with column names
        taken from ``cursor.description``; ``None`` if a
        ``mysql.connector.Error`` is raised (the error is printed).

    The cursor and connection are always closed, including when the query
    fails part-way through.
    """
    cnx = None
    cursor = None
    try:
        # Establish a connection to the database
        cnx = mysql.connector.connect(**db_config)
        cursor = cnx.cursor()

        # Execute the query and fetch all the rows
        cursor.execute(query)
        rows = cursor.fetchall()

        # Column names are the first field of each description entry
        column_names = [col[0] for col in cursor.description]

        return pd.DataFrame(rows, columns=column_names)

    except mysql.connector.Error as err:
        print(f"Error: {err}")
        return None

    finally:
        # Nested try/finally so the connection is closed even if closing the
        # cursor itself raises.
        try:
            if cursor is not None:
                cursor.close()
        finally:
            if cnx is not None:
                cnx.close()

# Query to pull the data with the `rank` included.
# Every match_details row with a non-null converted creation time is joined to
# summoner_details on puuid and to entries on summoner_id; the entry's `rank`
# is exposed as `division`. Rows are ordered oldest match first.
query = """
SELECT 
    md.*,
    e.`rank` AS division
FROM 
    match_details md
JOIN 
    summoner_details sd ON md.puuid = sd.puuid
JOIN 
    entries e ON sd.summoner_id = e.summoner_id
WHERE 
    md.game_creation_converted IS NOT NULL
ORDER BY 
    md.game_creation_converted ASC;
"""

# Load data into DataFrame (load_data_to_dataframe returns None on a database error)
df = load_data_to_dataframe(query)

# Display the DataFrame
print(df)

if __name__ == "__main__":
    # If you want to perform additional operations, you can do so here
    if df is not None:
        print(df.head())  # Print the first few rows of the DataFrame
    else:
        # Without this guard a failed load would raise AttributeError on None.
        print("No data loaded; skipping preview.")


            match_id  game_creation  game_duration game_mode     game_type  \
0     NA1_5000175667  1716003518866           1457   CLASSIC  MATCHED_GAME   
1     NA1_5000587935  1716058503579           2153   CLASSIC  MATCHED_GAME   
2     NA1_5001729337  1716172970375           1677   CLASSIC  MATCHED_GAME   
3     NA1_5002301863  1716247755726           2291   CLASSIC  MATCHED_GAME   
4     NA1_5002430373  1716256779504           1516   CLASSIC  MATCHED_GAME   
...              ...            ...            ...       ...           ...   
2219  NA1_5103949983  1725467949278           1482   CLASSIC  MATCHED_GAME   
2220  NA1_5103950973  1725467977829           1251    CHERRY  MATCHED_GAME   
2221  NA1_5103953745  1725468018159           1331   CLASSIC  MATCHED_GAME   
2222  NA1_5103950389  1725468455931           1648   CLASSIC  MATCHED_GAME   
2223  NA1_5103952710  1725468956097            923   CLASSIC  MATCHED_GAME   

        game_version  map_id  participant_id  \
0     14.10.585

In [5]:

# Show the column labels of the loaded DataFrame (md.* columns plus `division`)
print(df.columns)

Index(['match_id', 'game_creation', 'game_duration', 'game_mode', 'game_type',
       'game_version', 'map_id', 'participant_id', 'summoner_id', 'puuid',
       'champion_id', 'spell1_id', 'spell2_id', 'item0', 'item1', 'item2',
       'item3', 'item4', 'item5', 'item6', 'kills', 'deaths', 'assists',
       'total_damage_dealt', 'total_damage_taken', 'gold_earned',
       'vision_score', 'total_minions_killed', 'team_position', 'win',
       'game_creation_converted', 'division'],
      dtype='object')


In [6]:
from datetime import datetime, timedelta

# Uses `df`, the match-details DataFrame loaded earlier; its
# `game_creation_converted` column must support the `.dt` accessor.

# Cohort windows and sampling parameters, named for their actual values.
ACTIVE_WINDOW_DAYS = 4            # "active": a match within the last 4 days
INACTIVE_WINDOW_END_WEEKS = 4     # "inactive" window ends 4 weeks ago ...
INACTIVE_WINDOW_START_WEEKS = 7   # ... and starts 7 weeks ago
MAX_ROWS_PER_DIVISION = 125       # cap on sampled rows per division, per cohort
SAMPLE_SEED = 42

# Define today's date (windows are relative to the day the cell is run)
today = datetime.now().date()

# Define date filters
active_window_start = today - timedelta(days=ACTIVE_WINDOW_DAYS)
inactive_window_end = today - timedelta(weeks=INACTIVE_WINDOW_END_WEEKS)
inactive_window_start = today - timedelta(weeks=INACTIVE_WINDOW_START_WEEKS)

# Legacy names kept so any later cell that still refers to them keeps working.
# They are misleading: `yesterday` is 4 days back, `five_weeks_ago` is 7 weeks back.
yesterday = active_window_start
four_weeks_ago = inactive_window_end
five_weeks_ago = inactive_window_start

# Calendar date of each match, computed once and reused by both filters
match_dates = df['game_creation_converted'].dt.date

# Active cohort: match rows dated within the last ACTIVE_WINDOW_DAYS days (inclusive of today)
active_players = df[(match_dates >= active_window_start) &
                    (match_dates <= today)]

# Inactive cohort: match rows dated between 7 and 4 weeks ago (inclusive)
inactive_players = df[(match_dates >= inactive_window_start) &
                      (match_dates <= inactive_window_end)]


def _sample_per_division(frame, max_rows=MAX_ROWS_PER_DIVISION, seed=SAMPLE_SEED):
    """Return up to ``max_rows`` randomly sampled rows from each division of ``frame``.

    Divisions with fewer than ``max_rows`` rows are returned in full (shuffled).
    Each group is sampled with ``random_state=seed``, so the result is
    repeatable for the same input.
    """
    # NOTE(review): the saved cell output echoes these apply calls, which looks
    # like a pandas warning about groupby.apply; confirm against the installed
    # pandas version before changing this call.
    return frame.groupby('division').apply(
        lambda group: group.sample(n=min(len(group), max_rows), random_state=seed)
    )


# Sample up to MAX_ROWS_PER_DIVISION match rows per division from each cohort.
# These are rows of `df` (matches), so one player can appear more than once.
active_samples = _sample_per_division(active_players)
inactive_samples = _sample_per_division(inactive_players)

# Combine the two samples
final_sample = pd.concat([active_samples, inactive_samples])

# Display the final DataFrame
print(final_sample)

                     match_id  game_creation  game_duration game_mode  \
division                                                                
I        1532  NA1_5102864658  1725345596046           1842   CLASSIC   
         1154  NA1_5101278864  1725230659460           1802   CLASSIC   
         1865  NA1_5103595134  1725419970832           1815   CLASSIC   
         1212  NA1_5101644477  1725249798626           1481    CHERRY   
         2000  NA1_5103713885  1725427420526           1696   CLASSIC   
...                       ...            ...            ...       ...   
IV       394   NA1_5070898666  1722487200249           1353   CLASSIC   
         278   NA1_5054741828  1721663638515           1225   CLASSIC   
         327   NA1_5063049258  1722027512135           2096   CLASSIC   
         419   NA1_5073631721  1722678082046           1350   CLASSIC   
         377   NA1_5069797066  1722407641536           2137   CLASSIC   

                  game_type    game_version  map_i

  active_samples = active_players.groupby('division').apply(lambda x: x.sample(n=min(len(x), 125), random_state=42))
  inactive_samples = inactive_players.groupby('division').apply(lambda x: x.sample(n=min(len(x), 125), random_state=42))


In [7]:
# Write the combined sample to final_sample.csv in the working directory.
# index=False leaves the DataFrame index out of the file, so only the columns are saved.
final_sample.to_csv('final_sample.csv', index=False)

In [8]:
# DISABLED, DESTRUCTIVE: this whole cell is a bare string literal, so nothing in
# it executes. The text inside is a one-off cleanup script that would:
#   1. delete summoner_details rows whose puuid is not in final_sample,
#   2. delete entries rows whose summoner_id does not belong to a kept puuid,
#   3. TRUNCATE match_ids and match_details.
# NOTE(review): the "Data successfully cleaned and truncated." output saved
# under this cell is presumably from an earlier run made before the code was
# wrapped in quotes; confirm before relying on the current database contents.
# Do not re-enable without a backup: an empty `selected_puuids` would produce
# an invalid `NOT IN ()` clause, and the connection is not closed on error.
'''
# Extract the PUUIDs to keep
selected_puuids = final_sample['puuid'].unique().tolist()

# Generate the SQL query to delete from summoner_details
delete_summoner_details_query = f"""
DELETE FROM summoner_details 
WHERE puuid NOT IN ({', '.join(['%s'] * len(selected_puuids))})
"""

# Establish database connection
db_config = {
    'user': 'lol_user',
    'password': 'your_password',
    'host': '35.203.46.85',
    'database': 'lol_retention'
}

try:
    cnx = mysql.connector.connect(**db_config)
    cursor = cnx.cursor()

    # Delete from summoner_details
    cursor.execute(delete_summoner_details_query, selected_puuids)

    # Fetch corresponding summoner_ids from summoner_details based on the selected puuids
    fetch_summoner_ids_query = f"""
    SELECT summoner_id 
    FROM summoner_details 
    WHERE puuid IN ({', '.join(['%s'] * len(selected_puuids))})
    """
    cursor.execute(fetch_summoner_ids_query, selected_puuids)
    summoner_ids = [row[0] for row in cursor.fetchall()]

    if summoner_ids:
        # Generate the SQL query to delete from entries using summoner_id
        delete_entries_query = f"""
        DELETE FROM entries 
        WHERE summoner_id NOT IN ({', '.join(['%s'] * len(summoner_ids))})
        """
        cursor.execute(delete_entries_query, summoner_ids)

    # Truncate match_ids and match_details tables
    cursor.execute("TRUNCATE TABLE match_ids")
    cursor.execute("TRUNCATE TABLE match_details")

    cnx.commit()
    cursor.close()
    cnx.close()

    print("Data successfully cleaned and truncated.")

except mysql.connector.Error as err:
    print(f"Database error: {err}")

'''


Data successfully cleaned and truncated.
