In [2]:
import pandas as pd
from datetime import datetime, timedelta
import os

# Define file paths for your CSV data
summoner_details_csv = '../data/raw/summoner_details.csv'
match_details_csv = '../data/raw/match_details.csv'
entries_csv = '../data/raw/entries.csv'

# Ensure all CSV files exist. Stop at the first missing file, in the order
# listed below. SystemExit is raised directly (rather than calling the
# site-provided exit() helper, which is meant for interactive sessions and
# exits with status 0) so the message is reported and the exit status is
# non-zero.
for required_csv in (summoner_details_csv, match_details_csv, entries_csv):
    if not os.path.exists(required_csv):
        raise SystemExit(f"{required_csv} not found.")

# Load the data into pandas DataFrames
summoner_details_df = pd.read_csv(summoner_details_csv)
match_details_df = pd.read_csv(match_details_csv)
entries_df = pd.read_csv(entries_csv)

# Convert the 'game_creation' column (epoch milliseconds) to datetimes.
# The result is timezone-naive but expressed in UTC, because the Unix epoch
# that unit='ms' counts from is defined in UTC.
match_details_df['game_creation_converted'] = pd.to_datetime(match_details_df['game_creation'], unit='ms')

# Keep only the match rows whose summoner_id appears in summoner_details
target_summoners = summoner_details_df['summoner_id'].unique()
filtered_match_details = match_details_df[match_details_df['summoner_id'].isin(target_summoners)].copy()

# Calculate the difference in whole days between now and each match.
# "Now" is taken in UTC and then made timezone-naive so that it is on the
# same clock as 'game_creation_converted'; a local-time datetime.now() would
# shift every difference by the machine's UTC offset.
today = pd.Timestamp.now(tz='UTC').tz_localize(None)
filtered_match_details['days_diff'] = (today - filtered_match_details['game_creation_converted']).dt.days
# Define the groups based on days_diff
def assign_time_group(days):
    """Return the inactivity bucket label for a day count.

    Buckets are defined by inclusive upper bounds (7, 14, 28, 40, 60), so
    every real number maps to exactly one bucket: a fractional value such as
    7.5 lands in the next bucket up instead of falling through to the
    "Lost" label.

    Args:
        days: Number of days since the match was created. Values of zero or
            below are placed in the first bucket, even though its label
            reads "1-7 days".

    Returns:
        The bucket label as a string. A NaN input fails every comparison
        and therefore yields 'Beyond 60 days - Lost'.
    """
    if days <= 7:
        return 'Active Players (1-7 days)'
    if days <= 14:
        return 'Slightly Inactive (8-14 days)'
    if days <= 28:
        return 'Moderately Inactive (15-28 days)'
    if days <= 40:
        return 'Highly Inactive (29-40 days)'
    if days <= 60:
        return 'At Risk of Churn (41-60 days)'
    return 'Beyond 60 days - Lost'

# Label every match row with its time group, derived from days_diff
filtered_match_details['time_group'] = filtered_match_details['days_diff'].map(assign_time_group)

# Left-join the entries DataFrame on summoner_id to bring in the rank column
combined_data = filtered_match_details.merge(entries_df, how='left', on='summoner_id')

# Count the rows in each (time_group, rank) combination, then flatten the
# result into a regular DataFrame with a 'count' column
group_sizes = combined_data.groupby(['time_group', 'rank']).size()
grouped_results = group_sizes.reset_index(name='count')

# Show the grouped counts
print(grouped_results)


                          time_group rank  count
0          Active Players (1-7 days)    I   1243
1          Active Players (1-7 days)   II   1381
2          Active Players (1-7 days)  III   1366
3          Active Players (1-7 days)   IV   1294
4      At Risk of Churn (41-60 days)    I    156
5      At Risk of Churn (41-60 days)   II    163
6      At Risk of Churn (41-60 days)  III    170
7      At Risk of Churn (41-60 days)   IV    140
8              Beyond 60 days - Lost    I    278
9              Beyond 60 days - Lost   II    189
10             Beyond 60 days - Lost  III    153
11             Beyond 60 days - Lost   IV    176
12      Highly Inactive (29-40 days)    I    124
13      Highly Inactive (29-40 days)   II    108
14      Highly Inactive (29-40 days)  III    118
15      Highly Inactive (29-40 days)   IV    117
16  Moderately Inactive (15-28 days)    I    278
17  Moderately Inactive (15-28 days)   II    239
18  Moderately Inactive (15-28 days)  III    239
19  Moderately Inact