In [2]:
import pandas as pd
import os
from datetime import datetime

# Define file paths for your CSV data
match_details_csv = '../data/raw/match_details.csv'
entries_csv = '../data/raw/entries.csv'
match_ids_csv = '../data/raw/match_ids.csv'
summoner_details_csv = '../data/raw/summoner_details.csv'

# Inclusive window, in whole days, for the age of an account's most recent match
MIN_DAYS_SINCE_LAST_MATCH = 29
MAX_DAYS_SINCE_LAST_MATCH = 40

# Ensure all CSV files exist. Every missing file is reported at once, and an
# exception is raised instead of calling exit(): a bare exit() ends with status 0,
# which makes the failure look like a success to whatever launched this code.
missing_files = [
    path
    for path in (match_details_csv, entries_csv, match_ids_csv, summoner_details_csv)
    if not os.path.exists(path)
]
if missing_files:
    raise FileNotFoundError(f"{', '.join(missing_files)} not found.")

# Load the data into pandas DataFrames
match_details_df = pd.read_csv(match_details_csv)
entries_df = pd.read_csv(entries_csv)
match_ids_df = pd.read_csv(match_ids_csv)
summoner_details_df = pd.read_csv(summoner_details_csv)

# Convert 'game_creation' column (which should be in milliseconds) to datetime.
# Epoch conversion produces timezone-naive timestamps expressed in UTC.
match_details_df['game_creation_converted'] = pd.to_datetime(match_details_df['game_creation'], unit='ms')

# "Now" must be on the same clock as the converted column: take the current UTC
# time and drop the tz marker. A plain Timestamp.now() is local wall-clock time and
# would skew every difference by the machine's UTC offset.
today = pd.Timestamp.now(tz='UTC').tz_localize(None)

# Per-match age in whole days (kept as a column so it is written to the output CSV)
match_details_df['days_diff'] = (today - match_details_df['game_creation_converted']).dt.days

# Find each account's most recent match. Selecting on *any* match inside the window
# would also pick up accounts that have played again more recently.
last_match_per_summoner = match_details_df.groupby('summoner_id')['game_creation_converted'].max()
days_since_last_match = (today - last_match_per_summoner).dt.days

# Get the summoner_ids of accounts whose last match is between 29-40 days ago
# (bounds inclusive; accounts with no valid timestamp compare as False and are dropped)
in_window = days_since_last_match.between(MIN_DAYS_SINCE_LAST_MATCH, MAX_DAYS_SINCE_LAST_MATCH)
filtered_summoner_ids = in_window[in_window].index.to_numpy()

# Keep every match row that belongs to one of the selected accounts
filtered_match_details = match_details_df[match_details_df['summoner_id'].isin(filtered_summoner_ids)]

# Filter entries and summoner_details directly on summoner_id
filtered_entries = entries_df[entries_df['summoner_id'].isin(filtered_summoner_ids)]
filtered_summoner_details = summoner_details_df[summoner_details_df['summoner_id'].isin(filtered_summoner_ids)]

# match_ids is filtered on 'puuid' (not 'summoner_id'), so go through the puuids
# of the already-filtered summoner details
filtered_match_ids = match_ids_df[match_ids_df['puuid'].isin(filtered_summoner_details['puuid'])]

# Save the cleaned and filtered data into the raw folder
filtered_match_details.to_csv('../data/raw/match_details_filtered.csv', index=False)
filtered_entries.to_csv('../data/raw/entries_filtered.csv', index=False)
filtered_match_ids.to_csv('../data/raw/match_ids_filtered.csv', index=False)
filtered_summoner_details.to_csv('../data/raw/summoner_details_filtered.csv', index=False)

print("Filtered data saved in the raw folder.")


Filtered data saved in the raw folder.
