In [1]:
import numpy as np
import pandas as pd
import os
from utils.utils import *

In [2]:
import requests
from bs4 import BeautifulSoup

def list_files_in_github_dir(repo, path, branch="main"):
    api_url = f"https://api.github.com/repos/{repo}/contents/{path}?ref={branch}"
    response = requests.get(api_url)
    
    if response.status_code == 200:
        files = [item['name'] for item in response.json() if item['type'] == 'file']
        return files
    else:
        print(f"Error: Unable to fetch directory contents (Status code: {response.status_code})")
        return []

# Repository and directory details
repo = "piedorr/MocStats"
path = "data/raw_csvs"

# List all files
files = list_files_in_github_dir(repo, path)
file_names = ['https://raw.githubusercontent.com/piedorr/MocStats/refs/heads/main/data/raw_csvs/' + a for a in files]

In [3]:
char_records = pd.concat([read_and_add_csv(a, '_char.csv') for a in file_names if 'char' in a])

# select useful columns
char_records = char_records[['uid', 'version', 'name', 'weapon', 'cons']]
char_records = char_records.rename(columns={'name': 'team_leader'})

In [4]:
char_records['version'] = char_records['version'].str.replace('_pf','')

## Apocalyptic Shadow

In [5]:
as_records = pd.concat([read_and_add_csv(a, '_as.csv') for a in file_names if 'as' in a and 'char' not in a])
as_records = as_records.dropna(subset=['ch1', 'ch2', 'ch3', 'ch4']).query('floor==4 & star_num > 1')

melted = as_records.melt(id_vars=['uid', 'version'], value_vars=['ch1', 'ch2', 'ch3', 'ch4'], value_name='name').drop(columns='variable')
counter = melted.groupby(['version', 'name']).size().reset_index(name='count')
uid_count = melted.groupby(['version'])['uid'].nunique().reset_index(name='records')

counter = counter.merge(uid_count, on='version')
counter['usage'] = counter['count'] / counter['records']

In [6]:
as_records = create_team_info(as_records, drop_ch=True)
data = as_records.merge(char_records, on = ['version', 'team_leader', 'uid'], how='left')
data = data.drop(columns=[f'cons{i}' for i in range(1,5)])

In [7]:
data = data[data['version'].str[-1] == '1']
data.to_csv('data/as.csv', index=False)

## Memory of Chaos

In [16]:
df = pd.concat([read_and_add_csv(a , '.csv') for a in file_names if '_' not in a.split('/')[-1]]).query('version > "1.29"')
df = df.query('round_num > 0 & star_num >= 2 & ch4.notnull()')
df['floor'] = df['floor'].astype('str').str.extract('(\d+)').astype('int')

  df['floor'] = df['floor'].astype('str').str.extract('(\d+)').astype('int')


In [17]:
floor12 = df.query('floor==12')
floor10 = df.query('floor==10').query('version < "1.6"').query('round_num >= 10')
data = pd.concat([floor10,floor12])

data = create_team_info(data, drop_ch=True)
data = data.merge(char_records, on = ['version', 'team_leader', 'uid'], how='left')
data = data.dropna(subset='team_leader')
data = data.drop(columns=[f'cons{i}' for i in range(1,5)])

In [18]:
data = data[(data['version'] < '2.3') | (data['version'].str[-1] == '3')]

In [11]:
data.to_csv('data/moc.csv', index=False)

## Pure Fiction

In [12]:
pf_records = pd.concat([read_and_add_csv(a, '_pf.csv') for a in file_names if 'pf' in a and 'char' not in a])
pf_records = pf_records.dropna(subset=['ch1', 'ch2', 'ch3', 'ch4']).query('floor==4 & star_num > 1')

melted = pf_records.melt(id_vars=['uid', 'version'], value_vars=['ch1', 'ch2', 'ch3', 'ch4'], value_name='name').drop(columns='variable')
counter = melted.groupby(['version', 'name']).size().reset_index(name='count')
uid_count = melted.groupby(['version'])['uid'].nunique().reset_index(name='records')

counter = counter.merge(uid_count, on='version')
counter['usage'] = counter['count'] / counter['records']

In [13]:
pf_records = create_team_info(pf_records, drop_ch=True)
data = pf_records.merge(char_records, on = ['version', 'team_leader', 'uid'], how='left')


In [14]:
data = data.drop(columns=[f'cons{i}' for i in range(1,5)])

In [15]:
data = data[(data['version'] < '2.3') | (data['version'].str[-1] == '2')]
data.to_csv('data/pf.csv', index=False)