In [2]:
import pandas as pd
import xlwings as xw
import xlsxwriter
import os
import pickle

### Organize competition data based on year

In [8]:
# Create one empty Excel workbook per (gender, year) pair found among the
# competition result files, e.g. chronological/M_2019.xlsx.
# The gender is taken from the text before the first '_' of each file name and
# the year from the first four characters after the last '_'.

base_dir = "chronological"
folders = os.listdir(base_dir)

# Names already present in the base directory. Kept up to date as workbooks are
# created so the directory does not have to be re-listed for every input file.
existing_entries = set(folders)

for folder in folders:
    folder_path = os.path.join(base_dir, folder)
    # Only the sub-directories hold result files; this skips '.DS_Store',
    # previously generated workbooks and any other stray file.
    if 'xlsx' in folder or not os.path.isdir(folder_path):
        continue

    files = os.listdir(folder_path)
    files.sort(reverse=True)

    for f_name in files:
        if f_name == '.DS_Store':
            continue
        year = f_name.split('_')[-1][:4]
        gender = f_name.split('_')[0]
        target_name = "{}_{}.xlsx".format(gender, year)
        if target_name in existing_entries:
            continue

        # Opening and immediately closing the workbook writes an empty file.
        workbook = xlsxwriter.Workbook(os.path.join(base_dir, target_name))
        workbook.close()
        existing_entries.add(target_name)


In [13]:
# Fill every yearly workbook (chronological/<gender>_<year>.xlsx) with one
# sheet per competition:
#   -> collect the competition files of that gender whose name carries the year
#   -> order them by the four characters after the year (parsed as an integer)
#   -> sort each competition by the 'Rank' column and write it to its own sheet
folders = os.listdir("chronological")

for to_xlsx in folders:
    # Match on the extension rather than on any occurrence of 'xlsx'.
    if not to_xlsx.endswith('.xlsx'):
        continue

    gender = to_xlsx.split('_')[0]
    year = to_xlsx.split('_')[1][0:4]
    source_dir = os.path.join("chronological", gender)

    # Discover the inputs BEFORE opening the writer: opening the writer
    # replaces the target workbook, so a failure while scanning must not
    # leave a truncated file behind.
    dated_sheets = []
    for f_name in os.listdir(source_dir):
        if f_name == '.DS_Store' or f_name.split('_')[-1][:4] != year:
            continue
        date = int(f_name.split('_')[-1][4:8])
        sheet_name = f_name.split('.')[0]
        dated_sheets.append((date, sheet_name))

    # Sorting the (date, name) pairs keeps the chronological order and makes
    # the order of same-day competitions deterministic instead of depending
    # on the order in which os.listdir happens to return the files.
    dated_sheets.sort()

    workbook_name = os.path.join("chronological", to_xlsx)
    with pd.ExcelWriter(workbook_name) as writer:
        for _, sheet_name in dated_sheets:
            csv_path = os.path.join(source_dir, sheet_name + '.csv')
            # The inputs are read as tab-separated, UTF-16 encoded text.
            competition = pd.read_csv(csv_path, sep='\t', encoding='UTF-16', header=0)
            competition.sort_values(by=['Rank']).to_excel(writer, sheet_name=sheet_name)

In [30]:
# Count the sheets stored in the yearly workbooks.
# M_count sums the sheets of workbooks whose name starts with 'M_';
# W_count sums the sheets of every other workbook.
folders = os.listdir("chronological")

M_count = 0
W_count = 0
for to_xlsx in folders:
    # Match on the extension rather than on any occurrence of 'xlsx'.
    if not to_xlsx.endswith('.xlsx'):
        continue

    gender = to_xlsx.split('_')[0]
    # The context manager closes the workbook handle once the sheet names
    # have been read, instead of leaving one open file per workbook.
    with pd.ExcelFile(os.path.join('chronological', to_xlsx)) as xls:
        sheet_total = len(xls.sheet_names)

    if gender == 'M':
        M_count += sheet_total
    else:
        W_count += sheet_total

In [31]:
# Display the sheet totals: (workbooks prefixed 'M', all other workbooks).
M_count, W_count

(381, 379)

In [34]:
# Directory listing of "chronological" in ascending name order.
folders = sorted(os.listdir("chronological"))

In [35]:
# Display the sorted listing of the "chronological" directory.
folders

['.DS_Store',
 'M',
 'M_1995.xlsx',
 'M_1996.xlsx',
 'M_1997.xlsx',
 'M_1998.xlsx',
 'M_1999.xlsx',
 'M_2000.xlsx',
 'M_2001.xlsx',
 'M_2002.xlsx',
 'M_2003.xlsx',
 'M_2004.xlsx',
 'M_2005.xlsx',
 'M_2006.xlsx',
 'M_2007.xlsx',
 'M_2008.xlsx',
 'M_2009.xlsx',
 'M_2010.xlsx',
 'M_2011.xlsx',
 'M_2012.xlsx',
 'M_2013.xlsx',
 'M_2014.xlsx',
 'M_2015.xlsx',
 'M_2016.xlsx',
 'M_2017.xlsx',
 'M_2018.xlsx',
 'M_2019.xlsx',
 'W',
 'W_1995.xlsx',
 'W_1996.xlsx',
 'W_1997.xlsx',
 'W_1998.xlsx',
 'W_1999.xlsx',
 'W_2000.xlsx',
 'W_2001.xlsx',
 'W_2002.xlsx',
 'W_2003.xlsx',
 'W_2004.xlsx',
 'W_2005.xlsx',
 'W_2006.xlsx',
 'W_2007.xlsx',
 'W_2008.xlsx',
 'W_2009.xlsx',
 'W_2010.xlsx',
 'W_2011.xlsx',
 'W_2012.xlsx',
 'W_2013.xlsx',
 'W_2014.xlsx',
 'W_2015.xlsx',
 'W_2016.xlsx',
 'W_2017.xlsx',
 'W_2018.xlsx',
 'W_2019.xlsx',
 'W_2020.xlsx']

### Organize the official athlete rankings

In [3]:
# Combine the men's ranking-list CSV files into a single Excel workbook,
# ranking_list/HP_M.xlsx, with one sheet per CSV file.
ranking_list_folder = 'HP_M'

source_dir = os.path.join('ranking_list', ranking_list_folder)
workbook_name = os.path.join('ranking_list', ranking_list_folder + '.xlsx')

# Files are written in descending name order.
files = os.listdir(source_dir)
files.sort(reverse=True)

# pd.ExcelWriter creates (or replaces) the workbook itself, so no empty
# workbook has to be created beforehand.
with pd.ExcelWriter(workbook_name) as writer:
    for f_name in files:
        if f_name == '.DS_Store':
            continue
        # Sheet name: the file name up to its first '.', with dashes removed.
        sheet_name = f_name.split('.')[0].replace('-', '')
        # NOTE(review): 'unicode_escape' also interprets backslash escapes in
        # the data; confirm it is intended rather than e.g. 'latin-1'.
        ranking = pd.read_csv(os.path.join(source_dir, f_name), sep=',',
                              encoding='unicode_escape', header=0)
        ranking.to_excel(writer, sheet_name=sheet_name)

In [4]:
# Combine the women's ranking-list CSV files into a single Excel workbook,
# ranking_list/HP_W.xlsx, with one sheet per CSV file.
ranking_list_folder = 'HP_W'

source_dir = os.path.join('ranking_list', ranking_list_folder)
workbook_name = os.path.join('ranking_list', ranking_list_folder + '.xlsx')

# Files are written in descending name order.
files = os.listdir(source_dir)
files.sort(reverse=True)

# pd.ExcelWriter creates (or replaces) the workbook itself, so no empty
# workbook has to be created beforehand.
with pd.ExcelWriter(workbook_name) as writer:
    for f_name in files:
        if f_name == '.DS_Store':
            continue
        # Sheet name: the file name up to its first '.', with dashes removed.
        sheet_name = f_name.split('.')[0].replace('-', '')
        # NOTE(review): 'unicode_escape' also interprets backslash escapes in
        # the data; confirm it is intended rather than e.g. 'latin-1'.
        ranking = pd.read_csv(os.path.join(source_dir, f_name), sep=',',
                              encoding='unicode_escape', header=0)
        ranking.to_excel(writer, sheet_name=sheet_name)