In [1]:
import glob
import os
import pandas as pd

# Directory containing the CSV files to load
folder_path = r'C:\Users\thoma\Code\Projects\Fantasy-Premier-League\Data\Team\Defensive'  # Change this to the actual folder path

# Paths of every "*.csv" file directly inside folder_path
csv_files = glob.glob(os.path.join(folder_path, "*.csv"))

# Maps each file's base name (no directory, no extension) to its DataFrame
dataframes = {}

for file in csv_files:
    # Strip the directory first, then drop the extension to get the dictionary key
    base_name = os.path.basename(file)
    file_name = os.path.splitext(base_name)[0]

    # Debug: report which file is about to be read
    print(f"Loading file: {file_name}")

    # Parse the CSV and register it under its base name
    df = pd.read_csv(file)
    dataframes[file_name] = df

Loading file: GW_1
Loading file: GW_3
Loading file: GW_4
Loading file: GW_5
Loading file: GW_6
Loading file: GW_7


In [2]:
# Display the list of CSV paths returned by glob
csv_files

['C:\\Users\\thoma\\Code\\Projects\\Fantasy-Premier-League\\Data\\Team\\Defensive\\GW_1.csv',
 'C:\\Users\\thoma\\Code\\Projects\\Fantasy-Premier-League\\Data\\Team\\Defensive\\GW_3.csv',
 'C:\\Users\\thoma\\Code\\Projects\\Fantasy-Premier-League\\Data\\Team\\Defensive\\GW_4.csv',
 'C:\\Users\\thoma\\Code\\Projects\\Fantasy-Premier-League\\Data\\Team\\Defensive\\GW_5.csv',
 'C:\\Users\\thoma\\Code\\Projects\\Fantasy-Premier-League\\Data\\Team\\Defensive\\GW_6.csv',
 'C:\\Users\\thoma\\Code\\Projects\\Fantasy-Premier-League\\Data\\Team\\Defensive\\GW_7.csv']

In [3]:
# Gameweek numbers available on disk, in ascending numeric order
gameweeks = []

# Read the number from the file name itself ("GW_<number>.csv") instead of a
# fixed character position in the absolute path, so the result does not depend
# on the folder location and works for multi-digit gameweeks.
for file in csv_files:
    stem = os.path.splitext(os.path.basename(file))[0]
    _prefix, _sep, number_text = stem.rpartition('_')

    if not number_text.isdecimal():
        # A CSV that does not follow the "<name>_<number>" pattern is not a gameweek file
        print(f"Skipping file without a gameweek number: {file}")
        continue

    gameweeks.append(int(number_text))

# glob gives no numeric ordering guarantee (e.g. "GW_10" can come before "GW_2");
# later cells subtract consecutive gameweeks, so the order must be numeric.
gameweeks.sort()


In [4]:
# DataFrames selected from `dataframes`, in the order given by `gameweeks`
gameweek_dfs = []

for gw in gameweeks:
    # Dictionary keys are file base names, e.g. 'GW_1'
    gw_key = f'GW_{gw}'

    # Debug: show the key that is about to be looked up
    print(f"Looking for key: {gw_key}")

    # Guard clause: report a missing gameweek and move on to the next one
    if gw_key not in dataframes:
        print(f"Warning: {gw_key} not found in dataframes")
        continue

    gameweek_dfs.append(dataframes[gw_key])
    print(f"Appended DataFrame for {gw_key}")

# Debug: summary of how many frames were collected
print(f"Extracted {len(gameweek_dfs)} DataFrames corresponding to the selected gameweeks.")

Looking for key: GW_1
Appended DataFrame for GW_1
Looking for key: GW_3
Appended DataFrame for GW_3
Looking for key: GW_4
Appended DataFrame for GW_4
Looking for key: GW_5
Appended DataFrame for GW_5
Looking for key: GW_6
Appended DataFrame for GW_6
Looking for key: GW_7
Appended DataFrame for GW_7
Extracted 6 DataFrames corresponding to the selected gameweeks.


In [5]:
# Right-join the first two collected gameweeks on 'Team': every row of the later
# frame is kept, and columns present in both frames get the suffix '_GWp'
# (earlier frame) or '_GWc' (later frame).
merged = gameweek_dfs[0].merge(
    gameweek_dfs[1],
    how='right',
    on='Team',
    suffixes=('_GWp', '_GWc'),
)

In [6]:
# Display the right-merged frame built from gameweek_dfs[0] and gameweek_dfs[1]
merged

Unnamed: 0,Unnamed: 0_GWp,Team,Per 90 MinutesxG+xAG_GWp,PerformanceG+A_GWp,Unnamed: 0_GWc,Per 90 MinutesxG+xAG_GWc,PerformanceG+A_GWc
0,0,vs Arsenal,0.71,0,0,1.84,1
1,1,vs Aston Villa,3.37,1,1,1.89,6
2,2,vs Bournemouth,1.84,1,2,2.88,7
3,3,vs Brentford,2.0,0,3,3.23,6
4,4,vs Brighton,0.69,0,4,2.45,4
5,5,vs Chelsea,1.32,3,5,1.93,8
6,6,vs Crystal Palace,2.23,3,6,2.75,8
7,7,vs Everton,2.86,6,7,3.65,19
8,8,vs Fulham,4.46,2,8,2.1,6
9,9,vs Ipswich Town,4.48,3,9,4.0,11


In [7]:
# Function to de-cumulate the gameweek data
def decumulate(GW_previous, GW_current, columns=('PerformanceG+A',), key='Team'):
    """Return a copy of ``GW_current`` with cumulative columns reduced to the
    change since ``GW_previous``.

    For each column in ``columns`` the value that ``GW_previous`` holds for the
    same ``key`` is subtracted from the value in ``GW_current``. Rows whose key
    does not occur in ``GW_previous`` are treated as having a previous value
    of 0, so their current value is kept unchanged.

    Parameters
    ----------
    GW_previous : pandas.DataFrame
        Earlier snapshot. Must contain ``key`` and every column in ``columns``.
    GW_current : pandas.DataFrame
        Later snapshot. Must contain ``key`` and every column in ``columns``.
    columns : str or iterable of str, optional
        Columns to de-cumulate. Defaults to ``('PerformanceG+A',)``.
    key : str, optional
        Column identifying a row across both snapshots. Defaults to ``'Team'``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index, row order and columns as
        ``GW_current``. Neither input is modified.

    Raises
    ------
    ValueError
        If a key value occurs more than once in ``GW_previous``, because the
        previous value to subtract would be ambiguous.
    """
    # Accept a single column name as well as a collection of names
    if isinstance(columns, str):
        columns = [columns]

    if GW_previous[key].duplicated().any():
        raise ValueError(
            f"GW_previous contains duplicate values in '{key}'; "
            "cannot match previous values unambiguously"
        )

    # Previous snapshot addressable by key, used as a lookup table below
    previous_by_key = GW_previous.set_index(key)

    # Work on a copy so GW_current is left untouched
    decumulated_gw = GW_current.copy()

    for col in columns:
        # Look the previous value up by key. The result carries GW_current's own
        # index, so the subtraction lines up row by row even when GW_current has
        # been filtered, sorted or otherwise does not use a default 0..n-1 index.
        previous_values = GW_current[key].map(previous_by_key[col]).fillna(0)
        decumulated_gw[col] = GW_current[col] - previous_values

    # Return the decumulated gameweek data without modifying GW_current
    return decumulated_gw

In [8]:
# Per-gameweek frames. The first collected gameweek has no predecessor to
# subtract, so it goes in as loaded.
GW_new_list = [pd.DataFrame(gameweek_dfs[0])]

# Walk over consecutive (previous, current) pairs and de-cumulate each current frame
for previous_gw, current_gw in zip(gameweek_dfs, gameweek_dfs[1:]):
    decumulated_gw = decumulate(previous_gw, current_gw)
    GW_new_list.append(decumulated_gw)

In [10]:
# Export each de-cumulated gameweek to its own CSV file.
# The file name uses the gameweek number that belongs to the frame
# (gameweeks[i] pairs with gameweek_dfs[i]) instead of a fixed "i + 2" offset,
# which is only correct when exactly one gameweek is missing after the first.
if len(gameweeks) != len(gameweek_dfs):
    raise ValueError(
        "gameweeks and gameweek_dfs differ in length; "
        "cannot determine which gameweek number each DataFrame belongs to"
    )

for i in range(1, len(gameweek_dfs)):
    GW = decumulate(gameweek_dfs[i - 1], gameweek_dfs[i])
    # NOTE(review): the file is written to the current working directory under
    # the same name as the input CSV; make sure that is not the source folder,
    # otherwise the cumulative input would be overwritten.
    GW.to_csv(f'GW_{gameweeks[i]}.csv', index=False)