In [90]:
from turtledemo.sorting_animate import disable_keys

# Libraries
import pandas as pd
input_folder_path = "DataSource/Input/"
output_folder_path = "DataSource/Output/"

### Sales Data Process

In [91]:
file_name = "Sales.xlsx"
df = pd.read_excel(input_folder_path + file_name, skiprows=4)

# Reset the index
df.columns = df.iloc[0]
df = df.drop(df.index[0])
df = df.reset_index(drop=True)

#cleaning up
df = df[['Game Title', 'Item', 'Sales (USD)']]
df.dropna(subset = ['Game Title'], inplace = True)
df = df.groupby('Game Title')['Sales (USD)'].sum().reset_index()
df['Game Title'] = df['Game Title'].str.rsplit(':', n=1).str[-1]
display(df)

Unnamed: 0,Game Title,Sales (USD)
0,- No Game Title -,1313.96
1,Genso Wanderer PS4,0
2,Genso Wanderer PS4 DIGITAL,50.65
3,Genso Wanderer PS4 PAL,58.72
4,Operation Abyss STEAM,82.68
...,...,...
392,Yomawari Night Alone,3.52
393,Yomawari Night Alone PSV PAL,35.76
394,Yomawari Night Alone PSV US,34.57
395,Ys Memories of Celceta PSV PAL,50.78


### COGS data process

In [92]:
# Data Cleanup
COGS_df = pd.read_csv(input_folder_path + "COGS.csv", skiprows=4)
COGS_df.columns = COGS_df.iloc[0]
COGS_df = COGS_df.drop(COGS_df.index[0])
COGS_df = COGS_df.reset_index(drop=True)
COGS_df = COGS_df.dropna(subset = ['Game Title: Name'])


# Grouping by title
COGS_df['Game Title: Name'] = COGS_df['Game Title: Name'].str.replace(' : ', ':', regex = False).str.rsplit(':', n=1).str[-1]
COGS_df['Amount'] = COGS_df['Amount'].str.replace('$', '').str.replace(',', '').str.strip().astype(float)
COGS_df = COGS_df.rename(columns = {'Amount': 'COGS Expense'})
COGS_df = COGS_df.rename(columns = {'Game Title: Name': 'Game Title'})
COGS_df = COGS_df.groupby('Game Title')['COGS Expense'].sum().reset_index()

display(COGS_df)

Unnamed: 0,Game Title,COGS Expense
0,Winds of Anthos NSW,104.2
1,Winds of Anthos PS4,103.9
2,- No Game Title -,17006.0
3,2024-09_REYNATIS EU Replace,2261.52
4,Cardcaptor Sakura,285.66
5,Disgaea 6 Complete PS4,160.45
6,Disgaea 6 Complete PS5,187.54
7,Disgaea 7 NSW,2670.77
8,Disgaea 7 PS4,184.14
9,Disgaea 7 PS5,551.24


### Capitalized Expense Process

In [93]:
# clean up
CapExp_df = pd.read_excel(input_folder_path + "Capitalized_Expense.xlsx")
CapExp_df['Capitalized Expense Amount (Debit + Credit)'] = CapExp_df['Amount (Debit)'] + CapExp_df['Amount (Credit)']
CapExp_df.dropna(subset = ['Game Title'], inplace = True)

# grouping by title
CapExp_df = CapExp_df.groupby('Game Title')['Capitalized Expense Amount (Debit + Credit)'].sum().reset_index()
CapExp_df['Game Title'] = CapExp_df['Game Title'].str.replace(' : ', ':', regex = False).str.rsplit(':', n=1).str[-1]
display(CapExp_df)

Unnamed: 0,Game Title,Capitalized Expense Amount (Debit + Credit)
0,Trails through Daybreak STEAM,81557.49
1,Trails through Daybreak,240404.69
2,REYNATIS,35608.15


### AD Promotion Data Process

In [94]:
# clean up
AD_df = pd.read_excel(input_folder_path + "AD_Promotion.xlsx", skiprows=4)
AD_df.columns = AD_df.iloc[0]
AD_df = AD_df.drop(AD_df.index[0])
AD_df = AD_df.reset_index(drop=True)
AD_df = AD_df.dropna(subset = ['Game Title: Name'])

# grouping by title
AD_df = AD_df.groupby('Game Title: Name')['Amount'].sum().reset_index()
AD_df = AD_df.rename(columns = {'Amount': 'AD Promotion'})
AD_df = AD_df.rename(columns = {'Game Title: Name': 'Game Title'})

display(AD_df)

Unnamed: 0,Game Title,AD Promotion
0,- No Game Title -,4495.77
1,2024-07_Anime Expo 2024,386.02
2,2024-08_PAX West,53860.14
3,Phantom Brave 1 PS5,1058.97
4,Phantom Brave 2,9.25
5,Ys X: Nordics,2960.0


### Royalty Expense Process

In [95]:
# get data
Royalty_df = pd.read_excel(input_folder_path + "Royalty.xlsx", sheet_name='Import_Expense')
Royalty_df.rename(columns = {'game_title': 'Game Title'}, inplace = True)
Royalty_df['Game Title'] = Royalty_df['Game Title'].str.replace(' : ', ':', regex = False).str.rsplit(':', n=1).str[-1]
Royalty_df.dropna(subset = ['Game Title'], inplace = True)


# grouping by title
Royalty_df = Royalty_df.groupby('Game Title')['sales'].sum().reset_index()
Royalty_df.rename(columns = {'sales': 'Royalty Expense'}, inplace = True)

display(Royalty_df)

Unnamed: 0,Game Title,Royalty Expense
0,25th Ward PS4 PAL,216.75
1,25th Ward STEAM,2391.69
2,Akiba Trip 2,654.20
3,Arcana Heart 3 PAL PS3,66.83
4,Assault Spy,2652.64
...,...,...
207,Ys VIII PSV,96.43
208,Ys VIII PSV PAL,140.02
209,Yurukill NSW,874.39
210,Yurukill PS5,194.22


### Merging  and Export Data

In [96]:
# Merging
df = pd.merge(df, COGS_df, how='left', on='Game Title')
df = pd.merge(df, Royalty_df, how='left', on='Game Title')
df = pd.merge(df, CapExp_df, how='left', on='Game Title')
df = pd.merge(df, AD_df, how='left', on='Game Title')

# Calculate profit
df['Sales (USD)'] = df['Sales (USD)'].astype(float)
df['Profit'] = df['Sales (USD)'].fillna(0) - df['COGS Expense'].fillna(0) - df['Royalty Expense'].fillna(0) - df['Capitalized Expense Amount (Debit + Credit)'].fillna(0) - df['AD Promotion'].fillna(0)

display(df)

  df['Profit'] = df['Sales (USD)'].fillna(0) - df['COGS Expense'].fillna(0) - df['Royalty Expense'].fillna(0) - df['Capitalized Expense Amount (Debit + Credit)'].fillna(0) - df['AD Promotion'].fillna(0)


Unnamed: 0,Game Title,Sales (USD),COGS Expense,Royalty Expense,Capitalized Expense Amount (Debit + Credit),AD Promotion,Profit
0,- No Game Title -,1313.96,17006.0,,,4495.77,-20187.81
1,Genso Wanderer PS4,0.00,,41.98,,,-41.98
2,Genso Wanderer PS4 DIGITAL,50.65,,1.39,,,49.26
3,Genso Wanderer PS4 PAL,58.72,,1.29,,,57.43
4,Operation Abyss STEAM,82.68,,641.60,,,-558.92
...,...,...,...,...,...,...,...
392,Yomawari Night Alone,3.52,,,,,3.52
393,Yomawari Night Alone PSV PAL,35.76,,,,,35.76
394,Yomawari Night Alone PSV US,34.57,,,,,34.57
395,Ys Memories of Celceta PSV PAL,50.78,,37.58,,,13.20


In [98]:
df.to_csv(output_folder_path + 'Sales and Profit Data.csv', index = True)