In [30]:
# !pip install pandas

In [31]:
# Libraries
import pandas as pd
input_folder_path = "DataSource/Input/"
output_folder_path = "DataSource/Output/"

#### Game Title Process Method

In [32]:
def title_process (df):
    # remove everything in front of last column
    df['Game Title'] = df['Game Title'].str.replace(' : ', ':', regex = False).str.rsplit(':', n=1).str[-1].str.upper()

    # remove platform
    pattern = '|'.join(platforms)
    df['Game Title'] = df['Game Title'].str.replace(rf'\s*\b({pattern})\b.*', '', regex=True)
    return df

### Sales Data Process

In [33]:
file_name = "Sales.xlsx"
df = pd.read_excel(input_folder_path + file_name, skiprows=4)

# Reset the index
df.columns = df.iloc[0]
df = df.drop(df.index[0])
df = df.reset_index(drop=True)

#cleaning up
df = df[['Game Title', 'Item', 'Platform', 'Sales (USD)']]
platforms = df['Platform'].unique().tolist()
platforms = [str(platform) for platform in platforms]
platforms.append('STEAM')
df.dropna(subset = ['Game Title'], inplace = True)

df = title_process(df)

df = df.groupby('Game Title')['Sales (USD)'].sum().reset_index()


display(df)

Unnamed: 0,Game Title,Sales (USD)
0,WINDS OF ANTHOS,317.3
1,- NO GAME TITLE -,1313.96
2,25TH WARD,1200.88
3,A ROSE IN THE TWILIGHT,195.22
4,AEDIS ECLIPSE-PSN,10.47
...,...,...
222,YS VIII LACRIMOSAOFDANA,22806.01
223,YURU YURI 1,8.92
224,YURUKILL,1524.51
225,ZETTAI HERO,22.67


### COGS data process

In [34]:
# Data Cleanup
COGS_df = pd.read_csv(input_folder_path + "COGS.csv", skiprows=4)
COGS_df.columns = COGS_df.iloc[0]
COGS_df = COGS_df.drop(COGS_df.index[0])
COGS_df = COGS_df.reset_index(drop=True)
COGS_df = COGS_df.dropna(subset = ['Game Title: Name'])


# Grouping by title
COGS_df['Amount'] = COGS_df['Amount'].str.replace('$', '').str.replace(',', '').str.strip().astype(float)
COGS_df = COGS_df.rename(columns = {'Amount': 'COGS Expense'})
COGS_df = COGS_df.rename(columns = {'Game Title: Name': 'Game Title'})
COGS_df = title_process(COGS_df)
COGS_df = COGS_df.groupby('Game Title')['COGS Expense'].sum().reset_index()

display(COGS_df)

Unnamed: 0,Game Title,COGS Expense
0,WINDS OF ANTHOS,208.1
1,- NO GAME TITLE -,17006.0
2,2024-09_REYNATIS EU REPLACE,2261.52
3,CARDCAPTOR SAKURA,285.66
4,DISGAEA 6 COMPLETE,347.99
5,DISGAEA 7,3406.15
6,FALLEN LEGION 2,12.61
7,GOODS,8360.38
8,GRIM GRIMOIRE ONCEMORE,2371.14
9,HANASAKU IROHA 1,64.8


### Capitalized Expense Process

In [35]:
# clean up
CapExp_df = pd.read_excel(input_folder_path + "Capitalized_Expense.xlsx")
CapExp_df['Capitalized Expense Amount (Debit + Credit)'] = CapExp_df['Amount (Debit)'] + CapExp_df['Amount (Credit)']
CapExp_df.dropna(subset = ['Game Title'], inplace = True)

# grouping by title
CapExp_df = title_process(CapExp_df)
CapExp_df = CapExp_df.groupby('Game Title')['Capitalized Expense Amount (Debit + Credit)'].sum().reset_index()
display(CapExp_df)


Unnamed: 0,Game Title,Capitalized Expense Amount (Debit + Credit)
0,REYNATIS,35608.15
1,TRAILS THROUGH DAYBREAK,321962.18


### AD Promotion Data Process

In [36]:
# clean up
AD_df = pd.read_excel(input_folder_path + "AD_Promotion.xlsx", skiprows=4)
AD_df.columns = AD_df.iloc[0]
AD_df = AD_df.drop(AD_df.index[0])
AD_df = AD_df.reset_index(drop=True)
AD_df = AD_df.dropna(subset = ['Game Title: Name'])

# grouping by title
AD_df = AD_df.rename(columns = {'Amount': 'AD Promotion'})
AD_df = AD_df.rename(columns = {'Game Title: Name': 'Game Title'})
AD_df = title_process(AD_df)
AD_df = AD_df.groupby('Game Title')['AD Promotion'].sum().reset_index()


display(AD_df)

Unnamed: 0,Game Title,AD Promotion
0,NORDICS,2960.0
1,- NO GAME TITLE -,4495.77
2,2024-07_ANIME EXPO 2024,386.02
3,2024-08_PAX WEST,53860.14
4,PHANTOM BRAVE 1,1058.97
5,PHANTOM BRAVE 2,9.25


### Royalty Expense Process

In [37]:
# get data
Royalty_df = pd.read_excel(input_folder_path + "Royalty.xlsx", sheet_name='Import_Expense')
Royalty_df.rename(columns = {'game_title': 'Game Title'}, inplace = True)
Royalty_df['Game Title'] = Royalty_df['Game Title'].str.replace(' : ', ':', regex = False).str.rsplit(':', n=1).str[-1]
Royalty_df.dropna(subset = ['Game Title'], inplace = True)


# grouping by title
Royalty_df = title_process(Royalty_df)
Royalty_df = Royalty_df.groupby('Game Title')['sales'].sum().reset_index()
Royalty_df.rename(columns = {'sales': 'Royalty Expense'}, inplace = True)

display(Royalty_df)

Unnamed: 0,Game Title,Royalty Expense
0,25TH WARD,2608.44
1,AKIBA TRIP 2,654.20
2,ARCANA HEART 3 PAL,66.83
3,ASSAULT SPY,2652.64
4,ATELIER MERURU,13.98
...,...,...
111,YS IX MONSTRUM NOX EPIC,115.39
112,YS MEMORIES OF CELCETA,37.58
113,YS VIII,11014.07
114,YS VIII LACRIMOSA OF DANA,69175.76


### Merging  and Export Data

In [38]:
# Merging
df = pd.merge(df, COGS_df, how='left', on='Game Title')
df = pd.merge(df, Royalty_df, how='left', on='Game Title')
df = pd.merge(df, CapExp_df, how='left', on='Game Title')
df = pd.merge(df, AD_df, how='left', on='Game Title')

# Calculate profit
df['Sales (USD)'] = df['Sales (USD)'].astype(float)
df['Profit'] = df['Sales (USD)'].fillna(0) - df['COGS Expense'].fillna(0) - df['Royalty Expense'].fillna(0) - df['Capitalized Expense Amount (Debit + Credit)'].fillna(0) - df['AD Promotion'].fillna(0)

display(df)

  df['Profit'] = df['Sales (USD)'].fillna(0) - df['COGS Expense'].fillna(0) - df['Royalty Expense'].fillna(0) - df['Capitalized Expense Amount (Debit + Credit)'].fillna(0) - df['AD Promotion'].fillna(0)


Unnamed: 0,Game Title,Sales (USD),COGS Expense,Royalty Expense,Capitalized Expense Amount (Debit + Credit),AD Promotion,Profit
0,WINDS OF ANTHOS,317.30,208.10,,,,109.20
1,- NO GAME TITLE -,1313.96,17006.00,,,4495.77,-20187.81
2,25TH WARD,1200.88,,2608.44,,,-1407.56
3,A ROSE IN THE TWILIGHT,195.22,,,,,195.22
4,AEDIS ECLIPSE-PSN,10.47,,,,,10.47
...,...,...,...,...,...,...,...
222,YS VIII LACRIMOSAOFDANA,22806.01,12894.61,,,,9911.40
223,YURU YURI 1,8.92,,,,,8.92
224,YURUKILL,1524.51,50.82,1068.61,,,405.08
225,ZETTAI HERO,22.67,,,,,22.67


In [39]:
df.to_csv(output_folder_path + 'Sales and Profit Data.csv', index = True)