# Gold Layer 🥇

## Step 1: Import

In [None]:
import pandas as pd
import numpy as np
# Directory for the gold-layer outputs; replace with your own path.
# NOTE(review): the report cells visible in this notebook hard-code their
# output paths and never read this constant — confirm whether they should.
GOLD_PATH = '/gold'


In [None]:
# Load the silver-layer dataset that every report below aggregates.
# Replace the path with the location of your own silver parquet file.
df = pd.read_parquet('/silver.parquet')


## Step 2: Preview the Data

In [None]:
# Show the first rows to sanity-check the columns that were loaded.
df.head()


In [None]:
# Print each column's dtype and non-null count to spot missing data early.
df.info()


### Report 1: The total team points, average rank and total Coins Spent
A report that shows, for each team, the total team points earned, the average kart racing rank, and the total coins spent in Toad Town. To average the ranks, convert each letter to a number using {'S': 5, 'A': 4, 'B': 3, 'C': 2, 'D': 1}.


In [None]:
# Numeric scale for the kart racing rank letters (higher is better).
rank_numeric = {'S': 5, 'A': 4, 'B': 3, 'C': 2, 'D': 1}

# Normalise the letters (trim whitespace, upper-case) before mapping them.
# Values that are not on the scale become <NA>, so the mean below skips them.
df['Kart Racing Rank_Numeric'] = (
    df['Kart Racing Rank']
      .astype('string').str.strip().str.upper()
      .map(rank_numeric)
      .astype('Int32')
)

# One row per team: summed points, summed coins and the mean numeric rank.
report1 = (
    df.groupby('Team', as_index=False)
      .agg(
          **{
              'Total Team Points': ('Team Points', 'sum'),
              'Total Coins Spent': ('Coins Spent in Toad Town', 'sum'),
              'Avg Kart Rank (num)': ('Kart Racing Rank_Numeric', 'mean'),
          }
      )
)

report1['Avg Kart Rank (num)'] = report1['Avg Kart Rank (num)'].round(2)


def avg_to_letter(x):
    """Convert a numeric average rank into its letter grade.

    The average is floored onto the scale: a value has to reach a letter's
    number to earn that letter (4.99 -> 'A', 5.0 -> 'S'), and anything
    below 2 is 'D'.

    Returns None when the average is missing (a team with no recognised
    ranks), rather than raising on <NA> or silently reporting 'D'.
    """
    if pd.isna(x):
        return None
    if x >= 5:
        return 'S'
    elif x >= 4:
        return 'A'
    elif x >= 3:
        return 'B'
    elif x >= 2:
        return 'C'
    else:
        return 'D'


report1['Avg Kart Rank (letter)'] = report1['Avg Kart Rank (num)'].apply(avg_to_letter)

# Highest-scoring team first; the team name breaks ties for a stable order.
report1 = (
    report1.sort_values(['Total Team Points', 'Team'], ascending=[False, True], kind='mergesort')
           .reset_index(drop=True)
)

print("\nReport 1 — Total team points, total coins, and average kart racing rank:\n", report1)

# Written to the same absolute /gold directory as the other reports.
report1.to_parquet('/gold/gold_team_summary.parquet', index=False)
print('report1 saved successfully')







### Report2: The total team points earned by each player sorted by points descending. 
A report that shows total team points earned by each player sorted by points descending. 

In [None]:
# Rows with a missing name or the 'Unknown Player' placeholder cannot be
# attributed to anyone, so they are left out of the per-player totals.
valid_players = df['Player Name'].notna() & (df['Player Name'] != 'Unknown Player')

# One row per player with their summed team points, best scorer first.
total_team_points = (
    df.loc[valid_players]
      .groupby('Player Name', as_index=False)
      .agg(**{'Total Team Points': ('Team Points', 'sum')})
      # Player Name breaks ties so players with equal totals always come out
      # in the same order.
      .sort_values(['Total Team Points', 'Player Name'], ascending=[False, True])
      .reset_index(drop=True)
)

print("\nTotal team points earned by each player (descending):\n", total_team_points)

# Written to the same absolute /gold directory as the other reports.
total_team_points.to_parquet('/gold/gold_player_performance.parquet', index=False)
print('report2 saved successfully')



### Report3: The count and average number of power-ups used per player.
A report showing the count and average number of power-ups used per player. 

In [None]:
# Ignore rows whose player name is missing or is the 'Unknown Player' placeholder.
player_names = df['Player Name']
valid_players = player_names.notna() & player_names.ne('Unknown Player')

# Per player: how many power-up values were recorded, and their mean.
powerup_aggs = {
    'Power-Ups Count': pd.NamedAgg(column='Power-Ups Used', aggfunc='count'),
    'Power-Ups Avg': pd.NamedAgg(column='Power-Ups Used', aggfunc='mean'),
}
powerups_by_player = (
    df.loc[valid_players]
      .groupby('Player Name', as_index=False)
      .agg(**powerup_aggs)
)

# Alphabetical by player name, with a fresh 0..n-1 index.
report3 = powerups_by_player.sort_values(by='Player Name').reset_index(drop=True)

print("\nCount and average Power-Ups used by each player:\n", report3)

report3.to_parquet('/gold/gold_powerup_player.parquet', index=False)
print('report3 saved successfully')

### Report4: The average number of lives lost in each world in descending order. 
A report showing the average number of lives lost in each world in descending order. 

In [None]:
# One row per world with the mean number of lives lost there, ordered from
# the highest average to the lowest.
lives_lost_avg = (
    df.groupby('World', as_index=False)
      .agg(
          **{
              'Lives Lost Avg': ('Lives Lost', 'mean')
          }
      )
      .sort_values('Lives Lost Avg', ascending=False)
      .reset_index(drop=True)
)

print("\nAverage number of lives lost in each world:\n", lives_lost_avg)

# The file name previously had a doubled dot ('gold_world_difficulty..parquet').
lives_lost_avg.to_parquet('/gold/gold_world_difficulty.parquet', index=False)
print('report4 saved successfully')

### Report 5:  The top player per team by team points. 
A report showing the top player per team by team points. 

In [None]:
# Rows with a missing or placeholder ('Unknown Player') name cannot be ranked.
named_players = df['Player Name']
valid_players = named_players.notna() & named_players.ne('Unknown Player')

# Points each player earned within each team.
points_agg = {'Total Team Points': pd.NamedAgg(column='Team Points', aggfunc='sum')}
team_player_points = (
    df.loc[valid_players]
      .groupby(['Team', 'Player Name'], as_index=False)
      .agg(**points_agg)
)

# Order each team's players from most to fewest points, then keep only the
# first row of every team.
ranked_within_team = team_player_points.sort_values(
    by=['Team', 'Total Team Points'],
    ascending=[True, False],
)
top_player = (
    ranked_within_team.groupby('Team', as_index=False)
                      .head(1)
                      .reset_index(drop=True)
)

print("Top players per team:\n", top_player)
top_player.to_parquet('/gold/gold_top_player_per_team.parquet', index=False)
print('report5 saved successfully')


### Report6: The popularity of each vehicle type 
A report showing the popularity of each vehicle type 

In [None]:
# Popularity of a vehicle type = number of rows recorded with that type.
vehicle_agg = {'Popularity': pd.NamedAgg(column='Vehicle Type', aggfunc='count')}
vehicle_counts = df.groupby('Vehicle Type', as_index=False).agg(**vehicle_agg)

# Most popular vehicle type first.
popularity_vehicle = (
    vehicle_counts.sort_values(by='Popularity', ascending=False)
                  .reset_index(drop=True)
)

print("The popularity of each vehicle type:\n", popularity_vehicle)
popularity_vehicle.to_parquet('/gold/gold_vehicle_counts.parquet', index=False)
print('report6 saved successfully')

### Report7: The total number of lives lost for each player, sorted from most to least. 
A report showing which players are at the highest risk by listing the total number of lives lost for each player, sorted from most to least. 

In [None]:
# Leave out rows with no player name or the 'Unknown Player' placeholder.
risk_names = df['Player Name']
valid_players = risk_names.notna() & risk_names.ne('Unknown Player')

# Sum the lives lost by each remaining player.
lives_agg = {'Total Lives Lost': ('Lives Lost', 'sum')}
lives_by_player = (
    df.loc[valid_players]
      .groupby('Player Name', as_index=False)
      .agg(**lives_agg)
)

# Players who lost the most lives come first.
total_lives_lost = (
    lives_by_player.sort_values(by='Total Lives Lost', ascending=False)
                   .reset_index(drop=True)
)  # type: ignore

print("The total number of lives lost for each player:\n", total_lives_lost)

total_lives_lost.to_parquet('/gold/gold_risk_assessment.parquet', index=False)
print('report7 saved successfully')



### Report8: The average and total number of levels completed in each world. 
A report showing the average and total number of levels completed in each world. 

In [None]:
# Per world: the sum and the mean of levels completed.
levels_aggs = {
    'Total_levels_completed': pd.NamedAgg(column='Levels Completed', aggfunc='sum'),
    'Avg_levels_completed': pd.NamedAgg(column='Levels Completed', aggfunc='mean'),
}
levels_by_world = df.groupby('World', as_index=False).agg(**levels_aggs)

# Worlds with the most completed levels come first.
report8 = (
    levels_by_world.sort_values(by='Total_levels_completed', ascending=False)
                   .reset_index(drop=True)
)
print("The total and average number of levels completed in each world:\n", report8)

report8.to_parquet('/gold/gold_world_completion.parquet', index=False)
print('report8 saved successfully')


### Report9: The total and average number of times each team was hit by enemies. 
A report showing the total and average number of times each team was hit by enemies. 

In [None]:
# Per team: the sum and the mean of the times hit by enemies.
hits_aggs = {
    'Total times being hit': ('Times Hit by Enemies', 'sum'),
    'Avg times being hit': ('Times Hit by Enemies', 'mean'),
}
hits_by_team = df.groupby('Team', as_index=False).agg(**hits_aggs)

# Teams that were hit the most come first.
report9 = (
    hits_by_team.sort_values(by='Total times being hit', ascending=False)
                .reset_index(drop=True)
)

print("The total and average number of times each team was hit by enemies:\n", report9)

report9.to_parquet('/gold/gold_hits_team.parquet', index=False)
print('report9 saved successfully')


### Report10: The total coins spent in Toad Town by each team
A report showing the total coins spent in Toad Town by each team. 

In [None]:
# Sum the coins each team spent in Toad Town.
coins_agg = {'Total Coins Spent': pd.NamedAgg(column='Coins Spent in Toad Town', aggfunc='sum')}
coins_by_team = df.groupby('Team', as_index=False).agg(**coins_agg)

# Biggest spenders first.
total_coins_spent = (
    coins_by_team.sort_values(by='Total Coins Spent', ascending=False)
                 .reset_index(drop=True)
)

print("The total coins spent in Toad Town by each team:\n", total_coins_spent)

total_coins_spent.to_parquet('/gold/gold_spending_analysis.parquet', index=False)
print('report10 saved successfully')
