In [None]:
import pandas as pd

!pip install pyspark

from pyspark.sql import SparkSession
from pyspark.sql.functions import avg, count, max




# Classement des kills

In [None]:
# Location of the kill-statistics CSV used by the kill ranking
data_path = "./kill_match_stats_final_0.csv"

# Get (or reuse) the Spark session for this notebook
spark = SparkSession.builder.appName("ExemplePySpark").getOrCreate()

# Read the CSV: the first line is the header and column types are inferred
reader = spark.read
df = reader.csv(data_path, header=True, inferSchema=True)

# Keep only the columns the kill ranking needs (killer_name, match_id)
selected_columns = ["killer_name","match_id"]
df_selected = df.select(*selected_columns)

# Preview the first rows of the selection
df_selected.show()

+----------------+--------------------+
|     killer_name|            match_id|
+----------------+--------------------+
| KrazyPortuguese|2U4GBNA0YmnLSqvEy...|
|nide2Bxiaojiejie|2U4GBNA0YmnLSqvEy...|
|        Ascholes|2U4GBNA0YmnLSqvEy...|
|      Weirdo7777|2U4GBNA0YmnLSqvEy...|
|       Solayuki1|2U4GBNA0YmnLSqvEy...|
|   xuezhiqian717|2U4GBNA0YmnLSqvEy...|
|       pdfjkkvjk|2U4GBNA0YmnLSqvEy...|
|       xiaogao13|2U4GBNA0YmnLSqvEy...|
|       Jingchita|2U4GBNA0YmnLSqvEy...|
|    Alexande-999|2U4GBNA0YmnLSqvEy...|
|    NameLessisME|2U4GBNA0YmnLSqvEy...|
|      Daerljgodi|2U4GBNA0YmnLSqvEy...|
|        JoyFeng-|2U4GBNA0YmnLSqvEy...|
|    NameLessisME|2U4GBNA0YmnLSqvEy...|
|   UrGrandFather|2U4GBNA0YmnLSqvEy...|
|   xuezhiqian717|2U4GBNA0YmnLSqvEy...|
|   FantasticBoys|2U4GBNA0YmnLSqvEy...|
|       PPPIGFEET|2U4GBNA0YmnLSqvEy...|
|       EnGliSh22|2U4GBNA0YmnLSqvEy...|
|        Bookinga|2U4GBNA0YmnLSqvEy...|
+----------------+--------------------+
only showing top 20 rows



In [None]:
def _kill_stats(match_ids):
    """Summarise the kill rows of a single player.

    Args:
        match_ids: iterable with the match id of every kill row of the player
            (one entry per kill, so a match id repeats once per kill).

    Returns:
        Tuple ``(average kills per game, number of distinct games)``.
        A match only counts as a game here if the player has at least one
        kill row in it, because the input contains kill rows only.
    """
    match_ids = list(match_ids)  # materialise once; the grouped values are iterated twice
    n_games = len(set(match_ids))
    # A group always holds at least one row, so n_games >= 1 and the division is safe.
    return (len(match_ids) / n_games, n_games)


# Convert DataFrame to RDD of (killer_name, match_id) pairs, one per kill row
kills_by_player = df.rdd.map(lambda x: (x["killer_name"], x["match_id"]))

# Calculate the average number of kills per game and the number of games played.
# The value layout is (average kills, games); later cells index it as x[1][0] / x[1][1].
rdd = kills_by_player.groupByKey().mapValues(_kill_stats)

# Filter errors: drop rows whose killer name is missing or the literal string "None"
rdd_kill = rdd.filter(lambda x: x[0] is not None and x[0] != "None")

# Display the result
result = rdd_kill.take(10)
for row in result:
    print(f"Killer Name: {row[0]}, Number of Games: {row[1][1]:.2f}, Average Kills per Game: {row[1][0]:.2f}")


('KrazyPortuguese', (1, 1))
Killer Name: KrazyPortuguese, Number of Games: 1.00, Average Kills per Game: 1.00
('nide2Bxiaojiejie', (3, 1))
Killer Name: nide2Bxiaojiejie, Number of Games: 1.00, Average Kills per Game: 3.00
('Ascholes', (2, 1))
Killer Name: Ascholes, Number of Games: 1.00, Average Kills per Game: 2.00
('Weirdo7777', (1, 1))
Killer Name: Weirdo7777, Number of Games: 1.00, Average Kills per Game: 1.00
('Solayuki1', (1, 1))
Killer Name: Solayuki1, Number of Games: 1.00, Average Kills per Game: 1.00
('xuezhiqian717', (3, 1))
Killer Name: xuezhiqian717, Number of Games: 1.00, Average Kills per Game: 3.00
('pdfjkkvjk', (1, 1))
Killer Name: pdfjkkvjk, Number of Games: 1.00, Average Kills per Game: 1.00
('xiaogao13', (2, 1))
Killer Name: xiaogao13, Number of Games: 1.00, Average Kills per Game: 2.00
('Jingchita', (1, 1))
Killer Name: Jingchita, Number of Games: 1.00, Average Kills per Game: 1.00
('Alexande-999', (3, 1))
Killer Name: Alexande-999, Number of Games: 1.00, Average K

In [None]:
# Order the players from the largest to the smallest first statistic of their
# value tuple (the figure the printout labels "Average Kills per Game").
sorted_rdd_kill = rdd_kill.sortBy(lambda entry: entry[1][0], ascending=False)

# Fetch the ten leading rows and print them as a numbered list
top_10 = sorted_rdd_kill.take(10)
for rank, (killer, stats) in enumerate(top_10, start=1):
    print(f"{rank}. Killer Name: {killer}, Number of Games: {stats[1]:.2f}, Average Kills per Game: {stats[0]:.2f}")


1. Killer Name: #unknown, Number of Games: 20.00, Average Kills per Game: 154.00
2. Killer Name: gogolnyg, Number of Games: 1.00, Average Kills per Game: 62.00
3. Killer Name: 651651646, Number of Games: 1.00, Average Kills per Game: 42.00
4. Killer Name: EsNmToging, Number of Games: 1.00, Average Kills per Game: 36.00
5. Killer Name: MoGu1314, Number of Games: 1.00, Average Kills per Game: 25.00
6. Killer Name: s1000r-race, Number of Games: 1.00, Average Kills per Game: 24.00
7. Killer Name: KouBxczG, Number of Games: 1.00, Average Kills per Game: 24.00
8. Killer Name: Hidden-In-Bushes, Number of Games: 2.00, Average Kills per Game: 22.00
9. Killer Name: EVEN1982, Number of Games: 1.00, Average Kills per Game: 20.00
10. Killer Name: A_Dadyo_o, Number of Games: 1.00, Average Kills per Game: 20.00


In [None]:
# Tunable values for this cell, named instead of being buried in the lambdas
MIN_GAMES = 4
PLAYER_OF_INTEREST = 'gogolnyg'


def _print_kill_ranking(rows):
    """Print one numbered line per ``(name, (kills statistic, games))`` row."""
    for i, row in enumerate(rows, start=1):
        print(f"{i}. Killer Name: {row[0]}, Number of Games: {row[1][1]:.2f}, Average Kills per Game: {row[1][0]:.2f}")


# Filter players with at least MIN_GAMES games (x[1][1] is the distinct-match count)
filtered_players_kill = sorted_rdd_kill.filter(lambda x: x[1][1] >= MIN_GAMES)

# Process a specific player
specific_player_kill = sorted_rdd_kill.filter(lambda x: x[0] == PLAYER_OF_INTEREST)

print("\nPlayers with at least 4 games:")
# Named `top_filtered` rather than `filter` so the builtin filter() is not shadowed
top_filtered = filtered_players_kill.take(10)
_print_kill_ranking(top_filtered)

print("\nSpecific Player Data:")
specific = specific_player_kill.take(10)
_print_kill_ranking(specific)

# Stop the Spark session
spark.stop()


Players with at least 4 games:
1. Killer Name: #unknown, Number of Games: 20.00, Average Kills per Game: 154.00

Specific Player Data:
1. Killer Name: gogolnyg, Number of Games: 1.00, Average Kills per Game: 62.00


# Classement des positions

In [None]:
# Location of the aggregated match-statistics CSV used by the placement ranking
data_path = "./agg_match_stats_0.csv"

# Get (or reuse) the Spark session for this notebook
spark = SparkSession.builder.appName("ExemplePySpark").getOrCreate()

# Read the CSV: the first line is the header and column types are inferred
reader = spark.read
df = reader.csv(data_path, header=True, inferSchema=True)

# Keep only the columns the placement ranking needs (player_name, team_placement, match_id)
selected_columns = ["player_name","team_placement","match_id"]
df_selected = df.select(*selected_columns)

# Preview the first rows of the selection
df_selected.show()

In [None]:
def _placement_stats(records):
    """Summarise the placement rows of a single player.

    Args:
        records: iterable of ``(team_placement, match_id)`` pairs belonging to
            one player; placements are expected to be non-null numbers.

    Returns:
        Tuple ``(average placement per game, number of distinct games)``.
        If a match id occurs more than once, only one placement is kept for it.
    """
    placement_by_match = {match_id: placement for placement, match_id in records}
    n_games = len(placement_by_match)
    # A group always holds at least one row, so n_games >= 1 and the division is safe.
    return (sum(placement_by_match.values()) / n_games, n_games)


# Convert DataFrame to RDD of (player_name, (team_placement, match_id)) pairs.
# groupByKey needs 2-tuples, so the placement and match id travel together as the value.
placements_by_player = (
    df.rdd
    .map(lambda x: (x["player_name"], (x["team_placement"], x["match_id"])))
    # Rows without a placement cannot contribute to an average.
    .filter(lambda kv: kv[1][0] is not None)
)

# Calculate the average placement per game and the number of games played.
# The value layout is (average placement, games); later cells index it as x[1][0] / x[1][1].
rdd = placements_by_player.groupByKey().mapValues(_placement_stats)

# Filter errors: drop rows whose player name is missing or the literal string "None"
rdd_pos = rdd.filter(lambda x: x[0] is not None and x[0] != "None")

# Display the result
result = rdd_pos.take(10)
for row in result:
    print(f"Player Name: {row[0]}, Number of Games: {row[1][1]:.2f}, Average placement per Game: {row[1][0]:.2f}")

In [None]:
# Order the players from the largest to the smallest first statistic of their
# value tuple (the figure the printout labels "Average placement per Game").
# NOTE(review): if a lower placement is the better result, descending order
# lists the worst average finishes first - confirm this is the intended ranking.
sorted_rdd_pos = rdd_pos.sortBy(lambda entry: entry[1][0], ascending=False)

# Fetch the ten leading rows and print them as a numbered list
top_10 = sorted_rdd_pos.take(10)
for rank, (player, stats) in enumerate(top_10, start=1):
    print(f"{rank}. Player Name: {player}, Number of Games: {stats[1]:.2f}, Average placement per Game: {stats[0]:.2f}")

In [None]:
# Tunable values for this cell, named instead of being buried in the lambdas
MIN_GAMES = 4
PLAYER_OF_INTEREST = 'gogolnyg'

# Filter players with at least MIN_GAMES games
# (x[1][1] is the value the printout labels "Number of Games")
filtered_players_pos = sorted_rdd_pos.filter(lambda x: x[1][1] >= MIN_GAMES)

# Process a specific player
specific_player_pos = sorted_rdd_pos.filter(lambda x: x[0] == PLAYER_OF_INTEREST)

print("\nPlayers with at least 4 games:")
# Named `top_filtered` rather than `filter` so the builtin filter() is not shadowed
top_filtered = filtered_players_pos.take(10)
for i, row in enumerate(top_filtered, start=1):
    print(f"{i}. Player Name: {row[0]}, Number of Games: {row[1][1]:.2f}, Average placement per Game: {row[1][0]:.2f}")

print("\nSpecific Player Data:")
specific = specific_player_pos.take(10)
for i, row in enumerate(specific, start=1):
    print(f"{i}. player Name: {row[0]}, Number of Games: {row[1][1]:.2f}, Average placement per Game: {row[1][0]:.2f}")

# Stop the Spark session
spark.stop()

# Classement combiné

In [None]:
def calculate_score(assists, damage, eliminations, placement):
    """Combine one row of match statistics into a single score.

    Args:
        assists: number of assists; each one is worth 50 points.
        damage: damage dealt; added to the score as-is.
        eliminations: number of eliminations; each one is worth 100 points.
        placement: final placement; contributes ``1000 - 10 * placement``,
            so a smaller placement number yields a larger score.

    Returns:
        The sum of the four contributions.
    """
    assist_points = 50 * assists
    elimination_points = 100 * eliminations
    placement_points = 1000 - 10 * placement
    # Summed in the same left-to-right order as a single expression would be.
    return assist_points + damage + elimination_points + placement_points

# Create a Spark session
spark = SparkSession.builder.appName("ExemplePySpark").getOrCreate()

# Load the game data
data_path = "./agg_match_stats_0.csv"
df = spark.read.csv(data_path, header=True, inferSchema=True)

# Select relevant columns.
# The placement column is "team_placement", the name the placement ranking reads
# from this same file. player_dbno is carried along but is not part of the score.
selected_columns = ["player_name", "player_assists", "player_dmg", "player_dbno", "player_kills", "team_placement"]
df_selected = df.select(selected_columns)

# Convert DataFrame to RDD of plain tuples.
# Tuple layout: (name, assists, damage, dbno, kills, placement).
# The name comes from "player_name": "killer_name" is not among the selected columns.
rdd = df_selected.rdd.map(lambda x: (x["player_name"], x["player_assists"], x["player_dmg"], x["player_dbno"], x["player_kills"], x["team_placement"]))

# Calculate the score of every row (one score per input row, not aggregated per player)
rdd_scores = rdd.map(lambda x: (x[0], calculate_score(x[1], x[2], x[4], x[5])))

# Sort the RDD by descending order of scores
sorted_rdd_scores = rdd_scores.sortBy(lambda x: x[1], ascending=False)

# Display the top 10 results
top_10_scores = sorted_rdd_scores.take(10)
for i, row in enumerate(top_10_scores, start=1):
    print(f"{i}. Player: {row[0]}, Score: {row[1]:.2f}")

# Stop the Spark session
spark.stop()
