In [1]:
from pyspark.sql import SparkSession

In [2]:
spark = SparkSession\
    .builder\
    .appName("Chess")\
    .master("local[3]")\
    .getOrCreate()

# chess game .csv file data set analysis 

In [3]:
data_frame = spark.read.csv('games.csv', header=True, inferSchema=True)
data_frame.show()


+--------+-----+----------+------------+-----+--------------+------+--------------+------------------+------------+------------------+------------+--------------------+-----------+--------------------+-----------+
|      id|rated|created_at|last_move_at|turns|victory_status|winner|increment_code|          white_id|white_rating|          black_id|black_rating|               moves|opening_eco|        opening_name|opening_ply|
+--------+-----+----------+------------+-----+--------------+------+--------------+------------------+------------+------------------+------------+--------------------+-----------+--------------------+-----------+
|TZJHLljE|false|1.50421E12|  1.50421E12|   13|     outoftime| white|          15+2|          bourgris|        1500|              a-00|        1191|d4 d5 c4 c6 cxd5 ...|        D10|Slav Defense: Exc...|          5|
|l1NXvwaE| true|1.50413E12|  1.50413E12|   16|        resign| black|          5+10|              a-00|        1322|         skinnerua|        12

In [4]:

data_frame.dropna(subset=('turns', 'victory_status')).show()

+--------+-----+----------+------------+-----+--------------+------+--------------+------------------+------------+------------------+------------+--------------------+-----------+--------------------+-----------+
|      id|rated|created_at|last_move_at|turns|victory_status|winner|increment_code|          white_id|white_rating|          black_id|black_rating|               moves|opening_eco|        opening_name|opening_ply|
+--------+-----+----------+------------+-----+--------------+------+--------------+------------------+------------+------------------+------------+--------------------+-----------+--------------------+-----------+
|TZJHLljE|false|1.50421E12|  1.50421E12|   13|     outoftime| white|          15+2|          bourgris|        1500|              a-00|        1191|d4 d5 c4 c6 cxd5 ...|        D10|Slav Defense: Exc...|          5|
|l1NXvwaE| true|1.50413E12|  1.50413E12|   16|        resign| black|          5+10|              a-00|        1322|         skinnerua|        12

#  Average number of turns for winning condition

In [5]:
print("Average number of turns for winning condition")
victory_status = data_frame.groupBy('victory_status').agg({'turns': 'avg'})
victory_status.show()

Average number of turns for winning condition
+--------------+-----------------+
|victory_status|       avg(turns)|
+--------------+-----------------+
|        resign|53.91253251996053|
|     outoftime|72.74285714285715|
|          mate|65.41501976284584|
|          draw|83.78145695364239|
+--------------+-----------------+



In [6]:
victory_status.filter((victory_status['victory_status'] == 'mate')).show()

+--------------+-----------------+
|victory_status|       avg(turns)|
+--------------+-----------------+
|          mate|65.41501976284584|
+--------------+-----------------+



# Average rating of the player

In [7]:
print("Average rating of the player")
player_average = data_frame.dropna(subset=('white_id', 'black_id', 'white_rating', 'white_rating', 'black_rating'))
player_average.show()

Average rating of the player
+--------+-----+----------+------------+-----+--------------+------+--------------+------------------+------------+------------------+------------+--------------------+-----------+--------------------+-----------+
|      id|rated|created_at|last_move_at|turns|victory_status|winner|increment_code|          white_id|white_rating|          black_id|black_rating|               moves|opening_eco|        opening_name|opening_ply|
+--------+-----+----------+------------+-----+--------------+------+--------------+------------------+------------+------------------+------------+--------------------+-----------+--------------------+-----------+
|TZJHLljE|false|1.50421E12|  1.50421E12|   13|     outoftime| white|          15+2|          bourgris|        1500|              a-00|        1191|d4 d5 c4 c6 cxd5 ...|        D10|Slav Defense: Exc...|          5|
|l1NXvwaE| true|1.50413E12|  1.50413E12|   16|        resign| black|          5+10|              a-00|        1322|

In [8]:
player_average.groupBy('white_id', 'black_id').agg({'white_rating': 'avg', 'black_rating': 'avg'}).show()

+-------------------+---------------+-----------------+-----------------+
|           white_id|       black_id|avg(black_rating)|avg(white_rating)|
+-------------------+---------------+-----------------+-----------------+
|           oldpaths|rubberchicken04|           1596.0|           1561.0|
|           oldpaths|         fcolpo|           1470.0|           1579.0|
|               cdvh|     capito2017|           1958.0|           1724.0|
|            pune123|         sassou|           1204.0|           1443.0|
|          dbschultz|     nienkotter|           1358.0|           1500.0|
|            decky84|  roman12342005|           1929.0|           1758.0|
|          garib33gg|theanonymousone|           1332.0|           1224.0|
|mrphaseolusvulgaris|           cmcc|           1500.0|           1712.0|
|            filanif|      joe-brown|            925.0|           1214.0|
|              majek|       mikiduda|           1385.0|           1102.0|
|           dsom1234|         dead23| 

# Average time to finish the game

In [9]:
print("Average time to finish the game: ")
data_frame.groupBy('victory_status').agg({'created_at': 'avg', 'last_move_at': 'avg'}).show()

Average time to finish the game: 
+--------------+--------------------+--------------------+
|victory_status|   avg(last_move_at)|     avg(created_at)|
+--------------+--------------------+--------------------+
|        resign|1.484248735299182...|1.484247937812193E12|
|     outoftime|1.481290611443147...|1.481289290488460...|
|          mate|1.482697338666147...|1.482696538851454...|
|          draw|1.486594622461898...|1.486593213008563E12|
+--------------+--------------------+--------------------+

