In [2]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import Window

def create_spark_session():
    """Build (or reuse) the local Spark session for this analysis.

    The session is configured with the application name
    "KullaniciVeriAnalizi" and the ``local[*]`` master, and the Spark
    version is printed once the session is available.

    Returns:
        The ``SparkSession`` produced by ``getOrCreate()``.
    """
    builder = SparkSession.builder.appName("KullaniciVeriAnalizi")
    builder = builder.master("local[*]")
    session = builder.getOrCreate()
    print("✅ Spark oturumu başlatıldı. Sürüm:", session.version)
    return session

def read_parquet_file(spark, path="kullanici_verileri.parquet"):
    """Load the user Parquet file, add derived columns, and register a view.

    Steps, in order:
      1. ``dob`` and ``registered`` are converted with ``to_timestamp``.
      2. ``yas`` is the number of days between ``dob`` and today divided
         by 365.25 (a fractional age in years).
      3. ``kullanim_suresi`` is the number of days between ``registered``
         and today.
      4. The result is registered as the temp view ``kullanicilar`` so the
         Spark SQL variants of the questions can query it.

    Args:
        spark: Session used to read the file.
        path: Location of the Parquet data.

    Returns:
        The enriched DataFrame.
    """
    users = (
        spark.read.parquet(path)
        # The timestamp conversions must precede the date arithmetic below.
        .withColumn("dob", to_timestamp("dob"))
        .withColumn("registered", to_timestamp("registered"))
        .withColumn("yas", datediff(current_date(), col("dob")) / 365.25)
        .withColumn(
            "kullanim_suresi", datediff(current_date(), col("registered"))
        )
    )
    users.createOrReplaceTempView("kullanicilar")
    return users

def question_1_dataframe_api(df):
    """Print per-gender averages of ``yas`` and ``kullanim_suresi``.

    Both averages are rounded to two decimals and shown as
    ``ortalama_yas`` and ``ortalama_kullanim_suresi``.

    Args:
        df: DataFrame with ``gender``, ``yas`` and ``kullanim_suresi`` columns.
    """
    # NOTE: ``round`` and ``avg`` are the Spark column functions brought in
    # by the file's ``from pyspark.sql.functions import *``, not builtins.
    mean_age = round(avg("yas"), 2).alias("ortalama_yas")
    mean_usage = round(avg("kullanim_suresi"), 2).alias("ortalama_kullanim_suresi")
    per_gender = df.groupBy("gender").agg(mean_age, mean_usage)
    per_gender.show()

def question_1_spark_sql(spark=None):
    """Print per-gender averages of ``yas`` and ``kullanim_suresi`` via Spark SQL.

    Queries the ``kullanicilar`` temp view, so that view must already be
    registered in the session being used.

    Args:
        spark: Session to run the query on. When omitted, the current
            session is obtained with ``SparkSession.builder.getOrCreate()``
            instead of relying on a module-level ``spark`` variable, which
            only exists when this file is executed as a script.
    """
    session = spark if spark is not None else SparkSession.builder.getOrCreate()
    session.sql("""
        SELECT
            gender,
            ROUND(AVG(yas), 2) AS ortalama_yas,
            ROUND(AVG(kullanim_suresi), 2) AS ortalama_kullanim_suresi
        FROM kullanicilar
        GROUP BY gender
    """).show()

def question_2_dataframe_api(df):
    """Print averages of ``yas`` and ``kullanim_suresi`` per country and gender.

    Rows are ordered by country then gender; the first 20 rows are shown
    without truncating cell contents.

    Args:
        df: DataFrame with ``country``, ``gender``, ``yas`` and
            ``kullanim_suresi`` columns.
    """
    group_keys = ("country", "gender")
    # ``round``/``avg`` are the Spark column functions from the star import.
    aggregates = [
        round(avg("yas"), 2).alias("ortalama_yas"),
        round(avg("kullanim_suresi"), 2).alias("ortalama_kullanim_suresi"),
    ]
    summary = df.groupBy(*group_keys).agg(*aggregates)
    summary.orderBy(*group_keys).show(20, truncate=False)

def question_2_spark_sql(spark=None):
    """Print averages of ``yas`` and ``kullanim_suresi`` per country and gender via Spark SQL.

    Queries the ``kullanicilar`` temp view, so that view must already be
    registered in the session being used. The first 20 rows are shown
    without truncation, matching the DataFrame API variant of this question.

    Args:
        spark: Session to run the query on. When omitted, the current
            session is obtained with ``SparkSession.builder.getOrCreate()``
            instead of relying on a module-level ``spark`` variable, which
            only exists when this file is executed as a script.
    """
    session = spark if spark is not None else SparkSession.builder.getOrCreate()
    session.sql("""
        SELECT
            country,
            gender,
            ROUND(AVG(yas), 2) AS ortalama_yas,
            ROUND(AVG(kullanim_suresi), 2) AS ortalama_kullanim_suresi
        FROM kullanicilar
        GROUP BY country, gender
        ORDER BY country, gender
    """).show(20, truncate=False)

def question_3_dataframe_api(df):
    """Print the highest-``yas`` users within each (country, gender) group.

    Users are ranked with ``dense_rank`` by descending ``yas`` inside each
    group and rows with rank 1-3 are kept. Because the rank is dense, tied
    values share a rank, so a group can contribute more than three rows.
    Only the first 18 rows of the ordered result are displayed.

    Args:
        df: DataFrame with ``country``, ``gender``, ``name`` and ``yas`` columns.
    """
    by_group_oldest_first = Window.partitionBy("country", "gender").orderBy(
        col("yas").desc()
    )
    ranked = df.withColumn("sira", dense_rank().over(by_group_oldest_first))
    top_ranked = (
        ranked.where(col("sira") <= 3)
        .select("country", "gender", "name", "yas", "sira")
        .orderBy("country", "gender", "sira")
    )
    top_ranked.show(18, truncate=False)

def question_3_spark_sql(spark=None):
    """Print the highest-``yas`` users within each (country, gender) group via Spark SQL.

    Queries the ``kullanicilar`` temp view, so that view must already be
    registered in the session being used. Rows are ranked with
    ``DENSE_RANK`` by descending ``yas`` per group and ranks 1-3 are kept;
    the first 18 rows are shown without truncation.

    Args:
        spark: Session to run the query on. When omitted, the current
            session is obtained with ``SparkSession.builder.getOrCreate()``
            instead of relying on a module-level ``spark`` variable, which
            only exists when this file is executed as a script.
    """
    session = spark if spark is not None else SparkSession.builder.getOrCreate()
    session.sql("""
        SELECT *
        FROM (
            SELECT
                country,
                gender,
                name,
                yas,
                DENSE_RANK() OVER (PARTITION BY country, gender ORDER BY yas DESC) AS sira
            FROM kullanicilar
        ) alt_tablo
        WHERE sira <= 3
        ORDER BY country, gender, sira
    """).show(18, truncate=False)

if __name__ == "__main__":
    # Script entry point: run every question in both its DataFrame API and
    # Spark SQL form. ``spark`` is deliberately kept as a module-level name
    # so code in this file that looks it up globally continues to work.
    spark = create_spark_session()
    try:
        df = read_parquet_file(spark)
        print("\n📌 Soru 1 - DataFrame API")
        question_1_dataframe_api(df)
        print("\n📌 Soru 1 - Spark SQL")
        question_1_spark_sql()
        print("\n📌 Soru 2 - DataFrame API")
        question_2_dataframe_api(df)
        print("\n📌 Soru 2 - Spark SQL")
        question_2_spark_sql()
        print("\n📌 Soru 3 - DataFrame API")
        question_3_dataframe_api(df)
        print("\n📌 Soru 3 - Spark SQL")
        question_3_spark_sql()
    finally:
        # Always release the session, even if reading the file or one of
        # the questions raises.
        spark.stop()


✅ Spark oturumu başlatıldı. Sürüm: 3.5.4

📌 Soru 1 - DataFrame API
+------+------------+------------------------+
|gender|ortalama_yas|ortalama_kullanim_suresi|
+------+------------+------------------------+
|female|       50.37|                 4668.55|
|  male|       52.99|                 4757.53|
+------+------------+------------------------+


📌 Soru 1 - Spark SQL
+------+------------+------------------------+
|gender|ortalama_yas|ortalama_kullanim_suresi|
+------+------------+------------------------+
|female|       50.37|                 4668.55|
|  male|       52.99|                 4757.53|
+------+------------+------------------------+


📌 Soru 2 - DataFrame API
+---------+------+------------+------------------------+
|country  |gender|ortalama_yas|ortalama_kullanim_suresi|
+---------+------+------------+------------------------+
|Australia|female|60.34       |4666.4                  |
|Australia|male  |51.64       |5382.2                  |
|Brazil   |female|50.57       |399