# Operations at Airports
Daily IFR arrivals and departures by airport

EUROCONTROL is a pan-European, civil-military organisation dedicated to supporting European aviation.

# Install and Import the Necessary Libraries (PySpark)

In [None]:
# Install PySpark in Google Colab
!pip install pyspark



In [None]:
# Import the top-level PySpark package
import pyspark

# SparkSession is the entry point to the DataFrame API
from pyspark.sql import SparkSession
# NOTE: this wildcard import brings col, desc, countDistinct, row_number, ...
# into scope and also shadows Python built-ins such as `sum`. Later cells call
# sum("<column>") and rely on it being the Spark aggregate, not the built-in.
from pyspark.sql.functions import *

# Create (or reuse) the Spark session used throughout this notebook
spark = SparkSession.builder.appName("Airport_Traffic").getOrCreate()

In [None]:
# Mount Google Drive so the dataset stored there can be read from Colab
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


# Loading the Dataset from Google Drive

In [None]:
# Location of the 2024 airport traffic CSV on the mounted Google Drive
file_path = "/content/gdrive/MyDrive/Colab Notebooks/Second Project/airport_traffic_2024.csv"

# Read the CSV into a DataFrame: first line is the header, column types are inferred
csv_reader = spark.read.option("header", True).option("inferSchema", True)
df = csv_reader.csv(file_path)

# Preview the first five rows, then print the inferred schema
df.show(5)
df.printSchema()

+----+---------+---------+----------+--------+----------+----------+---------+---------+---------+-------------+-------------+-------------+
|YEAR|MONTH_NUM|MONTH_MON|  FLT_DATE|APT_ICAO|  APT_NAME|STATE_NAME|FLT_DEP_1|FLT_ARR_1|FLT_TOT_1|FLT_DEP_IFR_2|FLT_ARR_IFR_2|FLT_TOT_IFR_2|
+----+---------+---------+----------+--------+----------+----------+---------+---------+---------+-------------+-------------+-------------+
|2024|        1|      JAN|2024-01-01|    LATI|    Tirana|   Albania|       73|       74|      147|         NULL|         NULL|         NULL|
|2024|        1|      JAN|2024-01-01|    UDYZ|   Yerevan|   Armenia|       52|       48|      100|         NULL|         NULL|         NULL|
|2024|        1|      JAN|2024-01-01|    LOWG|      Graz|   Austria|        6|        8|       14|         NULL|         NULL|         NULL|
|2024|        1|      JAN|2024-01-01|    LOWI| Innsbruck|   Austria|       22|       25|       47|         NULL|         NULL|         NULL|
|2024|       

# Some Exploration

In [None]:
# Number of columns in the full dataset
len(df.columns)

13

In [None]:
# Total number of rows (records) in the full DataFrame `df`
df.count()

114754

In [None]:
# Number of distinct rows in `df`; if this equals df.count(), the dataset
# contains no fully duplicated rows.
df.distinct().count()

114754

## Selecting Specific Month

In [None]:
# Keep only the December records (MONTH_NUM == 12) and preview five of them
is_december = col("MONTH_NUM") == 12
df_december = df.where(is_december)
df_december.show(5)

+----+---------+---------+----------+--------+----------+----------+---------+---------+---------+-------------+-------------+-------------+
|YEAR|MONTH_NUM|MONTH_MON|  FLT_DATE|APT_ICAO|  APT_NAME|STATE_NAME|FLT_DEP_1|FLT_ARR_1|FLT_TOT_1|FLT_DEP_IFR_2|FLT_ARR_IFR_2|FLT_TOT_IFR_2|
+----+---------+---------+----------+--------+----------+----------+---------+---------+---------+-------------+-------------+-------------+
|2024|       12|      DEC|2024-12-01|    LATI|    Tirana|   Albania|       79|       81|      160|         NULL|         NULL|         NULL|
|2024|       12|      DEC|2024-12-01|    UDYZ|   Yerevan|   Armenia|       56|       52|      108|         NULL|         NULL|         NULL|
|2024|       12|      DEC|2024-12-01|    LOWG|      Graz|   Austria|       11|       14|       25|         NULL|         NULL|         NULL|
|2024|       12|      DEC|2024-12-01|    LOWI| Innsbruck|   Austria|       20|       15|       35|         NULL|         NULL|         NULL|
|2024|       

In [None]:
# Total number of rows (records) in the December subset `df_december`
df_december.count()

9477

In [None]:
# Number of distinct rows in `df_december`; if this equals df_december.count(),
# the December subset contains no fully duplicated rows.
df_december.distinct().count()

9477

In [None]:
# Number of December rows per state. Each row carries a FLT_DATE and an
# APT_ICAO, so this counts records, not airports.
df_december.groupBy("STATE_NAME").count().show()

+--------------------+-----+
|          STATE_NAME|count|
+--------------------+-----+
|              Sweden|   92|
|             Germany|  460|
|              France| 1790|
|              Greece|  356|
|Republic of North...|   31|
|            Slovakia|   31|
|             Belgium|  155|
|             Albania|   31|
|             Finland|   31|
|             Türkiye|  186|
|               Malta|   31|
|             Croatia|   34|
|               Italy|  434|
|           Lithuania|  124|
|              Norway| 1261|
|               Spain| 1472|
|             Denmark|   31|
|             Ireland|  217|
|             Morocco|  186|
|             Iceland|   31|
+--------------------+-----+
only showing top 20 rows



In [None]:
# Number of December rows per state, largest first.
# NOTE: this is a row count (records per state), not the number of airports;
# use countDistinct("APT_ICAO") to count airports.
df_december.groupBy("STATE_NAME").count().orderBy(desc("count")).show()

+--------------+-----+
|    STATE_NAME|count|
+--------------+-----+
|        France| 1790|
|         Spain| 1472|
|        Norway| 1261|
|United Kingdom|  582|
|       Germany|  460|
|        Poland|  439|
|         Italy|  434|
|        Greece|  356|
|      Portugal|  323|
|       Ireland|  217|
|       Türkiye|  186|
|       Morocco|  186|
|       Austria|  186|
|       Belgium|  155|
|   Netherlands|  146|
|     Lithuania|  124|
|Czech Republic|  111|
|   Switzerland|   93|
|        Sweden|   92|
|      Slovenia|   82|
+--------------+-----+
only showing top 20 rows



In [None]:
# Total number of December flights per state, busiest state first.
# The DataFrame is stored first and displayed in a separate statement:
# DataFrame.show() returns None, so chaining it into the assignment would
# leave df_december_summary set to None instead of the aggregated result.
df_december_summary = (
    df_december.groupBy("STATE_NAME")
    .agg(sum("FLT_TOT_1").alias("Total December Flights"))
    .orderBy(col("Total December Flights").desc())
)
df_december_summary.show()

+--------------+----------------------+
|    STATE_NAME|Total December Flights|
+--------------+----------------------+
|         Spain|                167985|
|United Kingdom|                150442|
|        France|                126070|
|       Germany|                119544|
|         Italy|                 96261|
|       Türkiye|                 84267|
|        Norway|                 48869|
|   Netherlands|                 41261|
|        Poland|                 34997|
|   Switzerland|                 34996|
|      Portugal|                 34375|
|        Greece|                 26850|
|       Belgium|                 24443|
|       Austria|                 24136|
|       Ireland|                 21391|
|       Morocco|                 20366|
|        Sweden|                 18649|
|       Denmark|                 17268|
|       Finland|                 12172|
|Czech Republic|                 11261|
+--------------+----------------------+
only showing top 20 rows



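This shows that the busiest state is Spain and that France appears to have the most airports. However, a quick Google search gives the total number of airports in France as 169, while the count above is 1790. That means there is an issue with our calculation: `groupBy("STATE_NAME").count()` counts rows (one per airport per day), not airports.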

In [None]:
# Count again, this time with countDistinct on the ICAO code so that each
# airport is counted once per state regardless of how many rows it has.
airports_per_state = df_december.groupBy("STATE_NAME").agg(
    countDistinct("APT_ICAO").alias("Unique Airports")
)
airports_per_state.orderBy(desc("Unique Airports")).show()

+--------------+---------------+
|    STATE_NAME|Unique Airports|
+--------------+---------------+
|        France|             63|
|         Spain|             55|
|        Norway|             43|
|United Kingdom|             19|
|       Germany|             15|
|        Poland|             15|
|         Italy|             14|
|        Greece|             13|
|      Portugal|             11|
|       Ireland|             10|
|       Türkiye|              6|
|       Morocco|              6|
|       Austria|              6|
|       Belgium|              5|
|   Netherlands|              5|
|     Lithuania|              4|
|Czech Republic|              4|
|        Sweden|              3|
|   Switzerland|              3|
|      Slovenia|              3|
+--------------+---------------+
only showing top 20 rows



In [None]:
# Narrow the December data down to the airports located in Türkiye
is_turkiye = col("STATE_NAME") == "Türkiye"
df_december_Türkiye = df_december.where(is_turkiye)
df_december_Türkiye.show()

+----+---------+---------+----------+--------+--------------------+----------+---------+---------+---------+-------------+-------------+-------------+
|YEAR|MONTH_NUM|MONTH_MON|  FLT_DATE|APT_ICAO|            APT_NAME|STATE_NAME|FLT_DEP_1|FLT_ARR_1|FLT_TOT_1|FLT_DEP_IFR_2|FLT_ARR_IFR_2|FLT_TOT_IFR_2|
+----+---------+---------+----------+--------+--------------------+----------+---------+---------+---------+-------------+-------------+-------------+
|2024|       12|      DEC|2024-12-01|    LTAC|   Ankara - Esenboğa|   Türkiye|      123|      112|      235|         NULL|         NULL|         NULL|
|2024|       12|      DEC|2024-12-01|    LTAI|             Antalya|   Türkiye|      141|      123|      264|         NULL|         NULL|         NULL|
|2024|       12|      DEC|2024-12-01|    LTBA|    Istanbul Atatürk|   Türkiye|       15|       27|       42|         NULL|         NULL|         NULL|
|2024|       12|      DEC|2024-12-01|    LTBJ|Izmir - Adnan Men...|   Türkiye|       82|      

In [None]:
# Lists each airport name once. Because the aggregate counts distinct values of
# the grouping column itself, "Airport Numbers" is 1 for every group; the
# overall number of airports is computed in the next cell.
df_december_Türkiye.groupBy("APT_NAME").agg(
    countDistinct("APT_NAME").alias("Airport Numbers")).show()

+--------------------+---------------+
|            APT_NAME|Airport Numbers|
+--------------------+---------------+
|   Ankara - Esenboğa|              1|
|             Antalya|              1|
|Istanbul Sabiha G...|              1|
|            Istanbul|              1|
|Izmir - Adnan Men...|              1|
|    Istanbul Atatürk|              1|
+--------------------+---------------+



In [None]:
# Number of distinct airport names in the December/Türkiye subset
df_december_Türkiye.agg(countDistinct("APT_NAME").alias("Airport Numbers")).show()

+---------------+
|Airport Numbers|
+---------------+
|              6|
+---------------+



In [None]:
# Display the first rows (show() defaults to 20) of the December/Türkiye subset
df_december_Türkiye.show()

+----+---------+---------+----------+--------+--------------------+----------+---------+---------+---------+-------------+-------------+-------------+
|YEAR|MONTH_NUM|MONTH_MON|  FLT_DATE|APT_ICAO|            APT_NAME|STATE_NAME|FLT_DEP_1|FLT_ARR_1|FLT_TOT_1|FLT_DEP_IFR_2|FLT_ARR_IFR_2|FLT_TOT_IFR_2|
+----+---------+---------+----------+--------+--------------------+----------+---------+---------+---------+-------------+-------------+-------------+
|2024|       12|      DEC|2024-12-01|    LTAC|   Ankara - Esenboğa|   Türkiye|      123|      112|      235|         NULL|         NULL|         NULL|
|2024|       12|      DEC|2024-12-01|    LTAI|             Antalya|   Türkiye|      141|      123|      264|         NULL|         NULL|         NULL|
|2024|       12|      DEC|2024-12-01|    LTBA|    Istanbul Atatürk|   Türkiye|       15|       27|       42|         NULL|         NULL|         NULL|
|2024|       12|      DEC|2024-12-01|    LTBJ|Izmir - Adnan Men...|   Türkiye|       82|      

In [None]:
# Airport-days with more than 100 total flights, busiest first
busy_days = df_december_Türkiye.where(col("FLT_TOT_1") > 100)
busy_days.orderBy(col("FLT_TOT_1").desc()).show()

+----+---------+---------+----------+--------+--------+----------+---------+---------+---------+-------------+-------------+-------------+
|YEAR|MONTH_NUM|MONTH_MON|  FLT_DATE|APT_ICAO|APT_NAME|STATE_NAME|FLT_DEP_1|FLT_ARR_1|FLT_TOT_1|FLT_DEP_IFR_2|FLT_ARR_IFR_2|FLT_TOT_IFR_2|
+----+---------+---------+----------+--------+--------+----------+---------+---------+---------+-------------+-------------+-------------+
|2024|       12|      DEC|2024-12-22|    LTFM|Istanbul|   Türkiye|      724|      727|     1451|          722|          731|         1453|
|2024|       12|      DEC|2024-12-29|    LTFM|Istanbul|   Türkiye|      712|      720|     1432|          716|          729|         1445|
|2024|       12|      DEC|2024-12-20|    LTFM|Istanbul|   Türkiye|      715|      717|     1432|          719|          724|         1443|
|2024|       12|      DEC|2024-12-27|    LTFM|Istanbul|   Türkiye|      706|      721|     1427|          712|          725|         1437|
|2024|       12|      DEC|2

## Inspecting Data

In [None]:
# Print column names, data types and nullability as a tree
df_december_Türkiye.printSchema()

root
 |-- YEAR: integer (nullable = true)
 |-- MONTH_NUM: integer (nullable = true)
 |-- MONTH_MON: string (nullable = true)
 |-- FLT_DATE: date (nullable = true)
 |-- APT_ICAO: string (nullable = true)
 |-- APT_NAME: string (nullable = true)
 |-- STATE_NAME: string (nullable = true)
 |-- FLT_DEP_1: integer (nullable = true)
 |-- FLT_ARR_1: integer (nullable = true)
 |-- FLT_TOT_1: integer (nullable = true)
 |-- FLT_DEP_IFR_2: integer (nullable = true)
 |-- FLT_ARR_IFR_2: integer (nullable = true)
 |-- FLT_TOT_IFR_2: integer (nullable = true)



In [None]:
# Summary statistics (count, mean, stddev, ...) for the columns of the subset
df_december_Türkiye.describe().show()

+-------+------+---------+---------+--------+--------------------+----------+------------------+------------------+------------------+------------------+-----------------+-----------------+
|summary|  YEAR|MONTH_NUM|MONTH_MON|APT_ICAO|            APT_NAME|STATE_NAME|         FLT_DEP_1|         FLT_ARR_1|         FLT_TOT_1|     FLT_DEP_IFR_2|    FLT_ARR_IFR_2|    FLT_TOT_IFR_2|
+-------+------+---------+---------+--------+--------------------+----------+------------------+------------------+------------------+------------------+-----------------+-----------------+
|  count|   186|      186|      186|     186|                 186|       186|               186|               186|               186|                31|               31|               31|
|   mean|2024.0|     12.0|     NULL|    NULL|                NULL|      NULL|226.38172043010752|226.66666666666666| 453.0483870967742| 681.4193548387096|684.8387096774194|1366.258064516129|
| stddev|   0.0|      0.0|     NULL|    NULL|     

In [None]:
# List of (column name, data type) pairs
print(df_december_Türkiye.dtypes)

[('YEAR', 'int'), ('MONTH_NUM', 'int'), ('MONTH_MON', 'string'), ('FLT_DATE', 'date'), ('APT_ICAO', 'string'), ('APT_NAME', 'string'), ('STATE_NAME', 'string'), ('FLT_DEP_1', 'int'), ('FLT_ARR_1', 'int'), ('FLT_TOT_1', 'int'), ('FLT_DEP_IFR_2', 'int'), ('FLT_ARR_IFR_2', 'int'), ('FLT_TOT_IFR_2', 'int')]


In [None]:
# List of column names
print(df_december_Türkiye.columns)

['YEAR', 'MONTH_NUM', 'MONTH_MON', 'FLT_DATE', 'APT_ICAO', 'APT_NAME', 'STATE_NAME', 'FLT_DEP_1', 'FLT_ARR_1', 'FLT_TOT_1', 'FLT_DEP_IFR_2', 'FLT_ARR_IFR_2', 'FLT_TOT_IFR_2']


In [None]:
# Number of rows in the December/Türkiye subset
df_december_Türkiye.count()

186

In [None]:
# take(3) brings the first three rows to the driver as a list of Row objects
first_three_rows = df_december_Türkiye.take(3)
print(first_three_rows)

[Row(YEAR=2024, MONTH_NUM=12, MONTH_MON='DEC', FLT_DATE=datetime.date(2024, 12, 1), APT_ICAO='LTAC', APT_NAME='Ankara - Esenboğa', STATE_NAME='Türkiye', FLT_DEP_1=123, FLT_ARR_1=112, FLT_TOT_1=235, FLT_DEP_IFR_2=None, FLT_ARR_IFR_2=None, FLT_TOT_IFR_2=None), Row(YEAR=2024, MONTH_NUM=12, MONTH_MON='DEC', FLT_DATE=datetime.date(2024, 12, 1), APT_ICAO='LTAI', APT_NAME='Antalya', STATE_NAME='Türkiye', FLT_DEP_1=141, FLT_ARR_1=123, FLT_TOT_1=264, FLT_DEP_IFR_2=None, FLT_ARR_IFR_2=None, FLT_TOT_IFR_2=None), Row(YEAR=2024, MONTH_NUM=12, MONTH_MON='DEC', FLT_DATE=datetime.date(2024, 12, 1), APT_ICAO='LTBA', APT_NAME='Istanbul Atatürk', STATE_NAME='Türkiye', FLT_DEP_1=15, FLT_ARR_1=27, FLT_TOT_1=42, FLT_DEP_IFR_2=None, FLT_ARR_IFR_2=None, FLT_TOT_IFR_2=None)]


In [None]:
# Check whether the subset has any rows at all
# (DataFrame.isEmpty() needs a reasonably recent Spark release - TODO confirm
# the minimum version if this notebook must run on older clusters).
is_empty = df_december_Türkiye.isEmpty()
print(f"Is the DataFrame empty? {is_empty}")

Is the DataFrame empty? False


In [None]:
# Mark the subset for caching so repeated actions on it can reuse the result.
# As the last expression of the cell, the returned DataFrame's repr is displayed.
df_december_Türkiye.cache()

DataFrame[YEAR: int, MONTH_NUM: int, MONTH_MON: string, FLT_DATE: date, APT_ICAO: string, APT_NAME: string, STATE_NAME: string, FLT_DEP_1: int, FLT_ARR_1: int, FLT_TOT_1: int, FLT_DEP_IFR_2: int, FLT_ARR_IFR_2: int, FLT_TOT_IFR_2: int]

In [None]:
# Distinct airport names present in the December/Türkiye subset
df_unique_airports = df_december_Türkiye.select("APT_NAME").distinct()
df_unique_airports.show()

+--------------------+
|            APT_NAME|
+--------------------+
|Istanbul Sabiha G...|
|            Istanbul|
|             Antalya|
|   Ankara - Esenboğa|
|    Istanbul Atatürk|
|Izmir - Adnan Men...|
+--------------------+



In [None]:
# Distinct daily departure counts observed in the December/Türkiye subset.
# Stored under its own name so that df_unique_airports (the distinct airport
# names computed in the previous cell) is not overwritten with unrelated data.
df_unique_departures = df_december_Türkiye.select("FLT_DEP_1").distinct()
df_unique_departures.show()


+---------+
|FLT_DEP_1|
+---------+
|       85|
|      321|
|      683|
|      101|
|      115|
|      126|
|       81|
|       28|
|      667|
|       76|
|       27|
|       26|
|      332|
|      103|
|      350|
|       91|
|      663|
|      333|
|      707|
|       22|
+---------+
only showing top 20 rows



In [None]:
# Draw a random sample of roughly 10% of the rows.
# A fixed seed makes the sample reproducible across runs of the notebook.
# `fraction` is a per-row sampling probability, so the sample size is only
# approximately 10% of the input.
SAMPLE_SEED = 42
sample_data = df_december_Türkiye.sample(fraction=0.1, seed=SAMPLE_SEED)
sample_data.show()

+----+---------+---------+----------+--------+--------------------+----------+---------+---------+---------+-------------+-------------+-------------+
|YEAR|MONTH_NUM|MONTH_MON|  FLT_DATE|APT_ICAO|            APT_NAME|STATE_NAME|FLT_DEP_1|FLT_ARR_1|FLT_TOT_1|FLT_DEP_IFR_2|FLT_ARR_IFR_2|FLT_TOT_IFR_2|
+----+---------+---------+----------+--------+--------------------+----------+---------+---------+---------+-------------+-------------+-------------+
|2024|       12|      DEC|2024-12-02|    LTAC|   Ankara - Esenboğa|   Türkiye|      110|      120|      230|         NULL|         NULL|         NULL|
|2024|       12|      DEC|2024-12-02|    LTFM|            Istanbul|   Türkiye|      672|      671|     1343|          672|          676|         1348|
|2024|       12|      DEC|2024-12-03|    LTFJ|Istanbul Sabiha G...|   Türkiye|      321|      316|      637|         NULL|         NULL|         NULL|
|2024|       12|      DEC|2024-12-08|    LTFJ|Istanbul Sabiha G...|   Türkiye|      350|      

In [None]:
# Number of rows that ended up in the sample
sample_data.count()

24

In [None]:
# first() returns the first row of the subset as a single Row object
first_row = df_december_Türkiye.first()
print(first_row)

Row(YEAR=2024, MONTH_NUM=12, MONTH_MON='DEC', FLT_DATE=datetime.date(2024, 12, 1), APT_ICAO='LTAC', APT_NAME='Ankara - Esenboğa', STATE_NAME='Türkiye', FLT_DEP_1=123, FLT_ARR_1=112, FLT_TOT_1=235, FLT_DEP_IFR_2=None, FLT_ARR_IFR_2=None, FLT_TOT_IFR_2=None)


In [None]:
# limit(10) returns a new DataFrame holding at most ten rows
limited_data = df_december_Türkiye.limit(10)
limited_data.show()

+----+---------+---------+----------+--------+--------------------+----------+---------+---------+---------+-------------+-------------+-------------+
|YEAR|MONTH_NUM|MONTH_MON|  FLT_DATE|APT_ICAO|            APT_NAME|STATE_NAME|FLT_DEP_1|FLT_ARR_1|FLT_TOT_1|FLT_DEP_IFR_2|FLT_ARR_IFR_2|FLT_TOT_IFR_2|
+----+---------+---------+----------+--------+--------------------+----------+---------+---------+---------+-------------+-------------+-------------+
|2024|       12|      DEC|2024-12-01|    LTAC|   Ankara - Esenboğa|   Türkiye|      123|      112|      235|         NULL|         NULL|         NULL|
|2024|       12|      DEC|2024-12-01|    LTAI|             Antalya|   Türkiye|      141|      123|      264|         NULL|         NULL|         NULL|
|2024|       12|      DEC|2024-12-01|    LTBA|    Istanbul Atatürk|   Türkiye|       15|       27|       42|         NULL|         NULL|         NULL|
|2024|       12|      DEC|2024-12-01|    LTBJ|Izmir - Adnan Men...|   Türkiye|       82|      

# Data Cleaning

## Checking for Null Values

In [None]:
# For each key column, display the rows where that column is null.
# An empty table means the column has no missing values.
for checked_column in (
    "APT_ICAO",
    "APT_NAME",
    "STATE_NAME",
    "FLT_DEP_1",
    "FLT_ARR_1",
    "FLT_TOT_1",
):
    rows_missing_value = df_december_Türkiye.where(col(checked_column).isNull())
    rows_missing_value.show()

+----+---------+---------+--------+--------+--------+----------+---------+---------+---------+-------------+-------------+-------------+
|YEAR|MONTH_NUM|MONTH_MON|FLT_DATE|APT_ICAO|APT_NAME|STATE_NAME|FLT_DEP_1|FLT_ARR_1|FLT_TOT_1|FLT_DEP_IFR_2|FLT_ARR_IFR_2|FLT_TOT_IFR_2|
+----+---------+---------+--------+--------+--------+----------+---------+---------+---------+-------------+-------------+-------------+
+----+---------+---------+--------+--------+--------+----------+---------+---------+---------+-------------+-------------+-------------+

+----+---------+---------+--------+--------+--------+----------+---------+---------+---------+-------------+-------------+-------------+
|YEAR|MONTH_NUM|MONTH_MON|FLT_DATE|APT_ICAO|APT_NAME|STATE_NAME|FLT_DEP_1|FLT_ARR_1|FLT_TOT_1|FLT_DEP_IFR_2|FLT_ARR_IFR_2|FLT_TOT_IFR_2|
+----+---------+---------+--------+--------+--------+----------+---------+---------+---------+-------------+-------------+-------------+
+----+---------+---------+--------+-----

In [None]:
# Report the number of null values for each key column, one line per column
for checked_column in (
    "APT_ICAO",
    "APT_NAME",
    "STATE_NAME",
    "FLT_DEP_1",
    "FLT_ARR_1",
    "FLT_TOT_1",
):
    null_count = df_december_Türkiye.where(col(checked_column).isNull()).count()
    print(f"Number of null values in {checked_column}: {null_count}")

Number of null values in APT_ICAO: 0
Number of null values in APT_NAME: 0
Number of null values in STATE_NAME: 0
Number of null values in FLT_DEP_1: 0
Number of null values in FLT_ARR_1: 0
Number of null values in FLT_TOT_1: 0


In [None]:
# Drop rows whose FLT_TOT_1 is null. The null check above reported 0 nulls for
# this column, so no rows are removed for the current data.
df_december_Türkiye = df_december_Türkiye.dropna(subset=["FLT_TOT_1"])


In [None]:
# Replace null FLT_TOT_1 values with 0. Rows with a null FLT_TOT_1 were already
# dropped by the preceding dropna cell, so there is nothing left to fill here.
df_december_Türkiye = df_december_Türkiye.fillna({"FLT_TOT_1": 0})


In [None]:
# Count the airport-days on which no flights at all were recorded
has_no_flights = col("FLT_TOT_1") == 0
zero_flights_count = df_december_Türkiye.where(has_no_flights).count()

# Report the result
print(f"Number of rows with FLT_TOT_1 equal to 0: {zero_flights_count}")

Number of rows with FLT_TOT_1 equal to 0: 0


In [None]:
# Give the columns human-readable names.
# withColumnRenamed silently does nothing when the source column does not
# exist, so every key below must match the schema exactly. In particular the
# year column is named "YEAR" (there is no "YEAR_NUM" column), and each source
# column needs to be listed only once.
COLUMN_RENAMES = {
    "FLT_DEP_1": "DEPARTURES",
    "FLT_ARR_1": "ARRIVALS",
    "FLT_TOT_1": "TOTAL FLIGHTS",
    "STATE_NAME": "STATE NAME",
    "APT_NAME": "AIRPORT NAME",
    "APT_ICAO": "AIRPORT CODE",
    "MONTH_NUM": "MONTH NO",
    "FLT_DATE": "FLIGHT DATE",
    "YEAR": "YEAR NO",
}
for old_name, new_name in COLUMN_RENAMES.items():
    df_december_Türkiye = df_december_Türkiye.withColumnRenamed(old_name, new_name)



In [None]:
# Preview five rows to verify the renamed columns
df_december_Türkiye.show(5)

+----+--------+---------+-----------+------------+--------------------+----------+----------+--------+-------------+-------------+-------------+-------------+
|YEAR|MONTH NO|MONTH_MON|FLIGHT DATE|AIRPORT CODE|        AIRPORT NAME|STATE NAME|DEPARTURES|ARRIVALS|TOTAL FLIGHTS|FLT_DEP_IFR_2|FLT_ARR_IFR_2|FLT_TOT_IFR_2|
+----+--------+---------+-----------+------------+--------------------+----------+----------+--------+-------------+-------------+-------------+-------------+
|2024|      12|      DEC| 2024-12-01|        LTAC|   Ankara - Esenboğa|   Türkiye|       123|     112|          235|         NULL|         NULL|         NULL|
|2024|      12|      DEC| 2024-12-01|        LTAI|             Antalya|   Türkiye|       141|     123|          264|         NULL|         NULL|         NULL|
|2024|      12|      DEC| 2024-12-01|        LTBA|    Istanbul Atatürk|   Türkiye|        15|      27|           42|         NULL|         NULL|         NULL|
|2024|      12|      DEC| 2024-12-01|        L

In [None]:
# Remove the three *_IFR_2 columns and display the slimmed-down DataFrame
ifr_columns = ["FLT_DEP_IFR_2", "FLT_ARR_IFR_2", "FLT_TOT_IFR_2"]
df_december_Türkiye = df_december_Türkiye.drop(*ifr_columns)
df_december_Türkiye.show()

+----+--------+---------+-----------+------------+--------------------+----------+----------+--------+-------------+
|YEAR|MONTH NO|MONTH_MON|FLIGHT DATE|AIRPORT CODE|        AIRPORT NAME|STATE NAME|DEPARTURES|ARRIVALS|TOTAL FLIGHTS|
+----+--------+---------+-----------+------------+--------------------+----------+----------+--------+-------------+
|2024|      12|      DEC| 2024-12-01|        LTAC|   Ankara - Esenboğa|   Türkiye|       123|     112|          235|
|2024|      12|      DEC| 2024-12-01|        LTAI|             Antalya|   Türkiye|       141|     123|          264|
|2024|      12|      DEC| 2024-12-01|        LTBA|    Istanbul Atatürk|   Türkiye|        15|      27|           42|
|2024|      12|      DEC| 2024-12-01|        LTBJ|Izmir - Adnan Men...|   Türkiye|        82|      85|          167|
|2024|      12|      DEC| 2024-12-01|        LTFJ|Istanbul Sabiha G...|   Türkiye|       341|     332|          673|
|2024|      12|      DEC| 2024-12-01|        LTFM|            Is

In [None]:
# Total flights per airport over the month, busiest airport first
flights_per_airport = df_december_Türkiye.groupBy("AIRPORT NAME").agg(
    sum("TOTAL FLIGHTS").alias("Total Number of Flights")
)
flights_per_airport.orderBy(desc("Total Number of Flights")).show()

+--------------------+-----------------------+
|        AIRPORT NAME|Total Number of Flights|
+--------------------+-----------------------+
|            Istanbul|                  42231|
|Istanbul Sabiha G...|                  20580|
|             Antalya|                   7419|
|   Ankara - Esenboğa|                   7418|
|Izmir - Adnan Men...|                   5128|
|    Istanbul Atatürk|                   1491|
+--------------------+-----------------------+



In [None]:
# Total departures per airport over the month, largest first
departures_per_airport = df_december_Türkiye.groupBy("AIRPORT NAME").agg(
    sum("DEPARTURES").alias("Total Number of Departures")
)
departures_per_airport.orderBy(desc("Total Number of Departures")).show()

+--------------------+--------------------------+
|        AIRPORT NAME|Total Number of Departures|
+--------------------+--------------------------+
|            Istanbul|                     21101|
|Istanbul Sabiha G...|                     10285|
|             Antalya|                      3715|
|   Ankara - Esenboğa|                      3699|
|Izmir - Adnan Men...|                      2563|
|    Istanbul Atatürk|                       744|
+--------------------+--------------------------+



In [None]:
# Total arrivals per airport over the month, largest first
arrivals_per_airport = df_december_Türkiye.groupBy("AIRPORT NAME").agg(
    sum("ARRIVALS").alias("Total Number of Arrivals")
)
arrivals_per_airport.orderBy(desc("Total Number of Arrivals")).show()

+--------------------+------------------------+
|        AIRPORT NAME|Total Number of Arrivals|
+--------------------+------------------------+
|            Istanbul|                   21130|
|Istanbul Sabiha G...|                   10295|
|   Ankara - Esenboğa|                    3719|
|             Antalya|                    3704|
|Izmir - Adnan Men...|                    2565|
|    Istanbul Atatürk|                     747|
+--------------------+------------------------+



## Adding New Columns

In [None]:
# Preview five rows of the cleaned DataFrame
df_december_Türkiye.show(5)

+----+--------+---------+-----------+------------+--------------------+----------+----------+--------+-------------+
|YEAR|MONTH NO|MONTH_MON|FLIGHT DATE|AIRPORT CODE|        AIRPORT NAME|STATE NAME|DEPARTURES|ARRIVALS|TOTAL FLIGHTS|
+----+--------+---------+-----------+------------+--------------------+----------+----------+--------+-------------+
|2024|      12|      DEC| 2024-12-01|        LTAC|   Ankara - Esenboğa|   Türkiye|       123|     112|          235|
|2024|      12|      DEC| 2024-12-01|        LTAI|             Antalya|   Türkiye|       141|     123|          264|
|2024|      12|      DEC| 2024-12-01|        LTBA|    Istanbul Atatürk|   Türkiye|        15|      27|           42|
|2024|      12|      DEC| 2024-12-01|        LTBJ|Izmir - Adnan Men...|   Türkiye|        82|      85|          167|
|2024|      12|      DEC| 2024-12-01|        LTFJ|Istanbul Sabiha G...|   Türkiye|       341|     332|          673|
+----+--------+---------+-----------+------------+--------------

In [None]:
# Display the flight-date column on its own (first 20 rows)
df_december_Türkiye.select("FLIGHT DATE").show()

+-----------+
|FLIGHT DATE|
+-----------+
| 2024-12-01|
| 2024-12-01|
| 2024-12-01|
| 2024-12-01|
| 2024-12-01|
| 2024-12-01|
| 2024-12-02|
| 2024-12-02|
| 2024-12-02|
| 2024-12-02|
| 2024-12-02|
| 2024-12-02|
| 2024-12-03|
| 2024-12-03|
| 2024-12-03|
| 2024-12-03|
| 2024-12-03|
| 2024-12-03|
| 2024-12-04|
| 2024-12-04|
+-----------+
only showing top 20 rows



In [None]:
# Distinct flight dates in the subset (shown in no particular order)
df_december_Türkiye.select("FLIGHT DATE").distinct().show()

+-----------+
|FLIGHT DATE|
+-----------+
| 2024-12-01|
| 2024-12-12|
| 2024-12-02|
| 2024-12-09|
| 2024-12-30|
| 2024-12-26|
| 2024-12-03|
| 2024-12-07|
| 2024-12-18|
| 2024-12-31|
| 2024-12-16|
| 2024-12-20|
| 2024-12-24|
| 2024-12-14|
| 2024-12-27|
| 2024-12-10|
| 2024-12-21|
| 2024-12-17|
| 2024-12-06|
| 2024-12-25|
+-----------+
only showing top 20 rows



In [None]:
# Number of distinct flight dates covered by the subset
df_december_Türkiye.select(countDistinct("FLIGHT DATE")).show()

+---------------------------+
|count(DISTINCT FLIGHT DATE)|
+---------------------------+
|                         31|
+---------------------------+



In [None]:
# Monthly totals (flights, departures, arrivals) for each airport
monthly_totals = [
    sum("TOTAL FLIGHTS").alias("Total Number of Flights"),
    sum("DEPARTURES").alias("Total Number of Departures"),
    sum("ARRIVALS").alias("Total Number of Arrivals"),
]
df_december_Türkiye.groupBy("AIRPORT NAME").agg(*monthly_totals).show()

+--------------------+-----------------------+--------------------------+------------------------+
|        AIRPORT NAME|Total Number of Flights|Total Number of Departures|Total Number of Arrivals|
+--------------------+-----------------------+--------------------------+------------------------+
|Istanbul Sabiha G...|                  20580|                     10285|                   10295|
|            Istanbul|                  42231|                     21101|                   21130|
|             Antalya|                   7419|                      3715|                    3704|
|   Ankara - Esenboğa|                   7418|                      3699|                    3719|
|    Istanbul Atatürk|                   1491|                       744|                     747|
|Izmir - Adnan Men...|                   5128|                      2563|                    2565|
+--------------------+-----------------------+--------------------------+------------------------+



In [None]:
# Daily totals across all airports, busiest day first
daily_totals = df_december_Türkiye.groupBy("FLIGHT DATE").agg(
    sum("TOTAL FLIGHTS").alias("Total Number of Flights"),
    sum("DEPARTURES").alias("Total Number of Departures"),
    sum("ARRIVALS").alias("Total Number of Arrivals"),
)
daily_totals.orderBy(desc("Total Number of Flights")).show()

+-----------+-----------------------+--------------------------+------------------------+
|FLIGHT DATE|Total Number of Flights|Total Number of Departures|Total Number of Arrivals|
+-----------+-----------------------+--------------------------+------------------------+
| 2024-12-22|                   2931|                      1466|                    1465|
| 2024-12-20|                   2926|                      1470|                    1456|
| 2024-12-21|                   2892|                      1442|                    1450|
| 2024-12-28|                   2892|                      1439|                    1453|
| 2024-12-29|                   2889|                      1446|                    1443|
| 2024-12-27|                   2887|                      1444|                    1443|
| 2024-12-08|                   2792|                      1403|                    1389|
| 2024-12-06|                   2791|                      1407|                    1384|
| 2024-12-

In [None]:
# Totals for every (date, airport) combination, busiest combination first
airport_day_totals = df_december_Türkiye.groupBy("FLIGHT DATE", "AIRPORT NAME").agg(
    sum("TOTAL FLIGHTS").alias("Total Number of Flights"),
    sum("DEPARTURES").alias("Total Number of Departures"),
    sum("ARRIVALS").alias("Total Number of Arrivals"),
)
airport_day_totals.orderBy(desc("Total Number of Flights")).show()

+-----------+------------+-----------------------+--------------------------+------------------------+
|FLIGHT DATE|AIRPORT NAME|Total Number of Flights|Total Number of Departures|Total Number of Arrivals|
+-----------+------------+-----------------------+--------------------------+------------------------+
| 2024-12-22|    Istanbul|                   1451|                       724|                     727|
| 2024-12-20|    Istanbul|                   1432|                       715|                     717|
| 2024-12-29|    Istanbul|                   1432|                       712|                     720|
| 2024-12-27|    Istanbul|                   1427|                       706|                     721|
| 2024-12-21|    Istanbul|                   1412|                       707|                     705|
| 2024-12-08|    Istanbul|                   1411|                       706|                     705|
| 2024-12-28|    Istanbul|                   1410|                       

In [None]:
from pyspark.sql.window import Window

# Rank the days of each airport by total flights, busiest first. The flight
# date is a secondary sort key so that ties between equally busy days are
# resolved the same way on every run (row_number alone would pick arbitrarily).
window_spec = Window.partitionBy("AIRPORT NAME").orderBy(
    col("Total Number of Flights").desc(),
    col("FLIGHT DATE").asc(),
)

# Busiest single day per airport. The DataFrame is stored first and displayed
# in a separate statement: DataFrame.show() returns None, so chaining it into
# the assignment would leave df_busiest_days set to None.
df_busiest_days = (
    df_december_Türkiye.groupBy("FLIGHT DATE", "AIRPORT NAME")
    .agg(sum("TOTAL FLIGHTS").alias("Total Number of Flights"))
    .withColumn("rank", row_number().over(window_spec))
    .filter(col("rank") == 1)
    .drop("rank")
    .orderBy(col("Total Number of Flights").desc())
)
df_busiest_days.show()

+-----------+--------------------+-----------------------+
|FLIGHT DATE|        AIRPORT NAME|Total Number of Flights|
+-----------+--------------------+-----------------------+
| 2024-12-22|            Istanbul|                   1451|
| 2024-12-22|Istanbul Sabiha G...|                    701|
| 2024-12-28|             Antalya|                    343|
| 2024-12-17|   Ankara - Esenboğa|                    265|
| 2024-12-20|Izmir - Adnan Men...|                    191|
| 2024-12-19|    Istanbul Atatürk|                     75|
+-----------+--------------------+-----------------------+



In [None]:
# Add the daily net balance of arrivals over departures for each airport.
# The new DataFrame is assigned first and displayed in a separate statement:
# DataFrame.show() returns None, so chaining it into the assignment would
# replace df_december_Türkiye with None and break every later use of it.
df_december_Türkiye = df_december_Türkiye.withColumn(
    'Difference Between Arrivals and Departures',
    col('ARRIVALS') - col('DEPARTURES'),
)
df_december_Türkiye.show()

+----+--------+---------+-----------+------------+--------------------+----------+----------+--------+-------------+------------------------------------------+
|YEAR|MONTH NO|MONTH_MON|FLIGHT DATE|AIRPORT CODE|        AIRPORT NAME|STATE NAME|DEPARTURES|ARRIVALS|TOTAL FLIGHTS|Difference Between Arrivals and Departures|
+----+--------+---------+-----------+------------+--------------------+----------+----------+--------+-------------+------------------------------------------+
|2024|      12|      DEC| 2024-12-01|        LTAC|   Ankara - Esenboğa|   Türkiye|       123|     112|          235|                                       -11|
|2024|      12|      DEC| 2024-12-01|        LTAI|             Antalya|   Türkiye|       141|     123|          264|                                       -18|
|2024|      12|      DEC| 2024-12-01|        LTBA|    Istanbul Atatürk|   Türkiye|        15|      27|           42|                                        12|
|2024|      12|      DEC| 2024-12-01|   