In [None]:
import pandas as pd
import numpy as np
from datetime import datetime

# Hard-coded sample of five flights: route label, departure/arrival timestamps
# (as strings) and the coordinates of both endpoints.
data = {
    'Flight': ['A-B', 'A-C', 'B-C', 'B-D', 'C-D'],
    'Departure Time': ['2024-10-10 08:00:00', '2024-10-10 09:00:00', '2024-10-10 10:00:00', '2024-10-10 11:00:00', '2024-10-10 12:00:00'],
    'Arrival Time': ['2024-10-10 10:00:00', '2024-10-10 12:30:00', '2024-10-10 11:30:00', '2024-10-10 13:30:00', '2024-10-10 14:00:00'],
    'Departure Lat': [34.05, 34.05, 36.16, 36.16, 40.71],
    'Departure Long': [-118.25, -118.25, -115.15, -115.15, -74.00],
    'Arrival Lat': [36.16, 40.71, 40.71, 34.05, 36.16],
    'Arrival Long': [-115.15, -74.00, -74.00, -118.25, -115.15]
}

# Tabular view of the sample
df = pd.DataFrame(data)

# Parse both timestamp columns so they support datetime arithmetic
for time_column in ('Departure Time', 'Arrival Time'):
    df[time_column] = pd.to_datetime(df[time_column])

# Elapsed time per flight, expressed in hours (seconds / 3600)
time_in_air = df['Arrival Time'] - df['Departure Time']
df['Travel Time (hours)'] = time_in_air.dt.total_seconds() / 3600

# Haversine formula to calculate great-circle distance
def haversine(lat1, lon1, lat2, lon2, radius=6371):
    """Return the great-circle distance between two points on a sphere.

    Parameters
    ----------
    lat1, lon1 : float or array-like
        Latitude and longitude of the first point(s), in degrees.
    lat2, lon2 : float or array-like
        Latitude and longitude of the second point(s), in degrees.
    radius : float, optional
        Sphere radius; the result is in the same unit. Defaults to 6371,
        the mean Earth radius in km.

    Returns
    -------
    float or array-like
        Distance along the sphere surface. Only NumPy ufuncs are used, so
        scalars, NumPy arrays and pandas Series are all accepted and are
        processed element-wise.
    """
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make sqrt(1 - a) return NaN; clamp to stay valid.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return radius * c

# Calculate distance for each flight.
# haversine is built from NumPy ufuncs only, so whole columns can be passed
# at once instead of invoking it row by row through df.apply(axis=1).
df['Distance (km)'] = haversine(
    df['Departure Lat'],
    df['Departure Long'],
    df['Arrival Lat'],
    df['Arrival Long'],
)

# Display the DataFrame
print(df)

In [None]:
from pyspark.sql import SparkSession
from graphframes import GraphFrame

from pyspark.sql import functions as F

# Initialize Spark session
spark = (
    SparkSession.builder
    .appName("Airline Route Optimization")
    .getOrCreate()
)

# Sample data for airports (nodes) and flights (edges)
airports = spark.createDataFrame([
    ("A", "Airport A"),
    ("B", "Airport B"),
    ("C", "Airport C"),
    ("D", "Airport D")
], ["id", "name"])

flights = spark.createDataFrame([
    ("A", "B", 100),  # from A to B with weight 100 (e.g., travel time or cost)
    ("A", "C", 300),
    ("B", "C", 100),
    ("B", "D", 200),
    ("C", "D", 100)
], ["src", "dst", "weight"])

# Create GraphFrame
graph = GraphFrame(airports, flights)

# Find weighted shortest paths from Airport A to all other airports.
#
# GraphFrame.shortestPaths(landmarks=[...]) is not used here: it counts hops
# from every vertex TO the landmarks and ignores the "weight" column, so on
# this directed graph (all edges lead away from A) it cannot answer the
# question. Instead, run Bellman-Ford relaxation over the edge DataFrame.
source_airport = "A"

edges = graph.edges.select(
    "src", "dst", F.col("weight").cast("double").alias("weight")
)

# Distance 0 for the source; null marks "not reached yet".
distances = graph.vertices.select(
    "id",
    F.when(F.col("id") == source_airport, F.lit(0.0)).alias("distance"),
)

# A shortest path visits at most |V| - 1 edges, so that many rounds suffice.
# NOTE: the query plan grows with every round; for large graphs checkpoint
# `distances` periodically.
for _ in range(graph.vertices.count() - 1):
    candidates = (
        edges.join(distances, F.col("src") == F.col("id"))
        .where(F.col("distance").isNotNull())
        .select(
            F.col("dst").alias("id"),
            (F.col("distance") + F.col("weight")).alias("distance"),
        )
    )
    # min() skips nulls, so a vertex stays null only while it is unreachable.
    distances = (
        distances.unionByName(candidates)
        .groupBy("id")
        .agg(F.min("distance").alias("distance"))
    )

# One row per airport: id, name, total weight of the cheapest route from A
# (null when the airport cannot be reached from A).
results = graph.vertices.join(distances, on="id").orderBy("id")
results.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Hard-coded planned/actual arrival figures per flight, standing in for
# values that would be exported to pandas after the PySpark processing step.
data = {
    'Flight': ['A-B', 'A-C', 'B-C', 'B-D', 'C-D'],
    'Planned Arrival Time': [100, 300, 100, 200, 100],
    'Actual Arrival Time': [120, 310, 110, 210, 120]
}

# Build the frame and derive the delay as actual minus planned
df = pd.DataFrame(data)
df = df.assign(Delay=df['Actual Arrival Time'] - df['Planned Arrival Time'])

# Bar chart: one bar per flight, bar height = delay
delay_fig, delay_ax = plt.subplots(figsize=(10, 6))
sns.barplot(x='Flight', y='Delay', data=df, palette='viridis', ax=delay_ax)
delay_ax.set_title('Flight Delays')
delay_ax.set_xlabel('Flight')
delay_ax.set_ylabel('Delay (minutes)')
plt.show()

# Per-route weights to visualise; the values are hard-coded here rather than
# read from a computed shortest-path result.
shortest_path_data = {
    'Route': ['A-B', 'A-C', 'B-C', 'B-D', 'C-D'],
    'Weight': [100, 300, 100, 200, 100]
}
shortest_path_df = pd.DataFrame(shortest_path_data)

# Line chart with a marker at every route
weight_fig, weight_ax = plt.subplots(figsize=(10, 6))
sns.lineplot(x='Route', y='Weight', data=shortest_path_df, marker='o', ax=weight_ax)
weight_ax.set_title('Shortest Path Weights')
weight_ax.set_xlabel('Route')
weight_ax.set_ylabel('Weight (e.g., Travel Time or Cost)')
plt.show()

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Load flight data.
# NOTE(review): assumes the CSV rows are in chronological order and that the
# selected columns are numeric (MinMaxScaler requires it) - confirm.
data = pd.read_csv('flights_data.csv')

# Feature engineering: select relevant features for delay prediction
features = data[['planned_departure', 'planned_arrival', 'distance']]
labels = data[['delay']]

# Each sample is a window of `sequence_length` consecutive rows; its target is
# the delay of the row immediately after the window.
sequence_length = 10  # Number of previous time steps to use for prediction
num_windows = len(data) - sequence_length

# Chronological 80/20 split, expressed in windows
train_size = int(num_windows * 0.8) if num_windows > 0 else 0
if train_size < 1 or train_size >= num_windows:
    raise ValueError(
        f"flights_data.csv has {len(data)} rows, which is too few to build "
        f"training and test windows with sequence_length={sequence_length}"
    )

# Training windows read feature rows [0, train_end) and the last training
# target sits at row `train_end`.
train_end = train_size + sequence_length - 1

# Normalize the data.
# Features and labels get their own scaler: re-fitting one shared scaler on
# the labels would discard the feature scaling parameters. Both are fitted on
# the training rows only so no test-set statistics leak into training.
feature_scaler = MinMaxScaler().fit(features.iloc[:train_end])
label_scaler = MinMaxScaler().fit(labels.iloc[:train_end + 1])
scaler = label_scaler  # kept for code that still refers to `scaler`

features_scaled = feature_scaler.transform(features)
labels_scaled = label_scaler.transform(labels)

# Prepare the data for LSTM: X is (windows, sequence_length, n_features),
# y is (windows, 1)
X = np.array([features_scaled[i:i + sequence_length] for i in range(num_windows)])
y = np.array([labels_scaled[i + sequence_length] for i in range(num_windows)])

# Split the data into training and testing sets
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Build the LSTM model: two stacked LSTM layers followed by a single-unit
# regression head
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(LSTM(50))
model.add(Dense(1))

# Compile and train the model.
# The held-out split is only used to report validation loss per epoch; if it
# is ever used for model selection, carve out a separate test set.
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Make predictions
predictions = model.predict(X_test)

# Inverse transform the predictions to original scale (label scaler, since
# the model predicts scaled delays)
predictions_inverse = label_scaler.inverse_transform(predictions)