In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder

# Step 1: Load Data
df = pd.read_csv(r'C:\Users\Ahmed\Desktop\AIRLINE FILES\FLIGHTS.csv')

# Step 2: Feature Engineering
df['departure_hour'] = pd.to_datetime(df['departure_time']).dt.hour
df['route'] = df['origin'] + "_" + df['destination']

# Step 3: Simulate Target
np.random.seed(42)
df['Delayed'] = np.where(df['departure_hour'] >= 18, 1, 0)  # Flights after 6PM more likely delayed

# Step 4: Preprocessing
le_origin = LabelEncoder()
le_dest = LabelEncoder()
le_route = LabelEncoder()

df['origin_encoded'] = le_origin.fit_transform(df['origin'])
df['destination_encoded'] = le_dest.fit_transform(df['destination'])
df['route_encoded'] = le_route.fit_transform(df['route'])

features = ['origin_encoded', 'destination_encoded', 'departure_hour', 'route_encoded']
X = df[features]
y = df['Delayed']

# Step 5: Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 6: Train Model
model = RandomForestClassifier()
model.fit(X_train, y_train)

# Step 7: Predictions
y_pred = model.predict(X_test)

# Step 8: Evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 1.0

Confusion Matrix:
 [[162   0]
 [  0  42]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       162
           1       1.00      1.00      1.00        42

    accuracy                           1.00       204
   macro avg       1.00      1.00      1.00       204
weighted avg       1.00      1.00      1.00       204

