In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Location of the input CSV. This is an absolute, machine-specific path;
# adjust it when running the notebook from a different machine or folder.
DATA_PATH = r'C:\Users\user\Desktop\Delay froyo\M1_final.csv'
data = pd.read_csv(DATA_PATH)

In [3]:
# Remove the TAIL_NUM and DEST columns from the frame.
# errors='ignore' keeps this cell re-runnable: on a second execution the
# columns are already gone and a plain drop would raise KeyError.
data = data.drop(columns=['TAIL_NUM', 'DEST'], errors='ignore')

In [4]:
# Discard every row that has at least one missing value in any column.
data = data.dropna(axis='index', how='any')

In [5]:
# Columns fed to the model, grouped by the preprocessing they receive:
# categorical columns are one-hot encoded, numerical columns are standardised.
categorical_features = [
    'OP_UNIQUE_CARRIER',
    'Condition',
]
# NOTE(review): DEP_TIME_M and TAXI_OUT look like values that are only known
# at/after departure, which would leak information about DEP_DELAY -- confirm
# against the dataset description before trusting the reported scores.
numerical_features = [
    'MONTH',
    'DAY_OF_MONTH',
    'DAY_OF_WEEK',
    'CRS_ELAPSED_TIME',
    'DISTANCE',
    'CRS_DEP_M',
    'DEP_TIME_M',
    'CRS_ARR_M',
    'Temperature',
    'Humidity',
    'Wind Speed',
    'Pressure',
    'sch_dep',
    'sch_arr',
    'TAXI_OUT',
]

In [6]:
# Per-column preprocessing applied inside the model pipeline:
#   - 'num': standardise the numerical columns;
#   - 'cat': one-hot encode the categorical columns.
# handle_unknown='ignore' makes the encoder emit an all-zero encoding for a
# category that was not present when it was fitted, instead of raising
# ValueError at transform/predict time (e.g. a rare value that only lands in
# the test split).
# Columns not listed in either group are dropped (ColumnTransformer default).
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ]
)

In [7]:
# Separate the target from the predictors.
# The target is binarised: 1 when DEP_DELAY is strictly positive, otherwise 0.
TARGET_COLUMN = 'DEP_DELAY'
y = data[TARGET_COLUMN].gt(0).astype(int)
X = data.drop(columns=TARGET_COLUMN)

In [8]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified on y -- consider whether it should be.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Full model: the column preprocessor followed by a random-forest classifier.
# random_state is fixed (same seed as the train/test split) so the randomised
# forest produces the same model and scores on every Restart & Run All.
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', RandomForestClassifier(random_state=42)),
    ]
)


In [10]:
# Fit the preprocessing steps and the classifier on the training split.
pipeline.fit(X=X_train, y=y_train)

In [11]:
# Predict the binary delay label for every row of the held-out test split.
y_pred = pipeline.predict(X=X_test)

In [12]:
# Score the test-set predictions: overall accuracy plus the per-class
# precision / recall / F1 text report.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
report = classification_report(y_true=y_test, y_pred=y_pred)

In [13]:
# Show the accuracy (two decimals) followed by the full classification report.
accuracy_line = f'Accuracy: {accuracy:.2f}'
print(accuracy_line)
print('Classification Report:\n', report, sep=' ')

Accuracy: 0.82
Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.96      0.88      4179
           1       0.82      0.42      0.56      1585

    accuracy                           0.82      5764
   macro avg       0.82      0.69      0.72      5764
weighted avg       0.82      0.82      0.79      5764



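The output reports the accuracy of the prediction model used in the code (0.82), along with the per-class precision, recall and F1-score and their macro and weighted averages. Note that recall for the delayed class (1) is only 0.42, so the model misses more than half of the actual delays even though the overall accuracy looks high.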