In [1]:
import streamlit as st
import pandas as pd
import plotly.express as px
import base64
import matplotlib.pyplot as plt

In [2]:
@st.cache_data
def load_data(csv):
    """Load a CSV file into a pandas DataFrame.

    The function is wrapped in ``st.cache_data``, so Streamlit can reuse the
    parsed frame instead of re-reading the file on every rerun.

    Args:
        csv: Path (or any other source ``pd.read_csv`` accepts) of the CSV.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    frame = pd.read_csv(csv)
    return frame




In [4]:
# Path of the flights CSV, relative to the working directory.
FLIGHTS_CSV = "Flight_delay.csv/flights_sample_3m.csv"
flights_data = load_data(FLIGHTS_CSV)

In [6]:
# Column labels of the loaded frame as a plain Python list; the bare name on
# the last line makes the notebook display it.
column_names = list(flights_data.columns)
column_names

['FL_DATE',
 'AIRLINE',
 'AIRLINE_DOT',
 'AIRLINE_CODE',
 'DOT_CODE',
 'FL_NUMBER',
 'ORIGIN',
 'ORIGIN_CITY',
 'DEST',
 'DEST_CITY',
 'CRS_DEP_TIME',
 'DEP_TIME',
 'DEP_DELAY',
 'TAXI_OUT',
 'WHEELS_OFF',
 'WHEELS_ON',
 'TAXI_IN',
 'CRS_ARR_TIME',
 'ARR_TIME',
 'ARR_DELAY',
 'CANCELLED',
 'CANCELLATION_CODE',
 'DIVERTED',
 'CRS_ELAPSED_TIME',
 'ELAPSED_TIME',
 'AIR_TIME',
 'DISTANCE',
 'DELAY_DUE_CARRIER',
 'DELAY_DUE_WEATHER',
 'DELAY_DUE_NAS',
 'DELAY_DUE_SECURITY',
 'DELAY_DUE_LATE_AIRCRAFT']

In [8]:
# Parse 'FL_DATE' as a datetime; the format string expects year-month-day.
flights_data['FL_DATE'] = pd.to_datetime(flights_data['FL_DATE'], format='%Y-%m-%d')

# Label every flight with its calendar day and month, e.g. '05-Jan'.
# '%d' is zero-padded, so the category labels built below must be zero-padded too.
flights_data['Day_Month'] = flights_data['FL_DATE'].dt.strftime('%d-%b')

# Count the flights per day-month label (the year is not part of the label,
# so flights from different years that share a date are counted together).
daily_flights = flights_data.groupby('Day_Month')['FL_NUMBER'].count().reset_index(name='TotalFlights')

# All twelve months, so that no date falls outside the category list.
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# Calendar-ordered labels in the same zero-padded form that strftime produces.
# Any label missing from this list would be turned into NaN by pd.Categorical.
day_month_order = [f"{day:02d}-{month}" for month in months for day in range(1, 32)]

# Make Day_Month an ordered categorical so that sorting follows the calendar
# instead of the alphabet.
daily_flights['Day_Month'] = pd.Categorical(daily_flights['Day_Month'], categories=day_month_order, ordered=True)

# Sort daily_flights chronologically by Day_Month.
daily_flights = daily_flights.sort_values('Day_Month')

# Line graph of the total number of flights over the calendar year.
fig = px.line(daily_flights, x='Day_Month', y='TotalFlights', markers=True,
              labels={'Day_Month': 'Day-Month', 'TotalFlights': 'Total Flights'},
              title='Total Number of Flights by Day and Month',
              hover_name='Day_Month', hover_data={'Day_Month': False, 'TotalFlights': True})

# Custom tooltip text for every point.
fig.update_traces(hovertemplate='<b>Day-Month:</b> %{x}<br><b>Total Flights:</b> %{y:.0f}')
st.plotly_chart(fig)

DeltaGenerator()

In [None]:
# BAR CHART FOR TOP 5 AIRPORTS DUE TO SELECTED DELAY TYPES
st.header('Top 5 Airports by Delay Type')
st.write("The bar chart below shows the top 5 busiest airports based on your chosen delay type.")
st.write("There are five types of delays: ")
st.write("1. CarrierDelay ✈ Delay caused due to carrier for example maintenance, crew problems, aircraft cleaning, fueling, etc.")
st.write("2. WeatherDelay ✈ Delay due to extreme weather conditions.")
st.write("3. NASDelay ✈ Delay by National Aviation System (NAS) caused due to non-extreme weather conditions, airport operations, heavy traffic volumes, air traffic control, etc.")
st.write("4. SecurityDelay ✈ Delay due to security issues.")
st.write("5. LateAircraftDelay ✈ Delay due to delayed aircrafts.")

# Keep only flights that arrived late ('ARR_DELAY' is the column name in this dataset).
delayed_flights = flights_data[flights_data['ARR_DELAY'] > 0]

# Map each delay label shown to the user onto the dataset column that holds it.
delay_columns = {
    'CarrierDelay': 'DELAY_DUE_CARRIER',
    'WeatherDelay': 'DELAY_DUE_WEATHER',
    'NASDelay': 'DELAY_DUE_NAS',
    'SecurityDelay': 'DELAY_DUE_SECURITY',
    'LateAircraftDelay': 'DELAY_DUE_LATE_AIRCRAFT',
}

# The selectable reasons, in the same order as the numbered list above.
delay_reasons = list(delay_columns)

# Selectbox to choose the reason for delay.
selected_reason = st.selectbox("Select Reason for Delay:", delay_reasons)

# Keep only flights with a positive delay for the selected reason. Every reason,
# including 'LateAircraftDelay', is filtered so the chart matches its title.
delayed_flights = delayed_flights[delayed_flights[delay_columns[selected_reason]] > 0]

# Count delayed flights per destination airport.
delayed_by_airport = delayed_flights.groupby('DEST')['FL_NUMBER'].count().reset_index()
delayed_by_airport.columns = ['Airport', 'Delayedflights_datalights']

# Sort airports by the number of delayed flights, smallest first.
delayed_by_airport_sorted = delayed_by_airport.sort_values(by='Delayedflights_datalights', ascending=True)

# The five airports with the MOST delayed flights are the last five rows of the
# ascending sort. They are kept in ascending order so that the largest is the
# last row handed to the horizontal bar chart.
top_5_airports = delayed_by_airport_sorted.tail(5)

# Horizontal bar plot for the top 5 airports.
fig = px.bar(top_5_airports, y='Airport', x='Delayedflights_datalights',
             title=f'Top 5 Airports with the Highest Number of Delayed Flights due to {selected_reason}',
             labels={'Airport': 'Airport Code', 'Delayedflights_datalights': 'Number of Delayed Flights'},
             orientation='h')

# Custom tooltip text for every bar.
fig.update_traces(hovertemplate='Airport: %{y}<br>Number of Delayed Flights: %{x:.0f}<extra></extra>')
st.plotly_chart(fig)
