In [1]:
import plotly.io as pio
# Make 'iframe' the default renderer used when figures are shown.
pio.renderers.default = 'iframe'

In [2]:
import pandas as pd
import numpy as np
import io

import plotly.express as px
import plotly.graph_objects as go

import warnings
# Ignore every warning from here on: no category or module filter is given,
# so deprecation and pandas warnings are hidden as well.
warnings.filterwarnings('ignore')

In [3]:
# Location of the airline CSV that the rest of the notebook works from.
URL = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DV0101EN-SkillsNetwork/Data%20Files/airline_data.csv'

# Read the file with an explicit encoding, forcing the listed diversion
# columns to be parsed as strings.
df = pd.read_csv(
    URL,
    encoding="ISO-8859-1",
    dtype=dict.fromkeys(
        ['Div1Airport', 'Div1TailNum', 'Div2Airport', 'Div2TailNum'], str
    ),
)

print('Data downloaded and read into a dataframe!')

Data downloaded and read into a dataframe!


In [4]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0.1,Unnamed: 0,Year,Quarter,Month,DayofMonth,DayOfWeek,FlightDate,Reporting_Airline,DOT_ID_Reporting_Airline,IATA_CODE_Reporting_Airline,...,Div4WheelsOff,Div4TailNum,Div5Airport,Div5AirportID,Div5AirportSeqID,Div5WheelsOn,Div5TotalGTime,Div5LongestGTime,Div5WheelsOff,Div5TailNum
0,1295781,1998,2,4,2,4,1998-04-02,AS,19930,AS,...,,,,,,,,,,
1,1125375,2013,2,5,13,1,2013-05-13,EV,20366,EV,...,,,,,,,,,,
2,118824,1993,3,9,25,6,1993-09-25,UA,19977,UA,...,,,,,,,,,,
3,634825,1994,4,11,12,6,1994-11-12,HP,19991,HP,...,,,,,,,,,,
4,1888125,2017,3,8,17,4,2017-08-17,UA,19977,UA,...,,,,,,,,,,


In [5]:
# (rows, columns) of the loaded frame.
df.shape

(27000, 110)

In [6]:
# Draw a 500-row random sample with a fixed seed so the same rows come back on
# every run, then preview two of them.
data = df.sample(n=500, random_state=42)
data.head(n=2)

Unnamed: 0.1,Unnamed: 0,Year,Quarter,Month,DayofMonth,DayOfWeek,FlightDate,Reporting_Airline,DOT_ID_Reporting_Airline,IATA_CODE_Reporting_Airline,...,Div4WheelsOff,Div4TailNum,Div5Airport,Div5AirportID,Div5AirportSeqID,Div5WheelsOn,Div5TotalGTime,Div5LongestGTime,Div5WheelsOff,Div5TailNum
5312,985989,2006,1,3,29,3,2006-03-29,OO,20304,OO,...,,,,,,,,,,
18357,1782939,1993,3,8,3,2,1993-08-03,DL,19790,DL,...,,,,,,,,,,


In [7]:
# Display the Distance column of the sample.
data['Distance']

5312      109.0
18357     732.0
6428      117.0
15414    1846.0
10610     432.0
          ...  
18946     254.0
16291    1514.0
21818    1044.0
24116     366.0
16705    1182.0
Name: Distance, Length: 500, dtype: float64

## 1. Scatter Plot

Let us use a scatter plot to represent departure time changes with respect to airport distance

* Title as **Distance vs Departure Time**.
* x-axis label should be **Distance**
* y-axis label should be **DepTime**
* **Distance** column data from the flight delay dataset should be considered in x-axis
* **DepTime** column data from the flight delay dataset should be considered in y-axis
* Scatter plot markers should be of red color


In [8]:
# Scatter of flight distance (x) against departure time (y) for the sampled
# flights, drawn with red markers.
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=data['Distance'],
    y=data['DepTime'],
    mode='markers',
    marker=dict(color='red'),
))
fig.update_layout(
    title='Distance vs Departure Time',
    xaxis_title='Distance',
    yaxis_title='DepTime',
)
# Show explicitly, like the other plotting cells, instead of relying on the
# cell's last-expression display.
fig.show()

## 2. Line Plot

Let us now use a line plot to extract average monthly arrival delay time and see how it changes over the year.

* Title as **Month vs Average Flight Delay Time**.
* x-axis label should be **Month**
* y-axis label should be **ArrDelay**
* A new dataframe **line_data** should be created which consists of 2 columns average **arrival delay time per month** and **month** from the dataset
* **Month** column data from the line_data dataframe should be considered in x-axis
* **ArrDelay** column data from the line_data dataframe should be considered in y-axis
* Plotted line in the line plot should be of green color


In [9]:
df = data[['Month','ArrDelay']].groupby("Month")['ArrDelay'].mean().reset_index()

In [10]:
fig = go.Figure()

fig.add_trace(go.Scatter(
    x = df.Month,
    y = df.ArrDelay,
    mode = 'lines'
))

fig.update_layout(title= "Month vs Average Flight Delay Time", xaxis_title= "Month", yaxis_title= "Airplane Delay")
fig.show()

## Bar Chart

Let us use a bar chart to extract number of flights from a specific airline that goes to a destination

This plot should contain the following

* Title as **Total number of flights to the destination state split by reporting air**.
* x-axis label should be **DestState**
* y-axis label should be **Flights**
* Create a new dataframe called **bar_data** which contains 2 columns, **DestState** and **Flights**. Here **Flights** indicates the total number of flights in each combination.


In [11]:
# Total flights per destination state, with DestState kept as a regular column.
bar_data = data.groupby('DestState', as_index=False)['Flights'].sum()

In [12]:
# One bar per destination state; bar height is the summed Flights value.
fig = px.bar(
    data_frame=bar_data,
    x="DestState",
    y="Flights",
    title="Total number of flights to the destination state split by reporting air",
)

fig.show()

## Histogram

Let us represent the distribution of arrival delay using a histogram

* Title as **Total number of flights to the destination state split by reporting air**.
* x-axis label should be **ArrDelay**
* y-axis will show the count of arrival delay


In [13]:
data['ArrDelay'] = data['ArrDelay'].fillna(0)

In [14]:
fig = px.histogram(
    data.ArrDelay,
    title = 'Total number of flights to the destination state split by reporting air',
)

fig.show()

## Bubble Chart

Let us use a bubble plot to represent the number of flights per reporting airline

* Title as **Reporting Airline vs Number of Flights**.
* x-axis label should be **Reporting_Airline**
* y-axis label should be **Flights**
* size of the bubble should be **Flights** indicating number of flights
* Set the name of the hover tooltip to `Reporting_Airline` using the `hover_name` parameter.

In [15]:
df = data[['Reporting_Airline', 'Flights']].groupby("Reporting_Airline").sum().reset_index()
df.shape

(29, 2)

In [16]:
fig = px.scatter(
    df,
    x = 'Reporting_Airline',
    y = 'Flights',
    size = 'Flights',
    size_max = 60,
    opacity = 0.5,
    hover_name = "Reporting_Airline"
)

fig.show()

## Pie Chart

Let us represent the proportion of Flights by Distance Group (Flights indicated by numbers)

* Title as **Flight proportion by Distance Group**.
* values should be **Flights**
* names should be **DistanceGroup**


In [17]:
df = data[['DistanceGroup', 'Flights']].groupby("DistanceGroup").sum().reset_index()
df.head(4)

Unnamed: 0,DistanceGroup,Flights
0,1,90.0
1,2,136.0
2,3,91.0
3,4,69.0


In [18]:
fig = px.pie(
    df,
    values = "Flights",
    names = "DistanceGroup",
    title = "Flight propotion by Distance Group"
)
fig.show()

## Sunburst Plot

Let us represent the hierarchical view in the order of month and destination state, holding the number of flights as the value

*  Define hierarchy of sectors from root to leaves in `path` parameter. Here, we go from `Month` to `DestStateName` feature.
*   Set sector values in `values` parameter. Here, we can pass in `Flights` feature.
*   Show the figure.
*   Title as **Flight Distribution Hierarchy**

In [19]:
# Two-level hierarchy: months form the inner ring, destination states the
# outer ring; sector size is the summed Flights value.
fig = px.sunburst(
    data,
    path=['Month', 'DestStateName'],
    values='Flights',
    title='Flight Distribution Hierarchy',
)
fig.show()

## Happy Coding :)