In [1]:
import pandas as pd
import plotly.express as px

In [2]:
# Location of the airline-passengers CSV, kept in a named constant so the source is easy to spot and swap.
DATA_URL = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv"
data = pd.read_csv(DATA_URL)

In [3]:
# Preview the first rows to confirm the CSV parsed into the expected Month / Passengers columns.
data.head()

Unnamed: 0,Month,Passengers
0,1949-01,112
1,1949-02,118
2,1949-03,132
3,1949-04,129
4,1949-05,121


### Shape of Data

In [4]:
# (rows, columns) of the loaded frame.
data.shape

(144, 2)

### Basic information about the dataset and data types

In [5]:
# Column dtypes and non-null counts; at this point Month is still an object (string) column.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 144 entries, 0 to 143
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Month       144 non-null    object
 1   Passengers  144 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 2.4+ KB


## Convert the `Month` column to datetime

In [6]:
# Parse the 'YYYY-MM' strings into timestamps so the .dt accessor and time-axis plots work downstream.
month_as_datetime = pd.to_datetime(data['Month'], format='%Y-%m')
data['Month'] = month_as_datetime

### Summary statistics

In [7]:
# Summary statistics for both columns (Month is summarised as a datetime after the conversion above).
data.describe()

Unnamed: 0,Month,Passengers
count,144,144.0
mean,1954-12-16 05:00:00,280.298611
min,1949-01-01 00:00:00,104.0
25%,1951-12-24 06:00:00,180.0
50%,1954-12-16 12:00:00,265.5
75%,1957-12-08 18:00:00,360.5
max,1960-12-01 00:00:00,622.0
std,,119.966317


## Let's Analyze the data

**How has the number of airline passengers varied over time?**

In [None]:
# Raw monthly series over the full period: the baseline view of how passenger counts evolve.
fig = px.line(
    data,
    x='Month',
    y='Passengers',
    labels={'Passengers': 'Number of Passengers'},
    title='Air Passengers Time Series Data',
)
fig.show()

  v = v.dt.to_pydatetime()


**Are there any noticeable trends or patterns in the data?**

In [None]:
# 12-month rolling mean of the passenger counts, overlaid on the raw series to expose the
# underlying trend. A full window is required, so the first 11 points of the overlay are NaN.
rolling_mean_12m = data['Passengers'].rolling(window=12).mean()

fig2 = px.line(
    data,
    x='Month',
    y='Passengers',
    title='Air Passengers Trend with Moving Average',
)
fig2.add_scatter(
    x=data['Month'],
    y=rolling_mean_12m,
    mode='lines',
    name='Moving Average',
)
fig2.show()

**What is the overall seasonality of air passenger traffic throughout the years?**

In [None]:
# Seasonal plot: one line per year, with the calendar month number (1-12) on the x-axis,
# so the within-year shape can be compared across years.
month_number = data['Month'].dt.month
year = data['Month'].dt.year

# `labels` is keyed by column name. x is passed as an array, so its key is the argument
# name 'x'; y is passed as the column name 'Passengers', so that (not 'y') is the key
# that actually renames the y-axis.
fig3 = px.line(data, x=month_number, y='Passengers', color=year,
               title='Seasonal Plot of Air Passengers Data',
               labels={'x': 'Month', 'Passengers': 'Number of Passengers'},
               line_group=year)
fig3.update_layout(legend_title_text='Year')
fig3.show()

**Are there any specific months or seasons that exhibit higher passenger numbers?**

In [None]:
# Independent working copy (copy() is deep by default) so later edits to df leave `data` untouched.
df = data.copy()

In [None]:
# Quick look at the working copy before it is modified.
df.head()

In [None]:
# Replace the timestamps in the working copy with month names ('January', ...).
# The names are derived from `data`, not from df['Month'] itself, so the cell stays
# re-runnable: once df['Month'] holds strings, df['Month'].dt would raise.
df['Month'] = data['Month'].dt.month_name()

# Average passengers per calendar month across all years.
# sort=False keeps groups in first-appearance order instead of alphabetical order
# (April, August, ...). NOTE(review): this yields January..December only because the
# rows are chronological and start in January, as the preview of `data` shows.
data_avg_monthly = df.groupby('Month', sort=False)['Passengers'].mean().reset_index()
data_avg_monthly

In [None]:
# Bar chart of the per-month averages; bars follow the row order of data_avg_monthly.
monthly_axis_labels = {'Month': 'Month', 'Passengers': 'Average Number of Passengers'}
fig4 = px.bar(
    data_avg_monthly,
    x='Month',
    y='Passengers',
    labels=monthly_axis_labels,
    title='Average Monthly Passengers',
)
fig4.show()

**How does the distribution of passengers vary across different years?**

In [None]:
# One box per calendar year, showing how the spread of monthly passenger counts shifts over time.
# `labels` is keyed by column name: x is an array (key 'x'), while y is the column
# 'Passengers', so the y-axis label must be keyed by 'Passengers' rather than 'y'.
fig5 = px.box(data, x=data['Month'].dt.year, y='Passengers', title='Distribution of Passengers Across Years',
              labels={'x': 'Year', 'Passengers': 'Number of Passengers'})
fig5.show()

**Are there any outliers or anomalies in the passenger data that require further investigation?**


In [None]:
# Single box plot over every monthly observation; points beyond the whiskers are outlier candidates.
# The label override is keyed by the column name 'Passengers' (a 'y' key is ignored
# because y is given as a column name, not as an array).
fig6 = px.box(data, y='Passengers', title='Box Plot of Air Passengers Data',
              labels={'Passengers': 'Number of Passengers'})
fig6.show()

**How stable is the growth rate of air passenger traffic over the years?**


In [None]:
# Month-over-month percentage change in passengers. The first row has no predecessor, so it is NaN.
data['Passengers_Pct_Change'] = data['Passengers'].pct_change() * 100

# Plot against the existing 'Month' column. The frame has no 'Years' column, so
# x='Years' made px.line raise instead of drawing the chart.
fig7 = px.line(data, x='Month', y='Passengers_Pct_Change', title='Percentage Change in Air Passengers Over Time')
fig7.show()

**What are the busiest months or seasons for air travel, and how has this changed over time?**


In [None]:
# Full time series again, with each calendar year drawn as its own coloured segment
# so the yearly peaks can be compared along the timeline.
year_of_observation = data['Month'].dt.year
fig8 = px.line(
    data,
    x='Month',
    y='Passengers',
    color=year_of_observation,
    labels={'Month': 'Month', 'Passengers': 'Number of Passengers'},
    title='Variation in Passenger Numbers Across Months Over the Years',
)
fig8.show()