In [21]:
import pandas as pd
import numpy as np
import plotly.express as px

In [2]:
holidays = pd.read_csv(r"..\Data\01_raw\holidays_events.csv")
holidays.head()

Unnamed: 0,date,type,locale,locale_name,description,transferred
0,2012-03-02,Holiday,Local,Manta,Fundacion de Manta,False
1,2012-04-01,Holiday,Regional,Cotopaxi,Provincializacion de Cotopaxi,False
2,2012-04-12,Holiday,Local,Cuenca,Fundacion de Cuenca,False
3,2012-04-14,Holiday,Local,Libertad,Cantonizacion de Libertad,False
4,2012-04-21,Holiday,Local,Riobamba,Cantonizacion de Riobamba,False


In [3]:
holidays.shape

(350, 6)

In [4]:
output = []

for column in holidays.columns:

    null_values  = np.sum(pd.isna(holidays[column]))
    unique = holidays[column].nunique()
    column_data_type = holidays[column].dtype

    output.append([column, null_values, unique, column_data_type])

output_df = pd.DataFrame(output, columns = ['column','null values', 'unique', 'datatype'])

describe_df = holidays.describe().transpose() 
combine_df = pd.concat([ output_df.set_index('column'), describe_df], axis = 1, join='outer')

combine_df

Unnamed: 0,null values,unique,datatype,count,unique.1,top,freq
date,0,312,object,350,312,2014-06-25,4
type,0,6,object,350,6,Holiday,221
locale,0,3,object,350,3,National,174
locale_name,0,24,object,350,24,Ecuador,174
description,0,103,object,350,103,Carnaval,10
transferred,0,2,bool,350,2,False,338


In [5]:
holidays.drop_duplicates(inplace=True)

In [6]:
holidays.shape

(350, 6)

In [7]:
fig = px.histogram(holidays, x='type', nbins=len(holidays['type'].unique()), text_auto=True)

fig.update_layout(
    title='Count of Holidays by Type',
    xaxis_title='Type',
    yaxis_title='Count',
    showlegend=False
)

fig.show()


Maximum holidays events are of type holiday.

In [13]:
fig = px.histogram(holidays, x='locale', nbins=len(holidays['locale'].unique()), text_auto=True)

fig.update_layout(
    title='Count of Holidays by Locale',
    xaxis_title='Locale',
    yaxis_title='Count',
    showlegend=False
)

fig.show()

Locale based distribution of holidays.

In [36]:
holiday_counts = holidays.groupby(['type', 'locale']).size().reset_index(name='count')

fig = px.bar(holiday_counts, x='type', y='count', color='locale', title='Holiday Counts by Type and Locale', text_auto=True)

fig.update_layout(
    xaxis_title='Type',
    yaxis_title='Count',
    xaxis_tickangle=-45,
    width=1000,  
    height=600   
)

fig.show()

Distribution of locale names by type.

In [30]:
holiday_counts = holidays.groupby(['locale_name', 'locale']).size().reset_index(name='count')

fig = px.bar(holiday_counts, x='locale_name', y='count', color='locale', title='Holiday Counts by Locale Name and Locale', text_auto=True)

fig.update_layout(
    xaxis_title='Locale Name',
    yaxis_title='Count',
    xaxis_tickangle=-45,
    width=1000,  
    height=600   
)

fig.show()