In [2]:
import holidays
from datetime import date
for day in sorted(holidays.Nigeria(years = 2021).items()):
    print(day)

(datetime.date(2021, 1, 1), "New Year's Day")
(datetime.date(2021, 4, 2), 'Good Friday')
(datetime.date(2021, 4, 5), 'Easter Monday')
(datetime.date(2021, 5, 1), "Workers' Day")
(datetime.date(2021, 5, 3), "Workers' Day (observed)")
(datetime.date(2021, 5, 13), 'Eid-el-Fitr (estimated)')
(datetime.date(2021, 5, 14), 'Eid-el-Fitr Holiday (estimated)')
(datetime.date(2021, 6, 12), 'Democracy Day')
(datetime.date(2021, 6, 14), 'Democracy Day (observed)')
(datetime.date(2021, 7, 20), 'Eid-el-Kabir (estimated)')
(datetime.date(2021, 7, 21), 'Eid-el-Kabir Holiday (estimated)')
(datetime.date(2021, 10, 1), 'Independence Day')
(datetime.date(2021, 10, 18), 'Eid-el-Mawlid (estimated)')
(datetime.date(2021, 12, 25), 'Christmas Day')
(datetime.date(2021, 12, 26), 'Boxing Day')
(datetime.date(2021, 12, 27), 'Christmas Day (observed)')
(datetime.date(2021, 12, 28), 'Boxing Day (observed)')


In [3]:
import pandas as pd
df =pd.read_csv('../data/nb.csv')


In [6]:
import pandas as pd
import holidays

def add_holiday_feature(df, date_column):
    """
    Add a holiday feature to the dataframe based on the specified date column.
    
    Parameters:
    df (pd.DataFrame): The input dataframe.
    date_column (str): The name of the column containing datetime values.
    
    Returns:
    pd.DataFrame: Dataframe with a new 'is_holiday' column indicating if the date is a holiday.
    """
    # Ensure the date column is in datetime format and handle NaN values
    df[date_column] = pd.to_datetime(df[date_column], errors='coerce')
    
    # Drop rows with NaN values in the date column
    df = df.dropna(subset=[date_column])
    
    # Generate holiday feature
    years = df[date_column].dt.year.unique()
    ng_holidays = holidays.Nigeria(years=years)
    df['is_holiday'] = df[date_column].dt.date.apply(lambda x: x in ng_holidays)
    
    print("Completed adding holiday feature.")
    return df


In [7]:
# Example usage:
df = pd.read_csv('../data/nb.csv')
df = add_holiday_feature(df, 'Trip Start Time')
print(df.head())

Completed adding holiday feature.
   Trip ID                         Trip Origin  \
0   391996  6.508813001668548,3.37740316890347   
1   391997                 6.4316714,3.4555375   
2   391998         6.631679399999999,3.3388976   
3   391999         6.572757200000001,3.3677082   
4   392001                 6.6010417,3.2766339   

                  Trip Destination     Trip Start Time        Trip End Time  \
0      6.650969799999999,3.3450307 2021-07-01 07:28:04  2021-07-01 07:29:37   
1  6.4280814653326,3.4721885847586 2021-07-01 06:38:04  2021-07-01 07:07:28   
2      6.508324099999999,3.3590397 2021-07-01 06:21:02  2021-07-01 07:02:23   
3      6.584881099999999,3.3614073 2021-07-01 07:16:07  2021-07-01 07:29:42   
4              6.4501069,3.3916154 2021-07-01 09:30:59  2021-07-01 09:34:36   

   is_holiday  
0       False  
1       False  
2       False  
3       False  
4       False  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['is_holiday'] = df[date_column].dt.date.apply(lambda x: x in ng_holidays)


In [8]:
print(df['is_holiday'].unique())


[False  True]
