## Imports

In [1]:
import numpy as np
import pandas as pd
import datetime
import holidays

## Data Loading 



In [2]:
df=pd.read_csv("messages.csv")
df.head()

Unnamed: 0,date,msg,Unnamed: 2
0,2013-12-15 00:50:00,ищу на сегодня мужика 37,
1,2014-04-29 23:40:00,ПАРЕНЬ БИ ИЩЕТ ДРУГА СЕЙЧАС!! СМС ММС 0955532826,
2,2012-12-30 00:21:00,Днепр.м 43 позн.с д/ж *.о 067.16.34.576,
3,2012-08-18 00:50:00,ищу на сегодня мужика 37,
4,2013-10-26 23:11:00,Зая я тебя никогда не обижу люблю тебя!) Даше,


In [3]:
df=df[["date"]]
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   date    1000 non-null   object
dtypes: object(1)
memory usage: 7.9+ KB


In [4]:
df["date"]=pd.to_datetime(df["date"])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    1000 non-null   datetime64[ns]
dtypes: datetime64[ns](1)
memory usage: 7.9 KB


## Extract Date Features
- Year
- Month
- Day of the week
- Quarter

In [5]:
df["year"]=df["date"].dt.year
df["month"]=df["date"].dt.month
df["day"]=df["date"].dt.day
df["day_of_week"]=df["date"].dt.day_name()
df["quarter"]=df["date"].dt.quarter

In [6]:
df.head()

Unnamed: 0,date,year,month,day,day_of_week,quarter
0,2013-12-15 00:50:00,2013,12,15,Sunday,4
1,2014-04-29 23:40:00,2014,4,29,Tuesday,2
2,2012-12-30 00:21:00,2012,12,30,Sunday,4
3,2012-08-18 00:50:00,2012,8,18,Saturday,3
4,2013-10-26 23:11:00,2013,10,26,Saturday,4


## Extract Time Features
- Hour
- Minutes
- Seconds


In [7]:
df["hour"]=df["date"].dt.hour
df["minute"]=df["date"].dt.minute
df["second"]=df["date"].dt.second
df.head()

Unnamed: 0,date,year,month,day,day_of_week,quarter,hour,minute,second
0,2013-12-15 00:50:00,2013,12,15,Sunday,4,0,50,0
1,2014-04-29 23:40:00,2014,4,29,Tuesday,2,23,40,0
2,2012-12-30 00:21:00,2012,12,30,Sunday,4,0,21,0
3,2012-08-18 00:50:00,2012,8,18,Saturday,3,0,50,0
4,2013-10-26 23:11:00,2013,10,26,Saturday,4,23,11,0


## Holidays Extraction
- Weekends
- National Holidays



### Weekends Extraction

In [8]:
country_category="gulf"
if country_category=="gulf":
    df["is_weekend"]=df["day_of_week"].isin(["Friday", "Saturday"])
else:
    df["is_weekend"]=df["day_of_week"].isin([ "Saturday", "Sunday"])

In [9]:
df.head(20)

Unnamed: 0,date,year,month,day,day_of_week,quarter,hour,minute,second,is_weekend
0,2013-12-15 00:50:00,2013,12,15,Sunday,4,0,50,0,False
1,2014-04-29 23:40:00,2014,4,29,Tuesday,2,23,40,0,False
2,2012-12-30 00:21:00,2012,12,30,Sunday,4,0,21,0,False
3,2012-08-18 00:50:00,2012,8,18,Saturday,3,0,50,0,True
4,2013-10-26 23:11:00,2013,10,26,Saturday,4,23,11,0,True
5,2016-03-08 22:52:00,2016,3,8,Tuesday,1,22,52,0,False
6,2014-02-18 00:23:00,2014,2,18,Tuesday,1,0,23,0,False
7,2012-11-23 01:10:00,2012,11,23,Friday,4,1,10,0,True
8,2014-12-23 01:20:00,2014,12,23,Tuesday,4,1,20,0,False
9,2012-11-03 23:46:00,2012,11,3,Saturday,4,23,46,0,True


### National Holidays Extraction
https://towardsdatascience.com/the-easiest-way-to-identify-holidays-in-python-58333176af4f

In [10]:
years_range=list(range(2012, 2016))
country="SaudiArabia"
holidays=holidays.CountryHoliday(country,years=years_range).items()

In [11]:
holidays

dict_items([(datetime.date(2012, 8, 18), 'Eid al-Fitr Holiday'), (datetime.date(2012, 8, 19), 'Eid al-Fitr Holiday'), (datetime.date(2012, 8, 20), 'Eid al-Fitr Holiday'), (datetime.date(2012, 8, 21), 'Eid al-Fitr Holiday'), (datetime.date(2012, 10, 25), 'Arafat Day Holiday'), (datetime.date(2012, 10, 26), 'Eid al-Adha Holiday'), (datetime.date(2012, 10, 27), 'Eid al-Adha Holiday'), (datetime.date(2012, 10, 28), 'Eid al-Adha Holiday'), (datetime.date(2012, 10, 29), 'Eid al-Adha Holiday (observed)'), (datetime.date(2012, 10, 30), 'Eid al-Adha Holiday (observed)'), (datetime.date(2012, 9, 23), 'National Day Holiday'), (datetime.date(2013, 8, 7), 'Eid al-Fitr Holiday'), (datetime.date(2013, 8, 8), 'Eid al-Fitr Holiday'), (datetime.date(2013, 8, 9), 'Eid al-Fitr Holiday'), (datetime.date(2013, 8, 10), 'Eid al-Fitr Holiday'), (datetime.date(2013, 8, 11), 'Eid al-Fitr Holiday (observed)'), (datetime.date(2013, 8, 12), 'Eid al-Fitr Holiday (observed)'), (datetime.date(2013, 10, 14), 'Arafat Da

In [12]:
holidays=[str(x[0]) for x in holidays]

In [13]:
holidays

['2012-08-18',
 '2012-08-19',
 '2012-08-20',
 '2012-08-21',
 '2012-10-25',
 '2012-10-26',
 '2012-10-27',
 '2012-10-28',
 '2012-10-29',
 '2012-10-30',
 '2012-09-23',
 '2013-08-07',
 '2013-08-08',
 '2013-08-09',
 '2013-08-10',
 '2013-08-11',
 '2013-08-12',
 '2013-10-14',
 '2013-10-15',
 '2013-10-16',
 '2013-10-17',
 '2013-09-23',
 '2014-07-27',
 '2014-07-28',
 '2014-07-29',
 '2014-07-30',
 '2014-10-03',
 '2014-10-04',
 '2014-10-05',
 '2014-10-06',
 '2014-10-07',
 '2014-10-08',
 '2014-09-23',
 '2015-07-17',
 '2015-07-18',
 '2015-07-19',
 '2015-07-20',
 '2015-07-21',
 '2015-07-22',
 '2015-09-22',
 '2015-09-23',
 '2015-09-24',
 '2015-09-25',
 '2015-09-26']

In [14]:
df['only_date'] =  pd.to_datetime(df["only_date"].dt.strftime('%Y-%m-%d'))

In [15]:
df["is_holiday"]=df["date"].isin(holidays)

In [16]:
df_holidays=df.loc[df.is_holiday]

In [17]:
df_holidays.head()

Unnamed: 0,date,year,month,day,day_of_week,quarter,hour,minute,second,is_weekend,only_date,is_holiday
558,2014-10-06,2014,10,6,Monday,4,0,0,0,False,2014-10-06,True
