In [2]:
import requests
from bs4 import BeautifulSoup
import concurrent.futures
import pandas as pd

def scrape_holidays(year):
    url = f"https://www.timeanddate.com/holidays/pakistan/{year}?hol=1"
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', {'class': 'table table--left table--inner-borders-rows table--full-width table--sticky table--holidaycountry'})
    for row in table('tr', {'class': 'hiderow'}):
        row.decompose()
    df = pd.read_html(str(table), header=1)[0]
    df = df.drop([0, 1])
    df.dropna(inplace=True)
    df['Year'] = year
    return df


def print_progress(year):
    print(f"Finished scraping holidays for {year}")


all_data = pd.DataFrame()
years = range(2014, 2021)

with concurrent.futures.ThreadPoolExecutor() as executor:
    results = [executor.submit(scrape_holidays, year) for year in years]
    for f in concurrent.futures.as_completed(results):
        df = f.result()
        all_data = pd.concat([all_data, df], ignore_index=True)

df=all_data
print(df)
df['Date'] = df['Date'].replace(r'^\s*$', '01 Jan', regex=True)
df['Year'] = df['Year'].astype(str)  # convert 'Year' column to string type

df['Date'] = df['Date'] + ' ' + df['Year']
df['Date'] = pd.to_datetime(df['Date'], format='%d %b %Y')

import pandas as pd
DailyDataset = pd.read_csv('DailyDataset.csv')
DailyDataset['Date'] = pd.to_datetime(DailyDataset['Date'])

df['Date'] = pd.to_datetime(df['Date'])
matching_dates = df['Date']

# create a new column 'Event' and initialize it to 0
DailyDataset['Event'] = 0

# update the 'Event' column to 1 for matching dates
DailyDataset.loc[DailyDataset['Date'].isin(matching_dates), 'Event'] = 1

# print the updated DataFrame
DailyDataset.to_csv('DailyDataset')


       Date Unnamed: 1                 Name            Type  Year
0     5 Feb  Wednesday          Kashmir Day  Public Holiday  2014
1    23 Mar     Sunday         Pakistan Day  Public Holiday  2014
2     1 May   Thursday           Labour Day  Public Holiday  2014
3    29 Jul    Tuesday          Eid-ul-Fitr  Public Holiday  2014
4    30 Jul  Wednesday  Eid-ul-Fitr Holiday  Public Holiday  2014
..      ...        ...                  ...             ...   ...
116  11 Oct    Tuesday               Ashura  Public Holiday  2016
117  12 Oct  Wednesday       Ashura Holiday  Public Holiday  2016
118  12 Dec     Monday    Eid Milad un-Nabi  Public Holiday  2016
119  25 Dec     Sunday        Christmas Day  Public Holiday  2016
120  25 Dec     Sunday     Quaid-e-Azam Day  Public Holiday  2016

[121 rows x 5 columns]


KeyError: 'Date'

In [55]:
pd.set_option('display.max_rows', None)

DailyDataset.head(300)

Unnamed: 0,M01AB,M01AE,N02BA,N02BE,N05B,N05C,R03,R06,Temperature,Dew Point,Humidity,Wind Speed,Wind,Pressure,IntCondition,Year,Month,Day,Date,Event
0,0,4,3,32,7,0,0,2,61.9375,15.125,17.5625,15.6875,35.375,30.07,1.25,2014,1,2,2014-01-02,0
1,8,4,4,50,16,0,20,4,60.043478,21.565217,25.521739,8.173913,144.086957,30.008696,1.0,2014,1,3,2014-01-03,0
2,2,1,6,61,9,0,9,1,61.727273,37.272727,45.545455,5.5,106.5,28.555909,1.0,2014,1,4,2014-01-04,0
3,4,3,7,41,8,0,3,0,61.347826,25.956522,33.26087,4.913043,58.869565,29.910435,1.0,2014,1,5,2014-01-05,0
4,5,1,4,21,16,2,6,2,59.227273,25.818182,28.772727,8.227273,46.227273,24.507273,1.409091,2014,1,6,2014-01-06,0
5,0,0,0,0,0,0,0,0,61.136364,19.590909,23.272727,8.318182,40.227273,29.934091,1.0,2014,1,7,2014-01-07,0
6,5,3,9,26,17,1,10,0,59.714286,24.809524,29.285714,6.666667,60.333333,29.915714,2.238095,2014,1,8,2014-01-08,0
7,7,1,6,24,13,0,3,2,58.181818,23.0,36.318182,6.909091,25.681818,29.990909,1.0,2014,1,9,2014-01-09,0
8,5,1,2,52,14,2,0,2,57.210526,22.105263,32.368421,7.368421,53.421053,29.978421,1.631579,2014,1,10,2014-01-10,0
9,4,2,10,43,10,0,1,0,59.904762,23.142857,28.952381,7.571429,37.714286,28.639524,2.428571,2014,1,11,2014-01-11,0
