In [2]:
import numpy as np
from datetime import datetime, timedelta
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [3]:
def generate_date_string(start_date, end_date):
    """Return every day from start_date to end_date as 'YYYY/MM/DD' strings.

    Both bounds are 'YYYY/MM/DD' strings and both are included in the result.
    An end_date earlier than start_date yields an empty list.

    Raises:
        ValueError: from datetime.strptime when a bound does not match the format.
    """
    fmt = "%Y/%m/%d"
    first_day = datetime.strptime(start_date, fmt)
    last_day = datetime.strptime(end_date, fmt)

    # +1 because the end date itself belongs to the range.
    day_count = (last_day - first_day).days + 1
    return [
        (first_day + timedelta(days=offset)).strftime(fmt)
        for offset in range(day_count)
    ]

In [19]:
def generate_date_array(start_date, end_date):
    """Return every day from start_date to end_date as a NumPy array of 'YYYYMMDD' strings.

    Both bounds are given as 'YYYY/MM/DD' strings and both are included.
    The output strings carry no separators (e.g. '20140101').

    Raises:
        ValueError: from datetime.strptime when a bound does not match the format.
    """
    first_day = datetime.strptime(start_date, "%Y/%m/%d")
    last_day = datetime.strptime(end_date, "%Y/%m/%d")

    # Number of whole days between the bounds; the end date is included below.
    span = (last_day - first_day).days
    compact_dates = [
        (first_day + timedelta(days=step)).strftime("%Y%m%d")
        for step in range(span + 1)
    ]

    return np.array(compact_dates)

In [20]:
def get_day_of_week(date_string):
    """Return the weekday name (e.g. 'Wednesday') of a 'YYYY/MM/DD' date string.

    When a ValueError is raised while handling the date, no exception
    propagates: an explanatory message string is returned in place of a
    weekday name, so callers receive a string either way.
    """
    invalid_message = "Invalid date format. Please provide a date in the format YYYY/MM/DD."
    try:
        # %A formats the date as the locale's full weekday name.
        return datetime.strptime(date_string, "%Y/%m/%d").strftime("%A")
    except ValueError:
        return invalid_message

In [36]:
# Years to cover (the stop value 2023 is exclusive, so this is 2014..2022),
# plus the first and last day of each year as 'YYYY/MM/DD' strings.
years = np.arange(2014, 2023)
start_dates = [f"{year}/01/01" for year in years]
end_dates = [f"{year}/12/31" for year in years]

array([2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022])

In [53]:
# Build one calendar table per year. Each d[year] has three columns:
#   0       -> compact 'YYYYMMDD' date (unnamed column from generate_date_array)
#   'dates' -> 'YYYY/MM/DD' date
#   'weeks' -> weekday name of that date
d = {}
# zip pairs every year with its own bounds; indexing with a hard-coded
# `year - 2014` offset would silently break if the year range were changed.
for year, start_date, end_date in zip(years, start_dates, end_dates):
    dates_strings = generate_date_string(start_date, end_date)
    dates_array = generate_date_array(start_date, end_date)
    dates_df = pd.DataFrame({'dates': dates_strings})
    dates_df['weeks'] = dates_df['dates'].apply(get_day_of_week)
    dates_list_df = pd.DataFrame(dates_array)
    # Side-by-side join on the shared default index: [0, 'dates', 'weeks'].
    d[year] = pd.concat([dates_list_df, dates_df], axis=1)

{2014:             0       dates      weeks
 0    20140101  2014/01/01  Wednesday
 1    20140102  2014/01/02   Thursday
 2    20140103  2014/01/03     Friday
 3    20140104  2014/01/04   Saturday
 4    20140105  2014/01/05     Sunday
 ..        ...         ...        ...
 360  20141227  2014/12/27   Saturday
 361  20141228  2014/12/28     Sunday
 362  20141229  2014/12/29     Monday
 363  20141230  2014/12/30    Tuesday
 364  20141231  2014/12/31  Wednesday
 
 [365 rows x 3 columns],
 2015:             0       dates      weeks
 0    20150101  2015/01/01   Thursday
 1    20150102  2015/01/02     Friday
 2    20150103  2015/01/03   Saturday
 3    20150104  2015/01/04     Sunday
 4    20150105  2015/01/05     Monday
 ..        ...         ...        ...
 360  20151227  2015/12/27     Sunday
 361  20151228  2015/12/28     Monday
 362  20151229  2015/12/29    Tuesday
 363  20151230  2015/12/30  Wednesday
 364  20151231  2015/12/31   Thursday
 
 [365 rows x 3 columns],
 2016:             0  

In [56]:
# Keep only the non-weekend rows of every yearly calendar table.
d_weekday = {}
for year in years:
    is_weekend = d[year]["weeks"].isin(["Saturday", "Sunday"])
    # d[year] has columns [0, 'dates', 'weeks'], where 0 is the compact
    # 'YYYYMMDD' date. Assigning ['dates', 'weeks', 'dates1'] positionally
    # relabelled all three columns and left 'dates1' holding weekday names,
    # so only the unnamed column is renamed here.
    # NOTE(review): this makes 'dates1' the 'YYYYMMDD' form - confirm that is
    # the format the archive URL expects; otherwise use 'dates' downstream.
    d_weekday[year] = d[year][~is_weekend].rename(columns={0: "dates1"})

In [61]:
# Ask which of the prepared years to process; int() raises ValueError on
# non-numeric input.
year_of = int(input("Enter chosen year"))
if year_of not in years:
    # Fail here with a clear message instead of a KeyError when the per-year
    # dictionaries are indexed with this value in a later cell.
    raise ValueError(
        f"Year {year_of} is outside the prepared range {years[0]}-{years[-1]}."
    )

In [62]:
# One archive URL per row of the chosen year's weekend-filtered table, using
# the value in its 'dates1' column as the `date` query parameter.
dates1 = d_weekday[year_of]['dates1'].values
pages = [
    f"https://www.bondspot.pl/tbsp_index_archiwum?date={day}"
    for day in dates1
]

In [27]:
# Accumulators filled by the scraping loop (four independent empty lists);
# `cells` holds the table cells most recently read from a page.
dates, opening_values, closing_values, cells = [], [], [], []

In [28]:
# Download every archive page and record the first three <td> cells of its
# last table row as (date, opening value, closing value).
# The result lists are appended to, not reset: re-run the cell that creates
# them before re-running this one, otherwise rows are duplicated.
skipped_pages = []  # URLs that failed or had no usable row, kept for inspection
with requests.Session() as session:  # one session so connections are reused
    for page in pages:
        # The timeout stops a single unresponsive request from hanging the run.
        response = session.get(page, timeout=30)
        if not response.ok:
            skipped_pages.append(page)
            continue

        soup = BeautifulSoup(response.content, "html.parser")
        rows = soup.find_all('tr')
        # Read the cells afresh for each page. Previously a page without rows
        # reused the cells left over from the preceding page and appended its
        # values a second time (or raised IndexError on the very first page).
        cells = rows[-1].find_all('td') if rows else []
        if len(cells) < 3:
            skipped_pages.append(page)
            continue

        dates.append(cells[0].text.strip())
        opening_values.append(cells[1].text.strip())
        closing_values.append(cells[2].text.strip())

In [65]:
# Collect the scraped values of the chosen year into one table; every other
# year keeps an empty placeholder frame.
d_final = {year: pd.DataFrame() for year in years}
# Building from a dict of columns names the columns directly and still works
# when nothing was scraped. Concatenating three empty frames and then
# assigning three column names raised a length-mismatch ValueError.
d_final[year_of] = pd.DataFrame({
    'dates': dates,
    'opening_values': opening_values,
    'closing_values': closing_values,
})