In [1]:
from datetime import datetime, timedelta
from pathlib import Path

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
def generate_date_string(start_date, end_date):
    """Return every calendar day in a range as "YYYY/MM/DD" strings.

    Args:
        start_date: First day of the range, formatted "YYYY/MM/DD".
        end_date: Last day of the range (inclusive), formatted "YYYY/MM/DD".

    Returns:
        A list of date strings in ascending order; empty when end_date
        falls before start_date.

    Raises:
        ValueError: If either argument does not match "YYYY/MM/DD".
    """
    date_format = "%Y/%m/%d"

    first_day = datetime.strptime(start_date, date_format)
    last_day = datetime.strptime(end_date, date_format)

    # Both bounds are inclusive, hence the "+ 1"; a negative span yields
    # an empty range and therefore an empty list.
    span_days = (last_day - first_day).days + 1
    return [
        (first_day + timedelta(days=offset)).strftime(date_format)
        for offset in range(span_days)
    ]

In [3]:
def generate_date_array(start_date, end_date):
    """Return every calendar day in a range as compact "YYYYMMDD" strings.

    Args:
        start_date: First day of the range, formatted "YYYY/MM/DD".
        end_date: Last day of the range (inclusive), formatted "YYYY/MM/DD".

    Returns:
        A NumPy array of date strings without separators, in ascending
        order; an empty array when end_date falls before start_date.

    Raises:
        ValueError: If either argument does not match "YYYY/MM/DD".
    """
    input_format = "%Y/%m/%d"
    first_day = datetime.strptime(start_date, input_format)
    last_day = datetime.strptime(end_date, input_format)

    # Inclusive day count between the two bounds (non-positive -> no days).
    day_count = (last_day - first_day).days + 1

    # Formatting without separators is the same as formatting with "/" and
    # stripping the slashes afterwards.
    compact_days = [
        (first_day + timedelta(days=step)).strftime("%Y%m%d")
        for step in range(day_count)
    ]

    return np.array(compact_days)

In [4]:
def get_day_of_week(date_string):
    """Return the weekday name for a "YYYY/MM/DD" date string.

    Args:
        date_string: Date formatted as "YYYY/MM/DD".

    Returns:
        The full weekday name produced by strftime("%A"), or an
        explanatory error message (as a plain string, not an exception)
        when date_string cannot be parsed.
    """
    try:
        # Parse and format in one step; a ValueError from either call is
        # reported through the returned message below.
        return datetime.strptime(date_string, "%Y/%m/%d").strftime("%A")
    except ValueError:
        return "Invalid date format. Please provide a date in the format YYYY/MM/DD."

In [106]:
# Calendar years covered by the notebook (2014 through 2023 inclusive).
years = np.arange(2014,2024,1)

# First and last day of each year as "YYYY/MM/DD" strings, in the same
# order as `years`.
start_dates = [f"{year}/01/01" for year in years]
end_dates = [f"{year}/12/31" for year in years]

In [107]:
# Build one calendar frame per year. Each frame has three columns:
#   0       - the day as a compact "YYYYMMDD" string,
#   'dates' - the same day as "YYYY/MM/DD",
#   'weeks' - the weekday name of that day.
d = {}
# Walk the three parallel sequences together instead of indexing the date
# lists with `year - 2014`: that offset silently selects the wrong entry
# (negative index) as soon as `years` no longer starts at 2014.
for year, start_date, end_date in zip(years, start_dates, end_dates):
    dates_strings = generate_date_string(start_date, end_date)
    dates_array = generate_date_array(start_date, end_date)
    dates_df = pd.DataFrame({'dates': dates_strings})
    dates_df['weeks'] = dates_df['dates'].apply(get_day_of_week)
    # Left unnamed on purpose: the resulting column label is the integer 0.
    dates_list_df = pd.DataFrame(dates_array)
    d[year] = pd.concat([dates_list_df, dates_df], axis=1)

In [108]:
# Per-year calendar restricted to Monday-Friday.
d_weekday = {}
for year in years:
    calendar = d[year]
    is_weekend = calendar["weeks"].isin(["Saturday", "Sunday"])
    business_days = calendar[~is_weekend]
    # NOTE: this rename is positional, so the labels do not describe the
    # contents: 'dates' ends up on the compact "YYYYMMDD" column, 'weeks' on
    # the "YYYY/MM/DD" column and 'dates1' on the weekday-name column. The
    # URL-building cell reads the compact dates through 'dates'.
    business_days.columns = ['dates', 'weeks', 'dates1']
    d_weekday[year] = business_days

In [109]:
# Ask which year to download. int() raises ValueError on non-numeric input.
year_of = int(input("Enter chosen year"))
# Fail right away with a readable message rather than with a bare KeyError
# in a later cell when the year has no prepared calendar.
if year_of not in years:
    raise ValueError(
        f"Year {year_of} is outside the prepared range {years[0]}-{years[-1]}."
    )

In [110]:
# One archive URL per weekday of the chosen year; the `date` query
# parameter is filled from the 'dates' column of the weekday calendar.
dates = d_weekday[year_of]['dates'].values
pages = [
    f"https://www.bondspot.pl/tbsp_index_archiwum?date={date}"
    for date in dates
]

In [111]:
# Accumulators filled by the scraping cell, one entry per downloaded page.
# (The `datyes` spelling is kept because later cells refer to that name.)
datyes, opening_values, closing_values = [], [], []
# Holds the <td> cells of the table row currently being read.
cells = []
# Per-request timeout in seconds, passed to requests.get.
timeout_seconds = 300

In [112]:
# Download every archive page and collect the three values of its last
# table row into datyes / opening_values / closing_values.

# Empty the accumulators first so that re-running this cell does not append
# a second copy of every row.
for accumulator in (datyes, opening_values, closing_values):
    accumulator.clear()

# URLs whose last table row did not contain the three expected cells.
skipped_pages = []
for page in pages:
    response = requests.get(page, timeout=timeout_seconds)
    # Stop on HTTP errors instead of parsing an error page as if it were data.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")
    rows = soup.find_all('tr')
    # As before, the values are read from the LAST <tr> of the page. The
    # cells are recomputed for every page, so a page without rows can no
    # longer reuse the cells left over from the previous page.
    cells = rows[-1].find_all('td') if rows else []
    if len(cells) < 3:
        # Skip the page as a whole; appending only part of a row would leave
        # the three lists with different lengths.
        skipped_pages.append(page)
        continue
    datyes.append(cells[0].text.strip())
    opening_values.append(cells[1].text.strip())
    closing_values.append(cells[2].text.strip())

if skipped_pages:
    print(f"Skipped {len(skipped_pages)} page(s) without a data row: {skipped_pages}")


In [113]:
# Assemble the scraped values for the chosen year into a cleaned frame.
d_final = {year: pd.DataFrame() for year in years}

# Building the frame from a dict names the columns up front, yields an empty
# three-column frame when nothing was scraped, and raises ValueError if the
# three lists have different lengths instead of silently padding with NaN.
quotes_raw = pd.DataFrame({
    'dates': datyes,
    'opening_values': opening_values,
    'closing_values': closing_values,
})

# The chain works on new frames at every step, so the numeric conversion no
# longer assigns into a filtered slice (the source of SettingWithCopyWarning).
d_final[year_of] = (
    quotes_raw
    # Rows whose closing value is the '-' placeholder are dropped.
    .loc[quotes_raw['closing_values'] != '-']
    .assign(
        closing_values=lambda frame: pd.to_numeric(frame['closing_values']),
        opening_values=lambda frame: pd.to_numeric(frame['opening_values']),
    )
    # Threshold kept from the original code; presumably it removes rows with
    # implausibly small opening values - TODO confirm the meaning of 28.
    .loc[lambda frame: frame['opening_values'] > 28]
)

In [114]:
# Save the cleaned frame for the chosen year as data/<year>.csv (the index
# is written too, as before). The output directory is created first so the
# export does not fail when data/ does not exist yet.
output_dir = Path("data")
output_dir.mkdir(parents=True, exist_ok=True)
d_final[year_of].to_csv(output_dir / f"{year_of}.csv")

In [115]:
# Show the cleaned frame for the chosen year as this cell's output.
d_final[year_of]

Unnamed: 0,dates,opening_values,closing_values
0,2023-01-02,1747.10,1752.60
1,2023-01-03,1754.20,1763.15
2,2023-01-04,1775.62,1776.00
3,2023-01-05,1777.10,1790.73
5,2023-01-09,1812.90,1812.27
...,...,...,...
120,2023-06-19,1850.53,1850.58
121,2023-06-20,1852.65,1853.54
122,2023-06-21,1853.05,1853.69
123,2023-06-22,1856.29,1857.36
