In [132]:
import pandas as pd

# Load the input table; reset_index() turns the freshly read row index into a
# regular column in front of the data columns.
df = pd.read_csv('data.csv').reset_index()

In [86]:
from bs4 import BeautifulSoup
# Parsed diary pages, keyed by '<loc>-<year>-<month>', so that every
# location/month page is downloaded and parsed only once per kernel session.
cache: dict[str, BeautifulSoup] = dict()

In [89]:
from typing import Tuple
import requests

def get_weather(loc: str, date: str) -> Tuple[str, int] | None:
    year, month, day = map(int, date.split('-'))

    key = f'{loc}-{year}-{month}'
    if key in cache:
        weather = cache[key]
    else:
        search_data = requests.get(
            f'https://www.gismeteo.ru/search/{loc}/',
            headers={
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:113.0) Gecko/20100101 Firefox/113.0'
            }
        ).content
        search = BeautifulSoup(search_data, 'html.parser')
        section = search.find(string='Населённые пункты ').find_parent('section')
        id = section.find(class_='link-item')['href'].split('-')[-1].replace('/','')
        
        weather_data = requests.get(
            f'https://www.gismeteo.ru/diary/{id}/{year}/{month}/',
            headers={
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:113.0) Gecko/20100101 Firefox/113.0'
            }
        ).content
        weather = BeautifulSoup(weather_data, 'html.parser')
        cache[key] = weather
    day_cell = weather.find('td', class_='first', string=day)
    if not day_cell:
        return None, None
    row = day_cell.find_parent('tr')
    row = [el for el in row.contents if el != '\n']
    temperature = row[1].contents[0]
    rain_cell = row[4].contents
    if len(rain_cell) == 0:
        rain = 0
    elif 'rain' in rain_cell[0]['src']:
        rain = 1
    else:
        rain = 0

    return temperature, rain

# Spot-check one location/date; this performs live requests to gismeteo.ru
# unless the page for that location and month is already in `cache`.
get_weather('Алтайский край', '2022-12-03')

('-18', 0)

In [90]:
temps = []
rains = []

# Look up the weather for every row of df. enumerate(start=1) supplies the
# 1-based progress counter, and zip() walks only the two columns that are
# needed instead of building a full row object per iteration.
for i, (loc, date) in enumerate(zip(df['region'], df['order_date']), start=1):
    temp, rain = get_weather(loc, date)
    # Both lists are appended together so they always stay the same length.
    temps.append(temp)
    rains.append(rain)
    print(f'{i}: {temp} | {rain}')

1: -18 | 0
2: None | None
3: +1 | 0
4: -2 | 0
5: 0 | 0
6: -14 | 0
7: +2 | 0
8: +22 | 0
9: +31 | 0
10: +10 | 0
11: +2 | 0
12: -2 | 0
13: 0 | 1
14: +9 | 0
15: +17 | 0
16: -13 | 0
17: +4 | 0
18: +12 | 0
19: -13 | 0
20: +6 | 0
21: +10 | 0
22: -16 | 0
23: +26 | 0
24: +18 | 0
25: 0 | 0
26: +19 | 0
27: +10 | 1
28: +25 | 0
29: +6 | 0
30: +4 | 0
31: +25 | 0
32: +15 | 0
33: -8 | 0
34: +14 | 1
35: +34 | 0
36: +17 | 0
37: +3 | 0
38: +22 | 0
39: +28 | 0
40: +5 | 0
41: +29 | 0
42: +28 | 0
43: 0 | 0
44: +12 | 0
45: +7 | 0
46: -15 | 0
47: +21 | 0
48: -1 | 0
49: -20 | 0
50: +31 | 0
51: +11 | 1
52: -1 | 0
53: -15 | 0
54: +15 | 0
55: -5 | 0
56: +11 | 0
57: +2 | 0
58: +25 | 0
59: +18 | 0
60: +28 | 0
61: -18 | 0
62: +4 | 0
63: +16 | 0
64: -3 | 0
65: +4 | 0
66: -1 | 0
67: +3 | 0
68: -5 | 0
69: -13 | 0
70: +3 | 1
71: +2 | 1
72: -10 | 0
73: +4 | 0
74: +15 | 0
75: -18 | 0
76: +25 | 0
77: -8 | 0
78: 0 | 0
79: -5 | 0
80: +2 | 0
81: -6 | 0
82: +9 | 0
83: +15 | 0
84: +2 | 0
85: +27 | 0
86: +5 | 1
87: +27 | 0
88: +

In [112]:
import bs4

# Blank out every entry whose temperature is a bs4 Tag rather than text,
# together with the rain flag stored at the same position.
for i, value in enumerate(temps):
    if not value:
        continue
    if type(value) is bs4.element.Tag:
        temps[i] = None
        rains[i] = None

In [125]:
# One row per collected lookup: the temperature text and the rain flag.
weather_df = pd.DataFrame(dict(temperature=temps, rain=rains))


In [139]:
# Place the weather columns next to the original ones (rows are matched on
# their index labels) and write the combined table to disk.
joined = pd.concat(objs=[df, weather_df], axis='columns')
joined.to_csv('weather.csv')