In [None]:
import time
from datetime import datetime, timedelta

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

import db4

def find_streams(date):
    """Return the active streams that have no schedule row starting after ``date``.

    Two queries are issued through ``db4.select``: first the active rows of
    ``stream_scraping_ids``, then the ``tv_schedules`` rows whose ``start_time``
    is later than ``date`` (``date`` is interpolated into the SQL text as-is).

    Args:
        date: Value formatted into the ``start_time`` comparison.

    Returns:
        The result of the streams query unchanged when the schedule query
        yields nothing; otherwise a list of the stream rows whose
        ``stream_id`` does not appear in any of the schedule rows.
    """
    active_streams = db4.select("select * from stream_scraping_ids where is_id_active is True")
    upcoming_schedules = db4.select(f"select * from tv_schedules where start_time>'{date}'")

    # Nothing scheduled yet: every active stream still needs scraping.
    if not upcoming_schedules:
        return active_streams

    already_scheduled = {row['stream_id'] for row in upcoming_schedules}
    return [row for row in active_streams if row['stream_id'] not in already_scheduled]


def _new_headless_chrome():
    """Start a headless Chrome WebDriver using the binary resolved by ChromeDriverManager."""
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-gpu")
    return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)


def _click_date_filter(driver, target_date):
    """Click the date filter whose ``data-value`` equals ``target_date``; return True if one was found."""
    spans = driver.find_elements(By.CSS_SELECTOR, "div.filter__list.js-more__list span.filter__item.js-group_date.js-more__item")
    for span in spans:
        if span.get_attribute("data-value") == target_date:
            ActionChains(driver).move_to_element(span).click(span).perform()
            time.sleep(2)  # fixed pause after the click before reading the page
            return True
    return False


def get_daily_program(streams):
    """Scrape today's programme list for every stream from tv.mail.ru.

    For each stream a fresh headless Chrome session is opened on
    ``https://tv.mail.ru/baku/channel/<id>/``, today's date filter is clicked,
    and the time/name elements are read. Any failure is printed and the stream
    is skipped; the browser is closed exactly once per stream on every path.

    Args:
        streams: Iterable of mappings providing ``stream_name`` (used in log
            lines), ``id`` (channel id placed in the URL) and ``stream_id``
            (key of the result).

    Returns:
        dict mapping ``str(stream_id)`` to ``{time_text: programme_name_text}``.
        Streams with no date filter for today, no programme elements, or an
        error are absent. Entries with the same time text collapse to the
        last one because the inner dict is keyed by time.
    """
    daily_programs = {}

    for stream in streams:
        stream_name = stream['stream_name']
        print(f"{stream_name} started in {datetime.now()}")

        try:
            driver = _new_headless_chrome()
        except Exception as e:
            print(f"Error initializing WebDriver for {stream_name}: {e}")
            continue

        # One finally owns the driver's lifetime, so every exit below
        # (including `continue`) closes the browser once and only once.
        try:
            try:
                stream_scrape_id = stream['id']
                url = f"https://tv.mail.ru/baku/channel/{stream_scrape_id}/"
                driver.get(url)
                time.sleep(5)  # fixed pause to let the page render
            except Exception as e:
                print(f"Error loading URL for {stream_name}: {e}")
                continue

            try:
                target_date = datetime.now().strftime("%Y-%m-%d")
                date_found = _click_date_filter(driver, target_date)
            except Exception as e:
                print(f"Error interacting with date selection for {stream_name}: {e}")
                continue

            if not date_found:
                print(f"No data found for {stream_name} on {target_date}. Moving to the next stream.")
                continue

            try:
                times = driver.find_elements(By.CLASS_NAME, "p-programms__item__time")
                names = driver.find_elements(By.CLASS_NAME, "p-programms__item__name")

                if not times or not names:
                    print(f"No program data available for {stream_name} on {target_date}.")
                    continue

                # Pair the n-th time element with the n-th name element.
                programs = {t.text: n.text for t, n in zip(times, names)}
                if programs:
                    daily_programs[f"{stream['stream_id']}"] = programs
            except Exception as e:
                print(f"Error extracting program data for {stream_name}: {e}")
        finally:
            driver.quit()

        print(f"{stream_name} finished in {datetime.now()}")

    return daily_programs

def clean_dict(dict):
    """Normalize every value of ``dict`` in place and return the same object.

    Each value is cut at its first ``'.'`` (the dot and everything after it
    are dropped), then surrounding whitespace is removed and internal runs of
    whitespace are collapsed to single spaces. Values must support the string
    methods used here.
    """
    for key in list(dict):
        head, _, _ = dict[key].partition('.')
        # split() with no arguments both trims the ends and collapses runs.
        dict[key] = ' '.join(head.split())
    return dict

def insert_code_to_programs(daily_programs):
    """Insert programme names that are not yet stored for their stream.

    For every stream key the existing ``programs`` rows are read through
    ``db4.select`` and compared by ``name`` with the scraped programme names;
    each name not found is inserted through ``db4.insert``.

    Args:
        daily_programs: Mapping of stream id to ``{start_time: program_name}``.

    Returns:
        dict mapping each successfully processed stream id to the set of
        programme names that were missing (and for which an insert was
        attempted). Streams whose lookup failed are absent.
    """
    missing_dict = {}
    insert_programs_query = "INSERT INTO programs (name, stream_id) VALUES (%s, %s)"

    for stream, schedule in daily_programs.items():
        try:
            # NOTE(review): the stream id is formatted straight into the SQL text.
            # Switch to a bound parameter if db4.select supports one - TODO confirm.
            select_programs_query = f"""SELECT * FROM programs WHERE stream_id={stream}"""
            existing_names = {record['name'] for record in db4.select(select_programs_query)}

            # Set difference replaces a linear scan of the table rows per programme.
            missing = set(schedule.values()) - existing_names
            missing_dict[stream] = missing
        except Exception as e:
            print(f"Error processing stream {stream}: {e}")
            continue

        # Guard each insert on its own so one failure does not drop the
        # remaining programmes of this stream.
        for program_name in missing:
            try:
                db4.insert(insert_programs_query, (program_name, stream))
            except Exception as e:
                print(f"Error inserting program '{program_name}' for stream {stream}: {e}")

    return missing_dict

def insert_code_to_tv_schedules(daily_programs):
    """Insert one ``tv_schedules`` row per scraped programme, dated today.

    Each ``start_time`` text is parsed as ``HH:MM`` on today's date. The
    programme id is resolved inside the INSERT by matching both the programme
    name and the stream id, so a programme with the same name on another
    stream cannot be picked. If no ``programs`` row matches, the statement
    selects nothing to insert. Failures are printed per programme and do not
    stop the remaining inserts.

    Args:
        daily_programs: Mapping of stream id to ``{start_time: program_name}``
            where ``start_time`` is text in ``HH:MM`` form.

    Returns:
        None.
    """
    target_date = datetime.now().date().strftime('%Y-%m-%d')

    # Loop-invariant statement. Placeholders, in order: start_time, stream_id,
    # program name, stream_id (used to pick the programme of this stream).
    insert_schedules = """
        INSERT INTO tv_schedules (start_time, program_id, stream_id)
        SELECT
            %s AS start_time, p.id AS program_id, %s AS stream_id
        FROM programs p
        WHERE p.name = %s AND p.stream_id = %s
        LIMIT 1;
    """

    for stream, schedule in daily_programs.items():
        try:
            for start_time, program_name in schedule.items():
                try:
                    # NOTE(review): every time is placed on today's date. If the
                    # listing runs past midnight those entries may belong to the
                    # next day - confirm against the scraped page.
                    start_datetime = datetime.strptime(f'{target_date} {start_time}', '%Y-%m-%d %H:%M')

                    values = (start_datetime, stream, program_name, stream)
                    db4.insert(insert_schedules, values)
                except Exception as e:
                    print(f"Error inserting program '{program_name}' for stream {stream} at {start_time}: {e}")
        except Exception as e:
            print(f"Error processing stream {stream}: {e}")




In [None]:
# Reference day for find_streams. `timedelta` must be imported in the import cell.
# Raising `days` only shifts the find_streams cut-off: the scraping and insert
# steps below call datetime.now() themselves.
todays_date = datetime.now().date() - timedelta(days=0)

# Active streams that do not yet have schedule rows after the reference day.
streams = find_streams(todays_date)

# Scrape today's programme list for each of those streams.
daily_programs = get_daily_program(streams)

# Normalize programme names (clean_dict edits each inner dict in place and
# returns it). The ValueError guard is kept from the original cell.
try:
    for stream_key in daily_programs:
        daily_programs[stream_key] = clean_dict(daily_programs[stream_key])
except ValueError as e:
    print(e)

# Store unseen programme names first so the schedule insert can resolve them.
missing_dict = insert_code_to_programs(daily_programs)

insert_code_to_tv_schedules(daily_programs)
    

Processing stream 35 results
Connection to the PostgreSQL database
Connection to the PostgreSQL database
Connection to the PostgreSQL database
Connection to the PostgreSQL database
Connection to the PostgreSQL database
Connection to the PostgreSQL database
Connection to the PostgreSQL database
Connection to the PostgreSQL database
Connection to the PostgreSQL database
Connection to the PostgreSQL database
Connection to the PostgreSQL database
Connection to the PostgreSQL database
Connection to the PostgreSQL database
Connection to the PostgreSQL database
Connection to the PostgreSQL database
Connection to the PostgreSQL database
Connection to the PostgreSQL database
Connection to the PostgreSQL database
Connection to the PostgreSQL database
Connection to the PostgreSQL database
Connection to the PostgreSQL database
Connection to the PostgreSQL database
Connection to the PostgreSQL database
Connection to the PostgreSQL database
Connection to the PostgreSQL database
Connection to the Pos

In [6]:
# len(daily_programs['29'])

In [7]:
# len(missing_dict['35'])