### Bibliotecas

In [1]:
from urllib.request import urlopen
import ssl
from bs4 import BeautifulSoup
import re
from datetime import datetime, timedelta
import pandas as pd
from ipywidgets import Dropdown

### Funciones

In [2]:
def retrieve_from_url(url='https://www.cinetecanacional.net/sedes/cartelera.php?cinemaId=003#gsc.tab=0'):
    """Download ``url`` and parse the response body with BeautifulSoup.

    Args:
        url: Address of the page to fetch. Defaults to the listing page
            for ``cinemaId=003``.

    Returns:
        A ``BeautifulSoup`` tree built from the response body with the
        ``html.parser`` backend.

    Raises:
        Whatever ``urllib.request.urlopen`` raises when the request fails.
    """
    # NOTE(security): hostname checking and certificate verification are
    # switched off on purpose (ignore SSL certificate errors), so the
    # connection is not protected against tampering.
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE

    # The context manager closes the HTTP response even if read() raises.
    with urlopen(url, context=ctx) as response:
        html = response.read()
    return BeautifulSoup(html, "html.parser")

In [3]:
def get_dates(soup):
    """Return the listing dates linked from ``soup`` that are today or later.

    Args:
        soup: Parsed page (anything whose ``str()`` is the page HTML).

    Returns:
        A sorted list of unique ``YYYY-MM-DD`` strings taken from links of
        the form ``cartelera.php?cinemaId=003&amp;dia=<date>``, keeping only
        dates that are not before the current local date.
    """
    # Compute the cutoff once so every match is compared against the same day.
    today = datetime.today().strftime('%Y-%m-%d')
    found = re.findall(r'cartelera\.php\?cinemaId=003&amp;dia=(\d{4}-\d{2}-\d{2})', str(soup))
    # ISO-formatted dates order correctly as plain strings; the set drops repeats.
    return sorted({date for date in found if date >= today})

In [4]:
def get_movies_data(soup):
    """Extract one record per showing from a parsed listing page.

    Args:
        soup: Parsed page. It is called like a BeautifulSoup tag
            (``soup('div', id='cartelera')``) to locate the listing.

    Returns:
        A dict of four parallel lists: ``'Title'`` (str), ``'Starts'`` and
        ``'Ends'`` (``datetime.time``) and ``'Duration'`` (int, minutes).
        All lists are empty when the page has no ``cartelera`` container.
        Movie cards without a title element or without a duration are
        skipped, since no end time can be computed for them.

    Raises:
        ValueError: if a matched ``HH:MM`` token is not a valid clock time.
    """
    movies_data = {'Title': [], 'Starts': [], 'Ends': [], 'Duration': []}

    containers = soup('div', id='cartelera')
    if not containers:
        # No listing on this page: report "no showings" instead of IndexError.
        return movies_data

    movies = containers[0]('div', class_='col-12 col-md-6 col-lg-4 float-left')
    for mov in movies:
        mov_html = str(mov)
        titles = mov('p', class_='font-weight-bold text-uppercase text-decoration-none text-black')
        durations = re.findall(r'Dur.: (\d+) mins.\)', mov_html)
        if not titles or not durations:
            continue

        title = titles[0].text
        length = int(durations[0])
        # Every HH:MM token found in the card's HTML is taken as a showtime.
        for h in re.findall(r'(\d{2}:\d{2})', mov_html):
            begins = datetime.strptime(h, '%H:%M')
            ends = begins + timedelta(minutes=length)
            movies_data['Title'].append(title)
            movies_data['Starts'].append(begins.time())
            # .time() drops the date, so a showing that runs past midnight
            # gets an 'Ends' value earlier than its 'Starts' value.
            movies_data['Ends'].append(ends.time())
            movies_data['Duration'].append(length)
    return movies_data

In [5]:
def which_movies(df):
    """Pick the largest set of non-overlapping showings (earliest-end-first greedy).

    Args:
        df: DataFrame with columns ``'Title'``, ``'Starts'``, ``'Ends'`` and
            ``'Duration'``. ``'Starts'`` and ``'Ends'`` must hold mutually
            comparable values.

    Returns:
        A new DataFrame with the same four columns and a fresh ``0..k-1``
        index, holding the chosen showings in viewing order. A showing is
        compatible with the previous pick when it starts at or after the
        previous pick's end.

    Notes:
        A row whose ``'Ends'`` is smaller than its ``'Starts'`` is treated as
        running past midnight: it is ordered after every same-day showing
        and, once chosen, nothing else can follow it. Such a row previously
        survived its own ``Starts >= Ends`` filter and made the selection
        loop run forever.
    """
    columns = ['Title', 'Starts', 'Ends', 'Duration']
    starts = df['Starts'].tolist()
    ends = df['Ends'].tolist()

    # Same-day showings first (by end), then past-midnight ones (by end).
    order = sorted(range(len(df)), key=lambda i: (ends[i] < starts[i], ends[i]))

    picked = []
    last_end = None
    for i in order:
        if last_end is not None and starts[i] < last_end:
            continue  # overlaps the showing chosen before it
        picked.append(i)
        if ends[i] < starts[i]:
            break  # ends after midnight: no later showing fits
        last_end = ends[i]

    # Build the result in one step instead of growing a frame row by row.
    return df.iloc[picked][columns].reset_index(drop=True)

### Ejecución

In [6]:
# Fetch the default listing page and collect the dates it links to.
landing_page = retrieve_from_url()
dates = get_dates(landing_page)

# Date selector, preselected to the first entry of `dates`.
w = Dropdown(
    options=dates,
    value=dates[0],
    description='Select date:',
    disabled=False,
)

In [7]:
# Render the date selector; later cells read the chosen date from `w.value`.
display(w)

Dropdown(description='Select date:', options=('2024-09-12', '2024-09-13', '2024-09-14', '2024-09-15', '2024-09…

$\downarrow \downarrow \downarrow$ ***Volver a ejecutar las celdas de aquí hacia abajo si cambias la fecha***     $\downarrow \downarrow \downarrow$

In [10]:
# Build the listing URL for the date currently selected in the dropdown.
selected_date = w.value
day_url = 'https://www.cinetecanacional.net/sedes/cartelera.php?cinemaId=003&dia=' + selected_date + '#gsc.tab=0'

# Download that day's page and tabulate one row per showing.
day_soup = retrieve_from_url(day_url)
movie_data = get_movies_data(day_soup)
df = pd.DataFrame(movie_data)

print('Cartelera del', selected_date)
df

Cartelera del 2024-09-17


Unnamed: 0,Title,Starts,Ends,Duration
0,An endless sunday,16:45:00,18:40:00,115
1,An endless sunday,21:00:00,22:55:00,115
2,Creatura,16:00:00,17:52:00,112
3,Creatura,20:30:00,22:22:00,112
4,Dormir con los ojos abiertos,16:00:00,17:37:00,97
5,Dormir con los ojos abiertos,20:15:00,21:52:00,97
6,El grosor del polvo,14:00:00,15:22:00,82
7,El grosor del polvo,18:15:00,19:37:00,82
8,Otra piel,15:00:00,16:10:00,70
9,Otra piel,19:15:00,20:25:00,70


In [11]:
# Solve the schedule once and reuse it for every figure below, instead of
# re-running which_movies(df) for each printed value.
best_schedule = which_movies(df)
total_minutes = best_schedule['Duration'].sum()

print('Este es el máximo de películas que puedes ver el', w.value, 'en la Cineteca Nacional.')
print('Con un total de', len(best_schedule), 'películas, y un tiempo de', total_minutes, 'minutos.')
print('Esto es aproximadamente', total_minutes//60, 'horas y', total_minutes%60, 'minutos.')
print('=D!')
best_schedule

Este es el máximo de películas que puedes ver el 2024-09-17 en la Cineteca Nacional.
Con un total de 4 películas, y un tiempo de 356 minutos.
Esto es aproximadamente 5 horas y 56 minutos.
=D!


Unnamed: 0,Title,Starts,Ends,Duration
0,Salaryman,14:00:00,15:20:00,80
1,Dormir con los ojos abiertos,16:00:00,17:37:00,97
2,El grosor del polvo,18:15:00,19:37:00,82
3,Dormir con los ojos abiertos,20:15:00,21:52:00,97
