In [1]:
import pandas as pd
import numpy as np

In [2]:
def get_data(csv_filepath: str) -> pd.DataFrame:
    """
    Reads a CSV file from the given filepath and returns it as a pandas DataFrame.

    Args:
        csv_filepath (str): The path to the CSV file.

    Returns:
        pd.DataFrame: The data from the CSV file as a pandas DataFrame.
    """
    data = pd.read_csv(csv_filepath)
    return data

In [3]:
filepath = './results/schedule.csv'
data = get_data(filepath)
data = data[['cinema', 'date', 'title_excerpt']]
data = data.rename(columns={'title_excerpt': 'title'})
data.head()

Unnamed: 0,cinema,date,title
0,Метрополь,01.09.1953,АРЕНА СМЕЛЫХ
1,Метрополь,01.09.1953,ПАРМСКАЯ ОБИТЕЛЬ
2,Метрополь,01.09.1953,ЧУН И ГЕК
3,Ударник,01.09.1953,ПАРМСКАЯ ОБИТЕЛЬ
4,Ударник,01.09.1953,ПРАЗДНИК СВ. ИОРГЕНА


In [4]:
# Calculate the first day of screening and screening days in a row for each cinema
def calc():
    screening_info = []

    i = -1
    for cinema, group in data.groupby('cinema'):
        i += 1
        group['date'] = pd.to_datetime(group['date'], format='%d.%m.%Y')

        group = group.sort_values(by='date')

        group['next_date'] = group['date'].shift(-1)
        group['diff'] = (group['next_date'] - group['date']).dt.days
        group['screening_days'] = (group['diff'] != 1).cumsum()

        for title, title_group in group.groupby(['screening_days', 'title']):
            first_day = title_group['date'].min().strftime('%d/%m/%Y')
            screening_days = len(title_group)
            screening_info.append([cinema, first_day, screening_days, title])

    screening_df = pd.DataFrame(screening_info, columns=['Cinema', 'First day of screening', 'Screening Days', 'Title'])
    screening_df.head(10)

In [None]:
screening_info = []

for (cinema, title), group in data.groupby(['cinema', 'title']):
    group['date'] = pd.to_datetime(group['date'], format='%d.%m.%Y')
    group = group.sort_values(by='date')

    # print(group[['cinema', 'date', 'title']].head())

    for index, row in group.iterrows():
        prev_date = row['date']
        screening_days = 0

        next_rows = group.loc[index:, ['cinema', 'date', 'title']]

        for _, row2 in next_rows.iterrows():
            print(cinema, title, row2.to_list())

            if (row2['cinema'] != cinema) or (row2['title'] != title):
                break

            # we expect to count the current row as well, since it is included into next_rows
            if (row2['date'] - prev_date).days <= 1:
                prev_date = row2['date']
                screening_days += 1

        screening_info.append([cinema, row['date'].strftime('%d/%m/%Y'), screening_days, title])

screening_df = pd.DataFrame(screening_info, columns=['Cinema', 'First day of screening', 'Screening Days', 'Title'])
screening_df.head(10)

Метрополь АННА ПРОЛЕТАРКА ['Метрополь', Timestamp('1953-09-16 00:00:00'), 'АННА ПРОЛЕТАРКА']
Метрополь АРЕНА СМЕЛЫХ ['Метрополь', Timestamp('1953-09-01 00:00:00'), 'АРЕНА СМЕЛЫХ']
Метрополь АРЕНА СМЕЛЫХ ['Метрополь', Timestamp('1953-09-02 00:00:00'), 'АРЕНА СМЕЛЫХ']
Метрополь АРЕНА СМЕЛЫХ ['Метрополь', Timestamp('1953-09-03 00:00:00'), 'АРЕНА СМЕЛЫХ']
Метрополь АРЕНА СМЕЛЫХ ['Метрополь', Timestamp('1953-09-04 00:00:00'), 'АРЕНА СМЕЛЫХ']
Метрополь АРЕНА СМЕЛЫХ ['Метрополь', Timestamp('1953-09-07 00:00:00'), 'АРЕНА СМЕЛЫХ']
Метрополь АРЕНА СМЕЛЫХ ['Метрополь', Timestamp('1953-09-08 00:00:00'), 'АРЕНА СМЕЛЫХ']
Метрополь АРЕНА СМЕЛЫХ ['Метрополь', Timestamp('1953-09-09 00:00:00'), 'АРЕНА СМЕЛЫХ']
Метрополь АРЕНА СМЕЛЫХ ['Метрополь', Timestamp('1953-09-10 00:00:00'), 'АРЕНА СМЕЛЫХ']
Метрополь АРЕНА СМЕЛЫХ ['Метрополь', Timestamp('1953-09-11 00:00:00'), 'АРЕНА СМЕЛЫХ']
Метрополь АРЕНА СМЕЛЫХ ['Метрополь', Timestamp('1953-09-12 00:00:00'), 'АРЕНА СМЕЛЫХ']
Метрополь АРЕНА СМЕЛЫХ ['Метрополь', 

Unnamed: 0,Cinema,First day of screening,Screening Days,Title
0,Метрополь,16/09/1953,0,АННА ПРОЛЕТАРКА
1,Метрополь,01/09/1953,3,АРЕНА СМЕЛЫХ
2,Метрополь,02/09/1953,2,АРЕНА СМЕЛЫХ
3,Метрополь,03/09/1953,1,АРЕНА СМЕЛЫХ
4,Метрополь,04/09/1953,0,АРЕНА СМЕЛЫХ
5,Метрополь,07/09/1953,5,АРЕНА СМЕЛЫХ
6,Метрополь,08/09/1953,4,АРЕНА СМЕЛЫХ
7,Метрополь,09/09/1953,3,АРЕНА СМЕЛЫХ
8,Метрополь,10/09/1953,2,АРЕНА СМЕЛЫХ
9,Метрополь,11/09/1953,1,АРЕНА СМЕЛЫХ
