In [1]:
#!/usr/bin/env python3
"""
Data Generator for MovieNet
"""

import datetime
import pandas as pd
import random

In [2]:
# Keyword options forwarded to generate():
#   'path' -- prefix that is prepended to '<mode>.csv'
#   'date' -- bounds applied to the chart's date index
#   other  -- {'max', 'min'} bounds for the chart column of the same name;
#             generate() ignores keys that are not columns of the loaded chart.
options = {
    'path': '../../../data/charts/',
    'date': {'max': '2020-01-01', 'min': '1970-01-01'},
}

# Every numeric column shares the same range: at least 1, no upper limit.
options.update({
    column: {'max': float('inf'), 'min': 1}
    for column in (
        'gross',
        'theater_count',
        'day_in_release',
        'week_in_release',
    )
})

In [3]:
def generate(mode, pool_size=6, **options):
    """
    Generate random combinations of theatrical box office competition.

    Reads ``<path><mode>.csv``, applies the range filters given in
    ``options`` and then yields, indefinitely, random pools of ``pool_size``
    distinct chart rows that all share the same date.

    Parameters
    ----------
    mode : str
        Chart name; used as the CSV file stem (e.g. ``'weekend'``).
    pool_size : int, default 6
        Number of rows drawn without replacement for every sample.
    **options
        path : str, optional
            Prefix prepended to ``mode + '.csv'``
            (default ``'../../../data/charts/'``).
        date : dict, optional
            ``{'min': ..., 'max': ...}`` inclusive bounds compared against
            the index labels (assumed to be ``YYYY-MM-DD`` strings).
        <column name> : dict, optional
            ``{'min': ..., 'max': ...}`` inclusive bounds for that column.
        Keys that are neither of the above nor a loaded column are ignored.

    Yields
    ------
    dict
        ``{'date': datetime.date, 0: {...}, ..., pool_size - 1: {...}}``
        where every inner dict maps a column name to that row's value.

    Raises
    ------
    ValueError
        On the first ``next()`` call, if after filtering no date has at
        least ``pool_size`` rows (previously this looped forever).
    """
    path = options.get('path', '../../../data/charts/') + mode + '.csv'

    chart = pd.read_csv(path, index_col=0, usecols=[0, 3, 5, 7, 10])

    for key, bounds in options.items():
        if key == 'date':
            # Boolean mask instead of a label slice so the filter does not
            # depend on the index being sorted; both ends are inclusive.
            in_range = (chart.index >= bounds['min']) & (chart.index <= bounds['max'])
            chart = chart.loc[in_range]
        elif key in chart.columns:
            chart = chart.loc[chart[key].between(bounds['min'], bounds['max'])]

    # Keep only rows whose date can supply a full pool. Drawing a random row
    # from what is left gives the same distribution as drawing from the whole
    # chart and rejecting dates that are too small, but cannot spin forever.
    rows_per_date = chart.index.value_counts()
    eligible_dates = rows_per_date[rows_per_date >= pool_size].index
    chart = chart.loc[chart.index.isin(eligible_dates)]

    if len(chart) == 0:
        raise ValueError(
            'no date in %r has at least %d rows after filtering' % (path, pool_size)
        )

    columns = list(chart.columns)

    while True:
        date = chart.index[random.randrange(len(chart))]
        # List indexer: always a DataFrame, even if the date has a single row.
        pool = chart.loc[[date]]

        sample = {'date': datetime.date(*[int(a) for a in date.split('-')])}

        for i, e in enumerate(random.sample(range(len(pool)), pool_size)):
            row = pool.iloc[e]
            sample[i] = {j: row[j] for j in columns}

        yield sample

In [4]:
# Build an endless sampler over the weekend chart using the filters in `options`.
generator = generate(mode='weekend', **options)

# Draw one sample; as the cell's last expression it is displayed as the output.
next(generator)

{'date': datetime.date(2016, 9, 9),
 0: {'gross': 419113,
  'movie_ID': 'Finding-Dory',
  'theater_count': 490,
  'week_in_release': 13},
 1: {'gross': 4143,
  'movie_ID': 'Germans-and-Jews',
  'theater_count': 2,
  'week_in_release': 14},
 2: {'gross': 2113497,
  'movie_ID': 'No-Manches-Frida',
  'theater_count': 465,
  'week_in_release': 2},
 3: {'gross': 60827,
  'movie_ID': 'Hillarys-America-The-Secret-History-of-the-Democratic-Party',
  'theater_count': 169,
  'week_in_release': 9},
 4: {'gross': 90361,
  'movie_ID': 'Captain-Fantastic',
  'theater_count': 94,
  'week_in_release': 10},
 5: {'gross': 16760,
  'movie_ID': 'Independence-Day-Resurgence',
  'theater_count': 42,
  'week_in_release': 12}}