Due to a shortage of hands to manually fill out the Microsoft Access forms dedicated to data entry, the database was populated programmatically using the Faker library. Here's how...

First, we'll establish a connection to the movie_production_companies database in MySQL using the configparser and pymysql libraries.

In [1]:
import pymysql
import configparser


def get_connection():
    """Open a pymysql connection using the [database] section of config.ini.

    Returns:
        The open connection on success, or None when connecting fails
        (the error is printed rather than raised).
    """
    parser = configparser.ConfigParser()
    parser.read('config.ini')

    # pymysql keyword -> option name inside the [database] section.
    option_for_keyword = {
        'host': 'host',
        'user': 'user',
        'password': 'password',
        'database': 'schema',
    }
    settings = {
        keyword: parser.get('database', option)
        for keyword, option in option_for_keyword.items()
    }
    settings['charset'] = 'utf8mb4'
    # DictCursor makes every fetched row a dictionary keyed by column name.
    settings['cursorclass'] = pymysql.cursors.DictCursor

    try:
        db = pymysql.connect(**settings)
        print("Connection successful")
        return db
    except Exception as e:
        print(f"Error connecting to the database: {e}")
        return None


Now that the get_connection function is defined, I proceeded to populate the database, starting with the _company_ table. Here's how...

In [4]:
from faker import Faker
import nbimporter
from comp_name_provider import MovieCompanyNameProvider


fake = Faker()
fake.add_provider(MovieCompanyNameProvider)

def generate_address():
    """Return the part of a Faker address that precedes its first comma."""
    # Faker puts line breaks inside addresses; join the lines with ', ' so the
    # whole address sits on one line before it is cut.
    one_line = ', '.join(fake.address().split('\n'))

    # Everything before the first comma is kept; the rest is discarded.
    head, _, _ = one_line.partition(',')
    return head

def generate_company_data(num_records):
    """Insert ``num_records`` fake companies into the ``company`` table.

    Every row gets a unique generated company name, a single-line address, a
    postcode, a random existing city and kind of organization, asset and
    liability figures, and a registration date within the last 15 years.
    The rows are committed together after the loop; if anything fails the
    error is printed and the commit is skipped.

    Args:
        num_records: Number of company rows to insert.
    """
    connection = get_connection()
    # get_connection() returns None on failure. Bail out here so the
    # ``finally`` clause below never calls .close() on None.
    if connection is None:
        print("Failed to connect to database. Exiting...")
        return

    sql = """
        INSERT INTO company (name, address, zip_code, city_id, kind_of_organization_id, total_asset, total_liability, registration_date)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
    """

    try:
        with connection.cursor() as cursor:
            # Fetch city IDs
            cursor.execute("SELECT id FROM city")
            city_ids = [row['id'] for row in cursor.fetchall()]

            # Fetch kind_of_organization IDs
            cursor.execute("SELECT id FROM kind_of_organization")
            organization_ids = [row['id'] for row in cursor.fetchall()]

            for _ in range(num_records):
                name = fake.unique.movie_company_name()
                address = generate_address()
                zip_code = fake.postcode()
                city_id = fake.random.choice(city_ids)

                # Per the original author's note: country_code is set by a
                # trigger based on city_id, and registration_body_id by a
                # trigger based on country_code, so neither is inserted here.

                kind_of_organization_id = fake.random.choice(organization_ids)

                # fix_len=True forces exactly the requested number of digits.
                # Without it random_number() may return a shorter number, so
                # the liability could end up larger than the asset value.
                # With it, the liability always has one digit fewer than the
                # asset value and is therefore strictly smaller.
                asset_digit = fake.random_int(min=6, max=8)
                total_asset = fake.random_number(digits=asset_digit, fix_len=True)
                total_liability = fake.random_number(digits=asset_digit - 1, fix_len=True)

                registration_date = fake.date_between(start_date='-15y', end_date='today')

                cursor.execute(
                    sql,
                    (
                        name,
                        address,
                        zip_code,
                        city_id,
                        kind_of_organization_id,
                        total_asset,
                        total_liability,
                        registration_date,
                    ),
                )

            connection.commit()
            print(f"{num_records} companies inserted.")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        connection.close()


generate_company_data(505)

Connection successful
505 companies inserted.


Next, the _shareholder_ table.

In [23]:
from faker import Faker
import random

fake = Faker()

def generate_phone_number():
    """Return a fake phone number built from a fixed numerify template."""
    # Each '#' in the template is replaced by a digit, so every number
    # produced has the same layout.
    return fake.numerify(text='(###) ###-###-####')

def insert_shareholders(num_records):
    """Insert ``num_records`` fake rows into the ``shareholder`` table.

    Country codes are drawn at random from the existing ``country`` table;
    every other field comes from Faker. All rows are committed together.

    Args:
        num_records: Number of shareholder rows to insert.
    """
    connection = get_connection()
    if connection is None:
        print("Failed to connect to database. Exiting...")
        return

    try:
        with connection.cursor() as cursor:
            cursor.execute("SELECT code FROM country")
            country_codes = [record['code'] for record in cursor.fetchall()]

            insert_stmt = """
                INSERT INTO shareholder (first_name, last_name, country_code, place_of_birth, mothers_maiden_name, fathers_first_name, personal_telephone, national_insurance_number, passport_number)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
                """

            for _ in range(num_records):
                # Tuple order mirrors the column list of the INSERT above.
                shareholder_row = (
                    fake.first_name(),
                    fake.last_name(),
                    random.choice(country_codes),
                    fake.city(),  # place of birth
                    fake.last_name_female(),  # mother's maiden name
                    fake.first_name_male(),  # father's first name
                    generate_phone_number(),
                    fake.ssn(),  # SSN stands in for the insurance number
                    # Two capital letters followed by six digits.
                    fake.bothify(text='??######', letters='ABCDEFGHIJKLMNOPQRSTUVWXYZ'),
                )
                cursor.execute(insert_stmt, shareholder_row)

            # One commit for the whole batch.
            connection.commit()
            print(f"{num_records} shareholders inserted successfully.")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        connection.close()


insert_shareholders(3005)


Connection successful
3005 shareholders inserted successfully.


Next, the _company shareholder_ table. The junction table that establishes a many-to-many relationship between the _company_ and the _shareholder_ table.

In [24]:
import random

def insert_company_shareholders(max_shareholders_per_company=30, max_companies_per_shareholder=25):
    """Populate the ``company_shareholder`` junction table.

    Phase 1 links every shareholder to one random company so that no
    shareholder is left unassigned. Phase 2 gives each company between 2 and
    ``max_shareholders_per_company`` additional shareholders, never repeating
    a (company, shareholder) pair and never taking a shareholder beyond
    ``max_companies_per_shareholder`` companies.

    Phase 2 samples directly from the shareholders that are still eligible.
    When fewer eligible shareholders remain than were requested, the company
    simply receives fewer, so the function always terminates (the previous
    retry loop could spin forever once nobody was eligible).

    Args:
        max_shareholders_per_company: Upper bound for the number of
            additional shareholders drawn per company in phase 2.
        max_companies_per_shareholder: Maximum number of companies a single
            shareholder may be linked to.
    """
    connection = get_connection()
    if connection is None:
        print("Failed to connect to database. Exiting...")
        return

    sql = "INSERT INTO company_shareholder (company_id, shareholder_id) VALUES (%s, %s)"

    try:
        with connection.cursor() as cursor:
            cursor.execute("SELECT id FROM company")
            company_ids = [row['id'] for row in cursor.fetchall()]

            cursor.execute("SELECT id FROM shareholder")
            shareholder_ids = [row['id'] for row in cursor.fetchall()]

            # random.choice() on an empty list raises an opaque IndexError;
            # report the real problem instead.
            if not company_ids or not shareholder_ids:
                print("No companies or shareholders found. Nothing to insert.")
                return

            # Track the number of companies assigned to each shareholder
            shareholder_companies_count = dict.fromkeys(shareholder_ids, 0)

            # Keep track of company-shareholder pairs to avoid duplicates
            company_shareholder_pairs = set()

            # Phase 1: every shareholder is accounted for at least once.
            for shareholder_id in shareholder_ids:
                company_id = random.choice(company_ids)
                cursor.execute(sql, (company_id, shareholder_id))
                company_shareholder_pairs.add((company_id, shareholder_id))
                shareholder_companies_count[shareholder_id] += 1

            # Phase 2: additional random assignments per company.
            for company_id in company_ids:
                num_shareholders = random.randint(2, max_shareholders_per_company)

                # Only shareholders that are below their cap and not already
                # linked to this company may be picked.
                eligible = [
                    shareholder_id
                    for shareholder_id in shareholder_ids
                    if shareholder_companies_count[shareholder_id] < max_companies_per_shareholder
                    and (company_id, shareholder_id) not in company_shareholder_pairs
                ]

                # sample() returns distinct elements, so no pair repeats
                # within this company either.
                chosen = random.sample(eligible, min(num_shareholders, len(eligible)))
                for shareholder_id in chosen:
                    cursor.execute(sql, (company_id, shareholder_id))
                    company_shareholder_pairs.add((company_id, shareholder_id))
                    shareholder_companies_count[shareholder_id] += 1

            connection.commit()
            print("All shareholders accounted for and company_shareholder records inserted successfully.")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        connection.close()


insert_company_shareholders()


Connection successful
All shareholders accounted for and company_shareholder records inserted successfully.


Next, the _grant request_ table.

In [12]:
from faker import Faker
import random

fake = Faker()

def insert_grant_requests(num_records):
    """Insert ``num_records`` fake rows into the ``grant_request`` table.

    Each request gets a catch-phrase title, a funding organization made of a
    fake company name plus an organization-type word, a maximum value between
    10,000 and 100,000, a desired amount of 50-90% of that maximum, an
    application date within the past two years, a deadline within the coming
    year and a random status.

    Args:
        num_records: Number of grant requests to insert.
    """
    connection = get_connection()
    if connection is None:
        print("Failed to connect to database. Exiting...")
        return

    # Word appended to a fake company name to make it read like a funder.
    organization_kinds = [
        'Foundation', 'Council', 'Federation', 'Institute', 'Endowment', 'Fund', 'Trust'
    ]
    possible_statuses = ['Approved', 'Denied', 'Pending']

    try:
        with connection.cursor() as cursor:
            insert_stmt = """
                INSERT INTO grant_request (title, funding_organization, maximum_monetary_value, desired_amount, application_date, deadline, status)
                VALUES (%s, %s, %s, %s, %s, %s, %s)
                """

            for _ in range(num_records):
                title = fake.catch_phrase()

                company_part = fake.company()
                kind_part = random.choice(organization_kinds)
                funding_organization = f"{company_part} {kind_part}"

                # The desired amount is a 50-90% share of the maximum.
                maximum_monetary_value = round(random.uniform(10000, 100000), 2)
                share = random.uniform(0.5, 0.9)
                desired_amount = round(maximum_monetary_value * share, 2)

                application_date = fake.date_between(start_date="-2y", end_date="today")
                deadline = fake.date_between(start_date="today", end_date="+1y")
                status = random.choice(possible_statuses)

                cursor.execute(
                    insert_stmt,
                    (
                        title,
                        funding_organization,
                        maximum_monetary_value,
                        desired_amount,
                        application_date,
                        deadline,
                        status,
                    ),
                )

            # One commit for the whole batch.
            connection.commit()
            print(f"{num_records} grant requests inserted successfully.")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        connection.close()

# Example usage: insert 1005 new grant requests
insert_grant_requests(1005)

Connection successful
1005 grant requests inserted successfully.


Next, the _company grant_ table. The junction table that establishes a many-to-many relationship between the _company_ and the _grant request_ tables

In [13]:
import random

def insert_company_grants(max_grants_per_company=15, max_companies_per_grant=5):
    """Populate the ``company_grant`` junction table.

    Phase 1 links every grant request to one random company so that no grant
    is left unassigned. Phase 2 gives each company between 1 and
    ``max_grants_per_company`` additional grants, never repeating a
    (company, grant) pair and never taking a grant beyond
    ``max_companies_per_grant`` companies.

    Phase 2 samples directly from the grants that still have capacity. When
    fewer eligible grants remain than were requested, the company simply
    receives fewer, so the function always terminates (the previous retry
    loop could spin forever once every grant was full).

    Args:
        max_grants_per_company: Upper bound for the number of additional
            grants drawn per company in phase 2.
        max_companies_per_grant: Maximum number of companies a single grant
            may be linked to.
    """
    connection = get_connection()
    if connection is None:
        print("Failed to connect to database. Exiting...")
        return

    sql = "INSERT INTO company_grant (company_id, grant_id) VALUES (%s, %s)"

    try:
        with connection.cursor() as cursor:
            cursor.execute("SELECT id FROM company")
            company_ids = [row['id'] for row in cursor.fetchall()]

            cursor.execute("SELECT id FROM grant_request")
            grant_ids = [row['id'] for row in cursor.fetchall()]

            # random.choice() on an empty list raises an opaque IndexError;
            # report the real problem instead.
            if not company_ids or not grant_ids:
                print("No companies or grant requests found. Nothing to insert.")
                return

            # Track the number of companies assigned to each grant
            grant_companies_count = dict.fromkeys(grant_ids, 0)
            # Keep track of company-grant pairs to avoid duplicates
            company_grant_pairs = set()

            # Phase 1: every grant is accounted for at least once.
            for grant_id in grant_ids:
                company_id = random.choice(company_ids)
                cursor.execute(sql, (company_id, grant_id))
                company_grant_pairs.add((company_id, grant_id))
                grant_companies_count[grant_id] += 1

            # Phase 2: additional random assignments per company.
            for company_id in company_ids:
                num_grants = random.randint(1, max_grants_per_company)  # at least one grant

                # Only grants that are below their cap and not already linked
                # to this company may be picked.
                eligible = [
                    grant_id
                    for grant_id in grant_ids
                    if grant_companies_count[grant_id] < max_companies_per_grant
                    and (company_id, grant_id) not in company_grant_pairs
                ]

                # sample() returns distinct elements, so no pair repeats
                # within this company either.
                chosen = random.sample(eligible, min(num_grants, len(eligible)))
                for grant_id in chosen:
                    cursor.execute(sql, (company_id, grant_id))
                    company_grant_pairs.add((company_id, grant_id))
                    grant_companies_count[grant_id] += 1

            connection.commit()
            print("All grants accounted for and company_grants records inserted successfully.")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        connection.close()

insert_company_grants()


Connection successful
All grants accounted for and company_grants records inserted successfully.


Next, the _film_ table

In [15]:
import random
from faker import Faker
from datetime import datetime
import re

fake = Faker()

def split_camel_case(color_name):
    """Break a CamelCase name into space-separated words.

    A word is a capital letter followed by lowercase letters, or a run of
    capitals that ends right before the next capitalized word or at the end
    of the string. Text containing no capital letter yields an empty string.
    """
    words = (
        match.group(0)
        for match in re.finditer(r'[A-Z](?:[a-z]+|[A-Z]*(?=[A-Z]|$))', color_name)
    )
    return ' '.join(words)

unique_titles = set()

def generate_movie_title(unique_titles):
    """Return a generated movie title that is not yet in ``unique_titles``.

    A pattern is picked at random and its placeholders are filled with Faker
    values. Generation is retried until the result is new; the new title is
    added to ``unique_titles`` before it is returned.

    Args:
        unique_titles: Set of titles already handed out. It is mutated in
            place.

    Returns:
        The newly generated title string.
    """
    title_prefixes = ['The Return of', 'Revenge of', 'Rise of', 'The Fall of', 'The Chronicles of', 'Escape from', 'Battle for', 'Attack of']
    title_suffixes = [': The Last Key', ': A New Hope', ': The Secret Service', ': The Golden Age', ': Endgame', ': Infinity War', ': Dark Fate']

    prefix = ["{city}"]
    suffix = ["{century}", "{year}"]

    # Fixed patterns; placeholders are filled through str.format below.
    # Single-placeholder patterns such as "{noun}" or "{year}" were commented
    # out in the original and remain excluded.
    fixed_patterns = [
        "{color_name} {month}",
        "{noun} of {country}",
        "{adjective} {city}",
        "{first_name}, the {century}",
        "{first_name} {last_name}: {noun} of {city}",
        "How to be a {job} in {country}",
        # Space added after the colon so it matches the other colon patterns.
        "{century}: Shades of {color_name}",
        "{color_name} {noun} in {city}",
        "{first_name} and the {adjective} {noun}",
        "The {last_name} Legacy",
        "The {country} Conspiracy",
        "{year} in {country}",
        "{month} {year} in {country}",
        "{city}'s {noun}",
        "From {country} with {noun}",
        "{century}: The {noun}",
        "Every {month} in {city}",
        "The {adjective} of {noun}",
        "{first_name_male} and {first_name_female}",
        "{noun}: A {city} Story",
        "The {adjective} {noun}",
        "{century}: Rise of {country}",
        "{city}: Age of {color_name}",
        "Tales of {city}",
        "{time} in {city}",
    ]

    while True:
        # The two composite patterns are re-drawn on every attempt so that a
        # retry can produce a different prefix/suffix combination.
        patterns = fixed_patterns + [
            random.choice(title_prefixes) + " " + random.choice(prefix),
            random.choice(suffix) + random.choice(title_suffixes),
        ]

        pattern = random.choice(patterns)

        # str.format ignores keyword arguments the pattern does not use, so
        # every possible placeholder value is supplied.
        title = pattern.format(
            color_name=split_camel_case(fake.color_name()),
            country=fake.country(),
            city=fake.city(),
            noun=fake.word().capitalize(),
            adjective=fake.word().capitalize(),  # any random word, not necessarily an adjective
            first_name=fake.first_name(),
            first_name_male=fake.first_name_male(),
            first_name_female=fake.first_name_female(),
            last_name=fake.last_name(),
            job=fake.job().split(',')[0],  # keep only the text before the first comma
            century=fake.century(),
            time=fake.time(pattern='%H:%M'),
            month=fake.month_name(),
            year=fake.year(),
        )
        if title not in unique_titles:
            unique_titles.add(title)
            return title


def generate_and_insert_films(num_records):
    """Insert ``num_records`` fake rows into the ``film`` table.

    Titles come from generate_movie_title() and are tracked in the
    module-level ``unique_titles`` set. The first release date falls within
    the last 35 years and the release year is taken from that date.

    Args:
        num_records: Number of films to insert.
    """
    connection = get_connection()
    if connection is None:
        print("Failed to connect to database. Exiting...")
        return

    insert_stmt = "INSERT INTO film (title, release_year, first_released) VALUES (%s, %s, %s)"

    try:
        with connection.cursor() as cursor:
            for _ in range(num_records):
                film_title = generate_movie_title(unique_titles)
                released_on = fake.date_between(start_date='-35y', end_date='today')
                # The year is passed as a four-digit string.
                film_row = (film_title, released_on.strftime("%Y"), released_on)
                cursor.execute(insert_stmt, film_row)

            connection.commit()
            print(f"{num_records} films inserted successfully.")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        connection.close()

generate_and_insert_films(6385)

Connection successful
6385 films inserted successfully.


Next, the _company film_ table. The junction table that establishes a many-to-many relationship between the _company_ and the _film_ tables allowing for co-production in the film industry.

In [18]:
import random

def insert_company_film(max_films_per_company=24, max_companies_per_film=3):
    """Populate the ``company_film`` junction table.

    Phase 1 links every film to one random company so that no film is left
    unassigned. Phase 2 gives each company between 1 and
    ``max_films_per_company`` additional films, never repeating a
    (company, film) pair and never taking a film beyond
    ``max_companies_per_film`` companies.

    Phase 2 samples directly from the films that still have capacity. When
    fewer eligible films remain than were requested, the company simply
    receives fewer, so the function always terminates (the previous retry
    loop could spin forever once every film was full).

    Args:
        max_films_per_company: Upper bound for the number of additional
            films drawn per company in phase 2.
        max_companies_per_film: Maximum number of companies a single film
            may be linked to.
    """
    connection = get_connection()
    if connection is None:
        print("Failed to connect to database. Exiting...")
        return

    sql = "INSERT INTO company_film (company_id, film_movie_code) VALUES (%s, %s)"

    try:
        with connection.cursor() as cursor:
            cursor.execute("SELECT id FROM company")
            company_ids = [row['id'] for row in cursor.fetchall()]

            cursor.execute("SELECT movie_code FROM film")
            film_ids = [row['movie_code'] for row in cursor.fetchall()]

            # random.choice() on an empty list raises an opaque IndexError;
            # report the real problem instead.
            if not company_ids or not film_ids:
                print("No companies or films found. Nothing to insert.")
                return

            # Track the number of companies assigned to each film
            film_companies_count = dict.fromkeys(film_ids, 0)
            # Keep track of company-film pairs to avoid duplicates
            company_film_pairs = set()

            # Phase 1: every film is accounted for at least once.
            for film_movie_code in film_ids:
                company_id = random.choice(company_ids)
                cursor.execute(sql, (company_id, film_movie_code))
                company_film_pairs.add((company_id, film_movie_code))
                film_companies_count[film_movie_code] += 1

            # Phase 2: additional random assignments per company.
            for company_id in company_ids:
                num_films = random.randint(1, max_films_per_company)  # at least one film

                # Only films that are below their cap and not already linked
                # to this company may be picked.
                eligible = [
                    film_movie_code
                    for film_movie_code in film_ids
                    if film_companies_count[film_movie_code] < max_companies_per_film
                    and (company_id, film_movie_code) not in company_film_pairs
                ]

                # sample() returns distinct elements, so no pair repeats
                # within this company either.
                chosen = random.sample(eligible, min(num_films, len(eligible)))
                for film_movie_code in chosen:
                    cursor.execute(sql, (company_id, film_movie_code))
                    company_film_pairs.add((company_id, film_movie_code))
                    film_companies_count[film_movie_code] += 1

            connection.commit()
            print("All films accounted for and company_film records inserted successfully.")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        connection.close()

insert_company_film()


Connection successful


All films accounted for and company_film records inserted successfully.


Time to generate the dummy Employees for the _employee_ table.

In [25]:
from faker import Faker
import random

fake = Faker()

def _shift_years(day, years):
    """Return ``day`` moved by ``years`` calendar years (Feb 29 falls back to Feb 28)."""
    try:
        return day.replace(year=day.year + years)
    except ValueError:
        # Feb 29 does not exist in the target year.
        return day.replace(year=day.year + years, day=28)


def insert_employees_and_related_data():
    """Insert 500-1000 fake employees for every row of the ``company`` table.

    Each employee gets gender-matched first and middle names, a date of birth
    giving an age of 18-65, a start date and a label drawn at random from the
    names in the ``role`` and ``department`` tables.

    The start date lies within the last 20 years for people born before 1990
    and within the last 10 years otherwise, but never before the employee's
    18th birthday (the previous version could generate start dates at which
    the employee was still a child).
    """
    # Local import: only this function needs ``date``.
    from datetime import date

    connection = get_connection()
    if connection is None:
        print("Failed to connect to database. Exiting...")
        return

    sql_employee = """
        INSERT INTO employee (company_id, first_name, middle_name, last_name, gender, date_of_birth, date_started, employee_role)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
    """

    try:
        with connection.cursor() as cursor:
            cursor.execute("SELECT id FROM company")
            companies = [company['id'] for company in cursor.fetchall()]

            cursor.execute("SELECT id, name FROM role")
            roles = [role['name'] for role in cursor.fetchall()]
            cursor.execute("SELECT id, name FROM department")
            departments = [department['name'] for department in cursor.fetchall()]

            # Built once instead of once per employee.
            assignable_roles = roles + departments
            if not assignable_roles:
                print("No roles or departments found. Nothing to insert.")
                return

            today = date.today()

            for company_id in companies:
                num_employees_per_company = random.randint(500, 1000)
                for _ in range(num_employees_per_company):
                    is_male = random.choice(['male', 'female']) == 'male'
                    first_name = fake.first_name_male() if is_male else fake.first_name_female()
                    middle_name = fake.first_name_male() if is_male else fake.first_name_female()
                    last_name = fake.last_name()
                    dob = fake.date_of_birth(minimum_age=18, maximum_age=65)
                    date_of_birth = dob.strftime('%Y-%m-%d')

                    # Older employees get a longer possible tenure window.
                    tenure_years = 20 if dob.year < 1990 else 10
                    # Start no earlier than the 18th birthday and no later
                    # than today.
                    earliest_start = max(_shift_years(dob, 18), _shift_years(today, -tenure_years))
                    earliest_start = min(earliest_start, today)
                    date_started = fake.date_between(
                        start_date=earliest_start, end_date=today
                    ).strftime('%Y-%m-%d')

                    # Assign a role or department randomly
                    employee_role = random.choice(assignable_roles)

                    gender = 'M' if is_male else 'F'

                    cursor.execute(
                        sql_employee,
                        (
                            company_id,
                            first_name,
                            middle_name,
                            last_name,
                            gender,
                            date_of_birth,
                            date_started,
                            employee_role,
                        ),
                    )

            connection.commit()
            print(f"Inserted employees into all {len(companies)} companies.")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        connection.close()

insert_employees_and_related_data() 


Connection successful
Inserted employees into all 505 companies.


Next, the _phone number_ table

In [26]:
from faker import Faker
import random

fake = Faker()

def insert_phone_numbers():
    """Insert one to three ``phone_number`` rows for every employee.

    For each employee a random, non-repeating selection of the descriptions
    'Mobile', 'Work' and 'Home' is stored. Only the employee id and the
    description are inserted here.
    """
    connection = get_connection()
    if connection is None:
        print("Failed to connect to database. Exiting...")
        return

    try:
        with connection.cursor() as cursor:
            cursor.execute("SELECT id FROM employee")
            employee_ids = [record['id'] for record in cursor.fetchall()]

            phone_description = ['Mobile', 'Work', 'Home']

            # Per the original author's notes: the row id is auto-incremented
            # and the phone number itself is generated by database logic, so
            # only the description has to be supplied per employee.
            insert_stmt = """
                    INSERT INTO phone_number (employee_id, description) 
                    VALUES (%s, %s)
                    """

            for employee_id in employee_ids:
                how_many = random.randint(1, len(phone_description))
                # sample() never repeats a description for one employee.
                for description in random.sample(phone_description, how_many):
                    cursor.execute(insert_stmt, (employee_id, description))

            connection.commit()
            print("Inserted phone numbers for all employees.")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        connection.close()

insert_phone_numbers()

Connection successful
Inserted phone numbers for all employees.


Next, the _staff salary_ table

In [7]:
import random

def insert_staff_salaries():
    """Insert one ``staff_salary`` row for every row of the ``staff`` table.

    Staff in department 1 (treated as executive management) are always
    full-time 'Executive' with a salary between 20,000 and 35,000. Everyone
    else gets random working hours and a random job level, and a salary drawn
    from the band that matches that combination.
    """
    connection = get_connection()
    if connection is None:
        print("Failed to connect to database. Exiting...")
        return

    try:
        with connection.cursor() as cursor:
            cursor.execute("SELECT staff_id, department_id FROM staff")
            staff_info = cursor.fetchall()

            executive_id = 1  # this is the ID for the executive management department
            working_hours_options = ['Full-time', 'Part-time']
            job_levels = ['Entry', 'Mid', 'Senior']

            # (is_senior, working_hours) -> (lowest, highest) salary.
            # 'Entry' and 'Mid' share the non-senior bands.
            salary_bands = {
                (False, 'Part-time'): (700, 2499),
                (False, 'Full-time'): (2500, 4999),
                (True, 'Part-time'): (6000, 9999),
                (True, 'Full-time'): (10000, 19999),
            }

            sql_salary = """
                INSERT INTO staff_salary (staff_id, working_hours, job_level, salary) VALUES (%s, %s, %s, %s)
                """

            for member in staff_info:
                if member['department_id'] == executive_id:
                    working_hours, job_level = 'Full-time', 'Executive'
                    lowest, highest = 20000, 35000
                else:
                    working_hours = random.choice(working_hours_options)
                    job_level = random.choice(job_levels)
                    lowest, highest = salary_bands[(job_level == 'Senior', working_hours)]

                salary = round(random.uniform(lowest, highest), 2)

                # Executives and regular staff go through the same INSERT.
                cursor.execute(sql_salary, (member['staff_id'], working_hours, job_level, salary))

            connection.commit()
            print("Inserted salaries for all staff members in every company.")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        connection.close()

insert_staff_salaries()


Connection successful
Inserted salaries for all staff members in every company.


The _department address_ table is next to populate

In [27]:
from faker import Faker

fake = Faker()

def insert_department_addresses():
    """Insert a fake address for every (company, department) combination.

    Each ``department_address`` row gets a Faker street name as its building
    and a Faker street address as its address.
    """
    connection = get_connection()
    if connection is None:
        print("Failed to connect to database. Exiting...")
        return

    try:
        with connection.cursor() as cursor:
            cursor.execute("SELECT id FROM company")
            company_ids = [record['id'] for record in cursor.fetchall()]

            cursor.execute("SELECT id FROM department")
            department_ids = [record['id'] for record in cursor.fetchall()]

            insert_stmt = """
                    INSERT INTO department_address (department_id, company_id, building, address) 
                    VALUES (%s, %s, %s, %s)
                    """

            # Every company receives one address row per department.
            for company_id in company_ids:
                for department_id in department_ids:
                    building = fake.street_name()
                    street_address = fake.street_address()
                    cursor.execute(
                        insert_stmt,
                        (department_id, company_id, building, street_address),
                    )

            connection.commit()
            print(f"Inserted department addresses for all companies.")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        connection.close()

insert_department_addresses()


Connection successful
Inserted department addresses for all companies.


Lastly, the _crew info_ table

In [63]:
from faker import Faker
import random

fake = Faker()

def insert_crew_info():
    """Populate ``crew_info`` with crew assignments and pay details per movie.

    For every movie, up to 20-50 crew members employed by the movie's company
    are picked at random. Each assignment stores an hourly rate, a daily
    bonus, a completion bonus and a contractual incentive. Crew whose role id
    equals ``actor_role_id`` additionally have a 50% chance of receiving a
    scene bonus; for everyone else the scene bonus is NULL.

    All monetary values are rounded to two decimals. The previous version
    tried to round only a "selected" subset, but it compared the drawn float
    values against the strings 'hourly_rate', 'daily_bonus' and so on. Those
    branches never ran, and unrounded floats were inserted.
    """
    connection = get_connection()
    if connection is None:
        print("Failed to connect to database. Exiting...")
        return

    # Role ID treated as "actor"; assumes this matches the 'role' table.
    # TODO confirm against the actual data.
    actor_role_id = 6

    sql = """
        INSERT INTO crew_info (crew_id, movie_code, role_id, scene_bonus, hourly_rate, daily_bonus, completion_bonus, contractual_incentive)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
    """

    try:
        with connection.cursor() as cursor:
            # NOTE(review): this assumes the film table exposes a company_id
            # column. The film INSERT in this notebook does not set one, and
            # the company/film link is written to company_film. Confirm
            # against the schema.
            cursor.execute("SELECT movie_code, company_id FROM film")
            movies = cursor.fetchall()

            # Fetch crew members with their roles and company affiliations
            cursor.execute("""
            SELECT c.crew_id, c.role_id, e.company_id 
            FROM crew c
            JOIN employee e ON c.crew_id = e.id
            """)
            crew_members = cursor.fetchall()

            # Group the crew by company once, instead of re-scanning the
            # whole crew list for every movie.
            crew_by_company = {}
            for member in crew_members:
                crew_by_company.setdefault(member['company_id'], []).append(member)

            for movie in movies:
                movie_code = movie['movie_code']
                eligible_crew = crew_by_company.get(movie['company_id'], [])

                # 20-50 crew members, capped by what the company has.
                crew_size = min(len(eligible_crew), random.randint(20, 50))
                selected_crew = random.sample(eligible_crew, k=crew_size)

                for crew in selected_crew:
                    crew_id = crew['crew_id']
                    role_id = crew['role_id']

                    # Only actors may receive a scene bonus
                    scene_bonus = None
                    if role_id == actor_role_id and random.choice([True, False]):
                        scene_bonus = round(random.uniform(20, 200), 2)

                    hourly_rate = round(random.uniform(20, 50), 2)  # per hour
                    daily_bonus = round(random.uniform(50, 200), 2)  # per day
                    completion_bonus = round(random.uniform(500, 3000), 2)  # upon completion
                    contractual_incentive = round(random.uniform(1000, 5000), 2)

                    cursor.execute(
                        sql,
                        (
                            crew_id,
                            movie_code,
                            role_id,
                            scene_bonus,
                            hourly_rate,
                            daily_bonus,
                            completion_bonus,
                            contractual_incentive,
                        ),
                    )

            connection.commit()
            print("Crew info on payment and movie involvement inserted successfully.")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        connection.close()

# Run the crew_info population routine defined earlier in this cell.
insert_crew_info()
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               

Connection successful
Crew info on payment and movie involvement inserted successfully.


In [None]:
from faker import Faker
import random

# Faker instance for this cell.
# NOTE(review): neither insert_crew_info nor insert_crew_data below references
# `fake` -- confirm whether it is still needed here.
fake = Faker()

def insert_crew_info():
    """Assign crew members to every film and record them in crew_info.

    For each row of ``film`` the function walks the 30 lowest role ids and,
    for every role the film's company has crew for, inserts:

    * actor role (id 6): all eligible actors, capped at 40 per film;
    * any other role: one randomly chosen crew member.

    Roles outside those first 30 then contribute one randomly chosen crew
    member each, when the company has someone in that role.

    Rows are written through ``insert_crew_data`` and committed in a single
    transaction after all films are processed. Any exception is printed
    (not re-raised), and the connection is closed in every case.
    """
    connection = get_connection()
    if connection is None:
        print("Failed to connect to database. Exiting...")
        return

    try:
        with connection.cursor() as cursor:
            # Films together with the company that produced them.
            cursor.execute("SELECT movie_code, company_id FROM film")
            movies = cursor.fetchall()

            # The 30 lowest role ids: each is represented on every film
            # whenever the producing company has crew in that role.
            cursor.execute("SELECT id FROM role ORDER BY id LIMIT 30")
            first_30_roles = [role['id'] for role in cursor.fetchall()]

            # Every role, used to find the ones outside the first 30.
            cursor.execute("SELECT id, name FROM role")
            all_roles = cursor.fetchall()

            # Crew members with their role and employing company.
            cursor.execute("""
            SELECT c.crew_id, c.role_id, e.company_id
            FROM crew c
            JOIN employee e ON c.crew_id = e.id
            """)
            crew_members = cursor.fetchall()

            actor_role_id = 6  # Assuming 6 is the role ID for actors
            max_actors_per_movie = 40

            # Index crew once by (company, role) so the per-movie loops do a
            # dictionary lookup instead of rescanning the whole crew list.
            # Each bucket keeps the original fetch order.
            crew_by_company_role = {}
            for member in crew_members:
                key = (member['company_id'], member['role_id'])
                crew_by_company_role.setdefault(key, []).append(member)

            # Roles beyond the first 30 do not depend on the movie, so the
            # list is computed once up front.
            core_role_ids = set(first_30_roles)
            additional_role_ids = [
                role['id']
                for role in all_roles
                if role['id'] not in core_role_ids and role['id'] != actor_role_id
            ]

            for movie in movies:
                movie_code = movie['movie_code']
                movie_company_id = movie['company_id']

                for role_id in first_30_roles:
                    eligible_crew = crew_by_company_role.get((movie_company_id, role_id))

                    # Skip if no eligible crew for this role in this company
                    if not eligible_crew:
                        continue

                    if role_id == actor_role_id:
                        # Never ask for more actors than exist:
                        # random.sample raises ValueError when k exceeds the
                        # population, which previously aborted the whole run
                        # for any company with fewer than 15 actors.
                        num_actors = min(len(eligible_crew), max_actors_per_movie)
                        for actor in random.sample(eligible_crew, k=num_actors):
                            insert_crew_data(cursor, actor, movie_code)
                    else:
                        # For other roles, select only one crew member
                        insert_crew_data(cursor, random.choice(eligible_crew), movie_code)

                # One extra crew member per remaining role, when available.
                for role_id in additional_role_ids:
                    eligible_crew = crew_by_company_role.get((movie_company_id, role_id))
                    if eligible_crew:
                        insert_crew_data(cursor, random.choice(eligible_crew), movie_code)

            connection.commit()
            print("Crew info on payment and movie involvement inserted successfully.")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        if connection:
            connection.close()

def insert_crew_data(cursor, crew_member, movie_code):
    """Insert one crew_info row tying a crew member to a movie.

    Only crew_id, movie_code and role_id are written; the statement sets no
    other crew_info columns.

    Args:
        cursor: Object exposing ``execute(sql, params)``.
        crew_member: Mapping that provides 'crew_id' and 'role_id'.
        movie_code: Code of the movie the crew member is assigned to.
    """
    # Resolve the bound values first so the execute call stays short.
    row_values = (crew_member['crew_id'], movie_code, crew_member['role_id'])
    insert_statement = """
    INSERT INTO crew_info (crew_id, movie_code, role_id) 
    VALUES (%s, %s, %s)
    """
    cursor.execute(insert_statement, row_values)

# Call the function
# insert_crew_info()
