In [139]:
import pandas as pd # type: ignore
import os # type: ignore
import opendatasets as od # type: ignore
import numpy as np # type: ignore
import random # type: ignore

In [None]:
# Kaggle URL of the IMDb dataset that the rest of the notebook reads from.
dataset = 'https://www.kaggle.com/datasets/ashirwadsangwan/imdb-dataset/data'

# Fetch the dataset with opendatasets.
# NOTE(review): presumably this asks for Kaggle credentials and unpacks into
# ./imdb-dataset (the folder the following cells read) — confirm.
od.download(dataset)

In [141]:
# Folder that holds the dataset's .tsv files; the read_table cells below load from this same location.
data_dir = './imdb-dataset'

In [None]:
os.listdir(data_dir)

# role dataset

In [143]:
# Load the people table (name.basics.tsv). The path is built from data_dir so
# the dataset location is configured in exactly one place.
roles_df = pd.read_table(os.path.join(data_dir, 'name.basics.tsv'))

In [144]:
# Treat the literal string '\N' as the missing-value marker: replace it with NaN, in place.
roles_df.replace('\\N', np.nan, inplace=True)

In [145]:
# Drop, in place, every row that has a NaN in any column.
# NOTE(review): no `subset=` is given, so a missing value in a single column
# (e.g. deathYear) removes the whole row — presumably this discards everyone
# without a recorded death year; confirm that is intended.
roles_df.dropna(inplace=True)

In [None]:
roles_df

In [None]:
roles_df.info()

In [148]:
# Split the comma-separated primaryProfession string into one column per profession.
# NOTE(review): the assignment names exactly three target columns, so the split
# has to yield exactly three columns for this data — confirm.
roles_df[['first_profession', 'second_profession', 'third_profession']] = roles_df['primaryProfession'].str.split(',', expand=True)

In [149]:
# primaryProfession has been expanded into the *_profession columns, so the
# original combined column is removed.
roles_df = roles_df.drop(columns=['primaryProfession'])

In [150]:
# Split the comma-separated knownForTitles string into one title-id column per entry.
# NOTE(review): the assignment names exactly four target columns, so the split
# has to yield exactly four columns for this data — confirm.
roles_df[['movie_1', 'movie_2', 'movie_3', 'movie_4']] = roles_df['knownForTitles'].str.split(',', expand=True)

In [151]:
# knownForTitles has been expanded into movie_1..movie_4, so the original
# combined column is removed.
roles_df = roles_df.drop(columns=['knownForTitles'])

In [None]:
roles_df

In [153]:
# Rename the original column headers to the snake_case names used by the
# merge and quiz cells further down (name_id, name_surname, ...).
roles_df = roles_df.rename(columns={'nconst': 'name_id',
                                    'primaryName': 'name_surname',
                                    'birthYear': 'birth',
                                    'deathYear': 'death'})

In [None]:
roles_df

# movie dataset

In [None]:
# Load the titles table (title.basics.tsv). The path is built from data_dir so
# the dataset location is configured in exactly one place.
movie_df = pd.read_table(os.path.join(data_dir, 'title.basics.tsv'))

In [156]:
# Treat the literal string '\N' as the missing-value marker: replace it with NaN, in place.
movie_df.replace('\\N', np.nan, inplace=True)

In [157]:
# Drop, in place, every row that has a NaN in any column.
# NOTE(review): no `subset=` is given, so a missing value in a single column
# (e.g. endYear) removes the whole row — presumably this discards every title
# without an end year; confirm that is intended.
movie_df.dropna(inplace=True)

In [None]:
movie_df

In [159]:
# Split the comma-separated genres string into one column per genre.
# NOTE(review): the assignment names exactly three target columns, so the split
# has to yield exactly three columns for this data — confirm.
movie_df[['genre_1', 'genre_2', 'genre_3']] = movie_df['genres'].str.split(',', expand=True)

In [160]:
# genres has been expanded into genre_1..genre_3, so the original combined
# column is removed.
movie_df = movie_df.drop(columns=['genres'])

In [161]:
# Rename the original column headers to the snake_case names used by the
# merge and quiz cells further down (movie_id, title, start_year, ...).
movie_df = movie_df.rename(columns = {'tconst': 'movie_id',
                                      'titleType': 'type',
                                      'primaryTitle': 'title',
                                      'originalTitle': 'original_title',
                                      'isAdult': 'adult',
                                      'startYear': 'start_year',
                                      'endYear': 'end_year',
                                      'runtimeMinutes': 'minutes_runtimes'})

# region dataset

In [None]:
# Load the alternative-titles table (title.akas.tsv). The path is built from
# data_dir so the dataset location is configured in exactly one place.
# NOTE(review): region_df is cleaned below but not referenced by the merge or
# quiz cells visible in this notebook — confirm it is still needed.
region_df = pd.read_table(os.path.join(data_dir, 'title.akas.tsv'))

In [None]:
# Treat the literal string '\N' as the missing-value marker: replace it with NaN, in place.
region_df.replace('\\N', np.nan, inplace=True)

In [None]:
# Drop, in place, every row that has a NaN in any column.
# NOTE(review): no `subset=` is given, so a missing value in any single column
# removes the whole row — confirm that is intended.
region_df.dropna(inplace=True)

In [None]:
region_df

# merge dataset

In [None]:
roles_df

In [None]:
movie_df

In [113]:
# Each person carries up to four "known for" title ids (movie_1..movie_4).
# Join the people table to the titles table once per slot, then stack the
# results so every (person, known-for title) pair becomes one row.
known_for_cols = ['movie_1', 'movie_2', 'movie_3', 'movie_4']

# Join keys and ids are only needed to perform the merge, not afterwards.
helper_cols = known_for_cols + ['movie_id', 'name_id']

merge_set1, merge_set2, merge_set3, merge_set4 = [
    pd.merge(roles_df, movie_df, left_on=col, right_on='movie_id').drop(columns=helper_cols)
    for col in known_for_cols
]

# ignore_index=True gives the stacked frame a fresh 0..n-1 index. Without it
# the per-merge indexes (each starting at 0) are kept as-is, so the same label
# would appear up to four times in merge_set.
merge_set = pd.concat(
    [merge_set1, merge_set2, merge_set3, merge_set4],
    ignore_index=True,
)

In [None]:
merge_set

# quiz

In [119]:
# Convert start_year to integers. Values that cannot be parsed as a number are
# dropped rather than replaced by a placeholder year 0: a 0 would fall into
# the "hard" (< 1960) quiz bucket as a bogus correct answer and would distort
# the density plot of start_year.
start_year_numeric = pd.to_numeric(merge_set['start_year'], errors='coerce')
merge_set = (
    merge_set
    .assign(start_year=start_year_numeric)
    .dropna(subset=['start_year'])
    .astype({'start_year': int})
)

In [None]:
merge_set['start_year'].plot(kind='density', figsize=(14,6))

In [121]:
def difficulty(set):
    """Ask the player for a difficulty level and filter the titles accordingly.

    The level decides which production years can be asked about:

    * ``hard``   -- titles that started before 1960
    * ``medium`` -- titles that started from 1960 up to and including 1979
    * ``easy``   -- titles that started in 1980 or later

    The three ranges are contiguous, so every year belongs to exactly one
    level. The answer is matched case-insensitively and ignoring surrounding
    whitespace.

    Parameters
    ----------
    set : pandas.DataFrame
        Frame with a numeric ``start_year`` column. (The name shadows the
        ``set`` builtin; it is kept so existing callers keep working.)

    Returns
    -------
    pandas.DataFrame
        The rows of ``set`` that match the chosen level, or ``set`` itself
        unchanged when the answer is not one of the three known levels.
    """
    dif = input('choose the difficulty between easy, medium and hard').strip().lower()

    # Unknown choice: no filtering, the quiz is played with every title.
    if dif not in ('hard', 'medium', 'easy'):
        return set

    years = set['start_year']
    if dif == 'hard':
        return set[years < 1960]
    if dif == 'medium':
        return set[(years >= 1960) & (years < 1980)]
    return set[years >= 1980]

In [136]:
def quiz(set):
    """Run an interactive "guess the production year" quiz on the console.

    The player first picks a difficulty (see ``difficulty``), then the number
    of rounds. In every round one random row is drawn (rows may repeat across
    rounds), its title and person are shown, and the player types a year. A
    non-numeric answer simply counts as wrong.

    Parameters
    ----------
    set : pandas.DataFrame
        Frame with ``title``, ``name_surname`` and ``start_year`` columns.
        (The name shadows the ``set`` builtin; it is kept so existing callers
        keep working.)

    Returns
    -------
    None
        Progress and the final score are printed.

    Raises
    ------
    ValueError
        If the number of rounds typed by the player is not an integer.
    """
    pool = difficulty(set)
    n_rows = len(pool)
    if n_rows == 0:
        # Nothing to ask about; drawing a row from an empty frame would fail.
        print('no titles available for this difficulty')
        return

    rounds = int(input('how many rounds do you want to play ?'))
    score = 0

    for _ in range(rounds):
        # Draw a *position*, not an index label: the rows are read with
        # .iloc below, and after filtering/concatenation the labels neither
        # match positions nor are guaranteed to be unique.
        pos = random.randrange(n_rows)
        title = pool['title'].iloc[pos]
        name_surname = pool['name_surname'].iloc[pos]
        correct_answer = pool['start_year'].iloc[pos]

        print(f"in which year was '{title}' of '{name_surname}' produced ?")

        try:
            my_answer = int(input('enter your answer'))
        except ValueError:
            # A non-numeric answer can never match a year.
            my_answer = None

        if my_answer is not None and my_answer == correct_answer:
            print('your are correct')
            score += 1
        else:
            print('you are wrong')
        print(f'your score is: {score}')

        print(f"the correct answer is: '{correct_answer}'")
        print('----------------------------------')

    print('end')
    print(f'your final score is: {score}')

In [None]:
quiz(merge_set)