In [248]:
import pandas as pd

df = pd.read_csv('datasets/jeopardy-project.csv', delimiter=';')
df.head()

Unnamed: 0,Show Number,Air Date,Round,Category,Value,Question,Answer
0,4680,2004-12-31,Jeopardy!,HISTORY,"$200,00","For the last 8 years of his life, Galileo was ...",Copernicus
1,4680,2004-12-31,Jeopardy!,ESPN's TOP 10 ALL-TIME ATHLETES,"$200,00",No. 2: 1912 Olympian; football star at Carlisl...,Jim Thorpe
2,4680,2004-12-31,Jeopardy!,EVERYBODY TALKS ABOUT IT...,"$200,00",The city of Yuma in this state has a record av...,Arizona
3,4680,2004-12-31,Jeopardy!,THE COMPANY LINE,"$200,00","In 1963, live on ""The Art Linkletter Show"", th...",McDonald's
4,4680,2004-12-31,Jeopardy!,EPITAPHS & TRIBUTES,"$200,00","Signer of the Dec. of Indep., framer of the Co...",John Adams


In [249]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 216930 entries, 0 to 216929
Data columns (total 7 columns):
 #   Column       Non-Null Count   Dtype 
---  ------       --------------   ----- 
 0   Show Number  216930 non-null  int64 
 1    Air Date    216930 non-null  object
 2    Round       216930 non-null  object
 3    Category    216930 non-null  object
 4    Value       216930 non-null  object
 5    Question    216930 non-null  object
 6    Answer      216928 non-null  object
dtypes: int64(1), object(6)
memory usage: 11.6+ MB


In [250]:
df.isna().count()

Show Number    216930
 Air Date      216930
 Round         216930
 Category      216930
 Value         216930
 Question      216930
 Answer        216930
dtype: int64

In [251]:
new_columns = [i.strip().lower() for i in df.columns]
df.columns = new_columns
df.columns

Index(['show number', 'air date', 'round', 'category', 'value', 'question',
       'answer'],
      dtype='object')

In [252]:
df['value'] = df['value'].apply(lambda row: row[1:5].replace(',', '') if row != 'None' else 0)
df['value'] = df['value'].astype('float32')

In [260]:
def filtering_df(dataset, words):

    filtering = lambda x: all(word.lower() in x.lower() for word in words)
    return dataset.loc[dataset["question"].apply(filtering)]

filtered_df = filtering_df(df, ['truly'])
filtered_df

Unnamed: 0,show number,air date,round,category,value,question,answer
5672,3911,2001-09-10,Double Jeopardy!,OOH... A WISE GUY,600.0,The writings of this man seen here were truly ...,Karl Marx
9151,3907,2001-09-04,Jeopardy!,FUN WITH OPERA,200.0,It was truly a red-letter day when an opera ba...,The Scarlet Letter
10197,5416,2008-03-10,Jeopardy!,THE CAT,1000.0,This breed of domestic feline from Maine is th...,a Maine coon
11804,3053,1997-12-03,Jeopardy!,MAMMALS,400.0,They're the only truly amphibious members of t...,Otters
11951,3276,1998-11-30,Jeopardy!,PEOPLE EAT THAT?,100.0,"The book ""Fashionable Food"" tells how to make ...",popcorn
14412,5342,2007-11-27,Double Jeopardy!,NASAL PASSAGES,800.0,"""Truly that nose is the glorious cross he bear...",(Cyrano) de Bergerac
20408,3665,2000-07-07,Jeopardy!,LIFE SCIENCE,100.0,"These animals, the only mammals that can truly...",Bats
30501,6152,2011-05-17,Jeopardy!,BOND FILMS IN OTHER WORDS,400.0,"""One Secret Agent Devoted To Yours Truly""",The Spy Who Loved Me
33424,3388,1999-05-05,Jeopardy!,FUTILE PHRASES,400.0,"If your dream is truly impossible, you're just...",Your head
62097,2919,1997-04-17,Double Jeopardy!,WORLD NOVELISTS,400.0,"The idiot of ""The Idiot"" is this Russian autho...",Fyodor Dostoevsky


In [233]:
df['value'].mean()

662.40857

In [234]:
def calculate_avg(words):
    
    found_row = filtering_df(df, words)
    return found_row['value'].mean()

def difficulty(words):
    
    if calculate_avg(words) > 700:
        return 'Avg.: {} Too Difficult'.format(calculate_avg(words))
    elif calculate_avg(words)> 500:
        return 'Avg.: {} Difficult'.format(calculate_avg(words))
    else:
        return 'Avg.: {} Easy'.format(calculate_avg(words))

In [235]:
words = ['Mike', 'Michael']
calculate_avg(words)

683.3333

In [236]:
difficulty(words)

'Avg.: 683.3333129882812 Difficult'

In [256]:
def unique_answer(dataset, words):
    filtered_df = filtering_df(dataset, words)
    unique_values = filtered_df.groupby('answer').question.count().reset_index()
    return unique_values.sort_values(by=['question'], ascending=True)
unique_answer(df, words)

Unnamed: 0,answer,question
0,The Jungle,1
3478,Will Smith,1
3477,Will & Grace,1
3475,Wiffleball,1
3474,Wicked,1
...,...,...
1027,David,30
2070,Louis XIV,31
2723,Richard III,33
2963,Solomon,35


In [295]:
df['year'] = pd.to_datetime(df['air date']).dt.year
df

Unnamed: 0,show number,air date,round,category,value,question,answer,year
0,4680,2004-12-31,Jeopardy!,HISTORY,200.0,"For the last 8 years of his life, Galileo was ...",Copernicus,2004
1,4680,2004-12-31,Jeopardy!,ESPN's TOP 10 ALL-TIME ATHLETES,200.0,No. 2: 1912 Olympian; football star at Carlisl...,Jim Thorpe,2004
2,4680,2004-12-31,Jeopardy!,EVERYBODY TALKS ABOUT IT...,200.0,The city of Yuma in this state has a record av...,Arizona,2004
3,4680,2004-12-31,Jeopardy!,THE COMPANY LINE,200.0,"In 1963, live on ""The Art Linkletter Show"", th...",McDonald's,2004
4,4680,2004-12-31,Jeopardy!,EPITAPHS & TRIBUTES,200.0,"Signer of the Dec. of Indep., framer of the Co...",John Adams,2004
...,...,...,...,...,...,...,...,...
216925,4999,2006-05-11,Double Jeopardy!,RIDDLE ME THIS,2000.0,This Puccini opera turns on the solution to 3 ...,Turandot,2006
216926,4999,2006-05-11,Double Jeopardy!,"""T"" BIRDS",2000.0,In North America this term is properly applied...,a titmouse,2006
216927,4999,2006-05-11,Double Jeopardy!,AUTHORS IN THEIR YOUTH,2000.0,"In Penny Lane, where this ""Hellraiser"" grew up...",Clive Barker,2006
216928,4999,2006-05-11,Double Jeopardy!,QUOTATIONS,2000.0,"From Ft. Sill, Okla. he made the plea, Arizona...",Geronimo,2006


How many questions from the 90s use the word "Computer" compared to questions from the 2000s?

In [332]:
filtered_df.year.astype('float32')

def dates(start_year, end_year):
    return df[(df.year > start_year) & (df.year < end_year)]

word_list = ['computer']
ninties = filtering_df(dates(1990,1999), word_list).question.count()
twenties = filtering_df(dates(2000,2020), word_list).question.count()

print('Number of computer word in 90s: {}'.format(ninties))
print('Number of computer word in 20s: {}'.format(twenties))


Number of computer word in 90s: 61
Number of computer word in 20s: 302


In [363]:
def quiz(df):
    
    
    import random
    loop = 0
    point = 0
    
    print('Every true answer is 10 point. Be careful')
    
    while loop<3:
        
        random_num = random.randint(0, len(df))
        question = df['question'][random_num]
        answer = df['answer'][random_num]
       
        print(question)
        user_input = input('Please enter your answer here!')

        if user_input.lower() == answer.lower:
            
            print('Your answer is Correct')
            print('You are great')
            
            point += 10
            
        else:
            print('  ')
            print('Wrong answer')
            
            print('The answer is {}'.format(answer))
           
        
        print('---------------------')
    
        
        loop +=1
    
    return 'Final Score is {}'.format(point)
        
    
    
    
    
quiz(df)

Every true answer is 10 point. Be careful
In the work seen here, the artist obeyed this 17th century English leader's wish to be shown, warts & all
Please enter your answer here!werwe
  
Wrong answer
The answer is Oliver Cromwell
---------------------
Faces are garishly lit by gas lamp in "At the Moulin Rouge", one of his many depictions of Parisian night life
Please enter your answer here!wewe
  
Wrong answer
The answer is Toulouse-Lautrec
---------------------
His 1922 novella "Siddhartha" tells the story of an Indian boy on a spiritual journey during the time of Buddha
Please enter your answer here!ewe
  
Wrong answer
The answer is (Hermann) Hesse
---------------------


'Final Score is 0'