In [None]:
import pandas as pd
# Display the full contents of every column. None means "no limit"; the old -1
# sentinel is deprecated since pandas 1.0 and rejected by newer releases.
pd.set_option('display.max_colwidth', None)

In [None]:
# Load the dataset, then overwrite the header with clean names
# (per the original note, this removes leading whitespace from the column labels).
jep = pd.read_csv('jeopardy.csv')
jep.columns = [
    'Show Number',
    'Air Date',
    'Round',
    'Category',
    'Value',
    'Question',
    'Answer',
]

In [None]:
jep.head(n=10)  # preview the first ten rows of the dataframe

In [None]:
def filter_theme(df, word_list):
    """Return the rows of ``df`` whose 'Question' text contains every word in ``word_list``.

    Matching is case-insensitive and substring-based.

    df        -- DataFrame with a 'Question' column.
    word_list -- iterable of strings; an empty list matches every row.

    Non-string questions (e.g. NaN for a missing value) are treated as empty
    text instead of raising, so they only match when ``word_list`` is empty.
    """
    # Lower-case the theme words once rather than once per row.
    needles = [word.lower() for word in word_list]

    def mentions_all(text):
        haystack = text.lower() if isinstance(text, str) else ''
        return all(needle in haystack for needle in needles)

    # astype(bool) keeps the mask boolean even when df has no rows, so the
    # indexing below is always row selection.
    mask = df['Question'].apply(mentions_all).astype(bool)
    return df[mask]

In [None]:
# Keep only the questions that mention every word of the chosen theme.

theme = ['Emperor', 'War']  # any number of words may be listed here
theme_jep = filter_theme(df=jep, word_list=theme)
theme_jep

In [None]:
# List the distinct entries of the Value column to see what cleaning is needed
jep['Value'].unique()

In [None]:
# Reformat the Value column to enable later calculations:
# strip '$' and ',' characters, blank out the literal text 'None', then convert to numbers.
# The pattern is a raw string: '\$' in a normal string literal is an invalid escape sequence.
jep['Value'] = pd.to_numeric(
    jep['Value']
    .replace(r'[$,]', '', regex=True)
    .replace('None', '', regex=True)
)

In [None]:
# Average reward across all questions
jep['Value'].mean()

In [None]:
# Median reward across all questions
jep['Value'].median()

In [None]:
# Average reward for questions that mention Ireland
ireland_jep = filter_theme(df=jep, word_list=['Ireland'])
ireland_jep['Value'].mean()

In [None]:
# Average reward for questions that mention both Japan and War
japan_war_jep = filter_theme(df=jep, word_list=['Japan', 'War'])
japan_war_jep['Value'].mean()

In [None]:
# Average reward for questions that mention Mathematics
maths_jep = filter_theme(df=jep, word_list=['Mathematics'])
maths_jep['Value'].mean()

In [None]:
# Questions whose reward is higher than $10000
jep.loc[jep['Value'] > 10000]

In [None]:
# Average reward within the Double Jeopardy round
jep.loc[jep['Round'] == 'Double Jeopardy!', 'Value'].mean()

In [None]:
# Question(s) carrying the highest reward
jep.loc[jep['Value'] == jep['Value'].max()]

In [None]:
# Number of distinct shows in the dataframe
jep.loc[:, 'Show Number'].nunique()

In [None]:
# Number of distinct question categories
len(jep['Category'].unique())

In [None]:
# Count how many rows correspond to each unique answer in a dataset

def unique_answers(df):
    """Group ``df`` by its 'Answer' column and count the non-null entries per column.

    Returns a DataFrame indexed by answer, where each remaining column holds
    the number of non-null values found for that answer.
    """
    return df.groupby('Answer').count()

# Per-answer counts for the whole dataset, with 'Answer' restored as a regular column
u_answers = unique_answers(df=jep).reset_index()
u_answers


In [None]:
# What is the overall most common answer?

# Rebuilt from `jep` rather than from the previous value of `u_answers`, so this cell
# can be re-run safely: the earlier version failed with KeyError('Question') on a
# second run, because the column had already been renamed.
u_answers = unique_answers(jep).reset_index()[['Answer', 'Question']]
u_answers.columns = ['Answer', 'No of related questions']
u_answers[u_answers['No of related questions'] == u_answers['No of related questions'].max()]

In [None]:
# List every question whose answer is the most common one

top_answer = u_answers.loc[
    u_answers['No of related questions'] == u_answers['No of related questions'].max(),
    'Answer',
]

# If several answers tie for the maximum, the first matching row is used.
jep[jep['Answer'] == top_answer.iloc[0]]

In [None]:
# Per-answer counts restricted to the Ireland-related questions
ireland_answers = unique_answers(df=ireland_jep)
ireland_answers

In [None]:
# What is the most common answer to questions related to Ireland?

top_ire_answers = ireland_answers[ireland_answers.Question == ireland_answers.Question.max()]
top_ire_answers  # display the result; previously it was computed but never shown


In [None]:
# Find the most common answer to questions on a particular theme, and display the corresponding questions

def top_answer_questions(df,theme):
    """Print the most common answer among questions matching ``theme``, then those questions.

    df    -- DataFrame handed to ``filter_theme``; its 'Answer' and 'Question' columns are read.
    theme -- list of words forwarded to ``filter_theme`` and echoed in the summary line.

    Returns None; everything is written to standard output.
    """
    count_col = 'No of related questions'
    themed = filter_theme(df, theme)

    # One row per answer, with the count taken from the 'Question' column.
    tally = unique_answers(themed).reset_index()[['Answer', 'Question']]
    tally.columns = ['Answer', count_col]
    highest = tally[count_col].max()
    winners = tally[tally[count_col] == highest].Answer

    if len(winners) == 0:
        print('There are no questions related to this theme')
        return

    # If several answers tie for the maximum, the first matching row is used.
    best = winners.values[0]
    print('The most common answer related to ', end='')
    for word in theme:
        print(word, end=' ')
    print('is', end=' ')
    print("'", best, "'", end=' with ')
    print(highest, 'questions.')
    print()

    matches = themed[themed.Answer == best].reset_index(drop=True)
    print('Related questions:')
    # Each row is printed as a one-element array, exactly as before.
    for row in matches[['Question']].values:
        print(row)


# Most common answer for questions mentioning both Famine and Ireland
theme = ['Famine', 'Ireland']
top_answer_questions(df=jep, theme=theme)

In [None]:
# Most common answer for questions mentioning both Basketball and New York
theme = ['Basketball', 'New York']
top_answer_questions(df=jep, theme=theme)

In [None]:
# Find the top 5 most common answers to questions on a particular theme, and display the corresponding questions