<a href="https://colab.research.google.com/github/ihsanmujahid/Codecademy_IHSAN/blob/Jeopardy-Project/Jeopardy_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd

# Show full column contents when printing. ``None`` means "no limit"; the
# legacy value -1 is deprecated and is no longer accepted by newer pandas
# releases.
pd.set_option('display.max_colwidth', None)

# Load the data into a DataFrame
data = pd.read_csv('jeopardy.csv')

# Investigate the contents of the DataFrame
print(data.head())

# Most header names in the CSV carry a leading space; map them to the
# space-free spelling so the columns are easier to reference below.
data = data.rename(columns={" Air Date": "Air Date",
                            " Round": "Round",
                            " Category": "Category",
                            " Value": "Value",
                            " Question": "Question",
                            " Answer": "Answer"})

# Function to filter dataset for questions containing all words in a list
def filter_questions(data, words):
    """Return the rows of ``data`` whose question contains every given word.

    Matching is case-insensitive and substring-based, so ``"king"`` also
    matches a question containing ``"Viking"``.

    Args:
        data: DataFrame with a ``'Question'`` column.
        words: Iterable of strings that must all occur in a question.

    Returns:
        The matching subset of ``data`` (same columns, original index).
        Rows whose question is missing never match a non-empty word list.
    """
    # Lowercase the search terms once for case-insensitive matching.
    needles = [word.lower() for word in words]

    # Missing or non-text questions lower-case to NaN; treat them as empty
    # text so the substring test below cannot raise TypeError.
    questions = data['Question'].str.lower().fillna('')

    # astype(bool) keeps the mask boolean even when ``data`` has no rows;
    # an empty object-dtype mask would be read as a column selection.
    matches = questions.apply(
        lambda question: all(needle in question for needle in needles)
    ).astype(bool)

    return data[matches]

# Smoke-test filter_questions: keep only the questions that mention both
# "King" and "England", then show their text.
filtered_data = filter_questions(data=data, words=["King", "England"])
print(filtered_data.loc[:, 'Question'])

# Function to convert "Value" column to floats
def convert_value_to_float(value):
    """Convert a dollar amount such as ``"$2,000"`` to a float.

    Args:
        value: Usually a string with an optional ``$`` sign and thousands
            separators. Non-string inputs (numbers, ``None``, a NaN from a
            missing cell) are accepted as well.

    Returns:
        The amount as a float, or ``None`` when the input cannot be
        interpreted as a number (for example the text ``"None"``). A NaN
        input is passed through as NaN.
    """
    # Only text needs cleaning; calling .replace on a float/None would raise.
    if isinstance(value, str):
        value = value.replace('$', '').replace(',', '')  # Remove dollar sign and comma
    try:
        return float(value)
    except (TypeError, ValueError):
        # TypeError covers None and other non-numeric objects,
        # ValueError covers text that is not a number.
        return None

# Store a numeric version of "Value" in a new "Float Value" column
data['Float Value'] = data['Value'].map(convert_value_to_float)

# Average the numeric value over every question that mentions "King"
king_questions = filter_questions(data, ["King"])
average_value = king_questions.loc[:, 'Float Value'].mean()
print("Average value of questions containing the word 'King':", average_value)

# Function to count unique answers in a dataset
def count_unique_answers(data):
    """Return the number of distinct values in the 'Answer' column of ``data``."""
    answers = data['Answer']
    return answers.nunique()

# Report how many distinct answers the whole dataset contains
unique_answers_count = count_unique_answers(data=data)
print("Number of unique answers:", unique_answers_count)

# Exploration ideas:
# 1. Investigate the ways in which questions change over time by filtering by the date.
#    How many questions from the 90s use the word "Computer" compared to questions from the 2000s?

# 1990s: air dates beginning with "199", restricted to questions mentioning "Computer"
nineties_computer_questions = filter_questions(
    data.loc[data['Air Date'].str.startswith('199')],
    ["Computer"],
)
num_nineties_computer_questions = nineties_computer_questions.shape[0]
print("Number of questions from the 90s containing the word 'Computer':", num_nineties_computer_questions)

# 2000s: air dates beginning with "200", restricted to questions mentioning "Computer"
two_thousands_computer_questions = filter_questions(
    data.loc[data['Air Date'].str.startswith('200')],
    ["Computer"],
)
num_two_thousands_computer_questions = two_thousands_computer_questions.shape[0]
print("Number of questions from the 2000s containing the word 'Computer':", num_two_thousands_computer_questions)

# 2. Is there a connection between the round and the category?
#    Are you more likely to find certain categories, like "Literature" in Single Jeopardy or Double Jeopardy?

# Count the rows for every (round, category) pair
category_counts = (
    data.groupby(['Round', 'Category'])
    .size()
    .reset_index(name='Count')
)

# Keep only the rows whose category is exactly "LITERATURE"
literature_category_counts = category_counts.loc[category_counts['Category'].eq('LITERATURE')]

print("Category counts for 'Literature' in each round:")
print(literature_category_counts)

# 3. Build a system to quiz yourself.
#    Grab random questions, and use the input function to get a response from the user.
#    Check to see if that response was right or wrong.

import random

# Function to present a random question and check the user's response
def quiz_system(data):
    """Ask one randomly chosen question on the console and grade the reply.

    The reply is compared with the stored answer case-insensitively and
    ignoring surrounding whitespace; the result is printed.

    Args:
        data: DataFrame with 'Question' and 'Answer' columns.

    Raises:
        ValueError: If ``data`` has no rows to draw a question from.
    """
    if len(data) == 0:
        raise ValueError("Cannot run the quiz: the dataset contains no questions.")

    # Fetch the row once so question and answer come from the same lookup.
    row = data.iloc[random.randrange(len(data))]
    question = row['Question']
    answer = row['Answer']

    print("Question:", question)
    user_response = input("Your answer: ")

    # The stored answer may not be a string (e.g. NaN for a missing cell),
    # so compare on its text form instead of calling .lower() on it directly.
    expected = str(answer).strip().lower()
    if user_response.strip().lower() == expected:
        print("Correct!")
    else:
        print("Wrong. The correct answer is:", answer)

# Play one round of the quiz using the full dataset
quiz_system(data=data)




  pd.set_option('display.max_colwidth', -1)


   Show Number    Air Date      Round                         Category  Value  \
0  4680         2004-12-31  Jeopardy!  HISTORY                          $200    
1  4680         2004-12-31  Jeopardy!  ESPN's TOP 10 ALL-TIME ATHLETES  $200    
2  4680         2004-12-31  Jeopardy!  EVERYBODY TALKS ABOUT IT...      $200    
3  4680         2004-12-31  Jeopardy!  THE COMPANY LINE                 $200    
4  4680         2004-12-31  Jeopardy!  EPITAPHS & TRIBUTES              $200    

                                                                                                      Question  \
0  For the last 8 years of his life, Galileo was under house arrest for espousing this man's theory              
1  No. 2: 1912 Olympian; football star at Carlisle Indian School; 6 MLB seasons with the Reds, Giants & Braves   
2  The city of Yuma in this state has a record average of 4,055 hours of sunshine each year                      
3  In 1963, live on "The Art Linkletter Show", this compa