### Importing packages

In [1]:
import csv
import pandas as pd
pd.set_option('display.max_colwidth', None)

### Reading file

In [2]:
jeopardy_df = pd.read_csv('jeopardy.csv')
jeopardy_df.head()

Unnamed: 0,Show Number,Air Date,Round,Category,Value,Question,Answer
0,4680,2004-12-31,Jeopardy!,HISTORY,$200,"For the last 8 years of his life, Galileo was under house arrest for espousing this man's theory",Copernicus
1,4680,2004-12-31,Jeopardy!,ESPN's TOP 10 ALL-TIME ATHLETES,$200,"No. 2: 1912 Olympian; football star at Carlisle Indian School; 6 MLB seasons with the Reds, Giants & Braves",Jim Thorpe
2,4680,2004-12-31,Jeopardy!,EVERYBODY TALKS ABOUT IT...,$200,"The city of Yuma in this state has a record average of 4,055 hours of sunshine each year",Arizona
3,4680,2004-12-31,Jeopardy!,THE COMPANY LINE,$200,"In 1963, live on ""The Art Linkletter Show"", this company served its billionth burger",McDonald's
4,4680,2004-12-31,Jeopardy!,EPITAPHS & TRIBUTES,$200,"Signer of the Dec. of Indep., framer of the Constitution of Mass., second President of the United States",John Adams


### Renaming columns

In [3]:
jeopardy_df.columns = ['Show Number', 'Air Date', 'Round', 'Category', 'Value', 'Question', 'Answer']

### Function to filter a row for a list of words

In [4]:
def filter_dataset(df, words, col):
    return df[df[col].apply(lambda row : all(word.lower() in row.lower() for word in words))]

In [5]:
filtered = filter_dataset(jeopardy_df, ['King', 'England'], 'Question')
filtered

Unnamed: 0,Show Number,Air Date,Round,Category,Value,Question,Answer
4953,3003,1997-09-24,Double Jeopardy!,"""PH""UN WORDS",$200,"Both England's King George V & FDR put their stamp of approval on this ""King of Hobbies""",Philately (stamp collecting)
6337,3517,1999-12-14,Double Jeopardy!,Y1K,$800,"In retaliation for Viking raids, this ""Unready"" king of England attacks Norse areas of the Isle of Man",Ethelred
9191,3907,2001-09-04,Double Jeopardy!,WON THE BATTLE,$800,This king of England beat the odds to trounce the French in the 1415 Battle of Agincourt,Henry V
11710,2903,1997-03-26,Double Jeopardy!,BRITISH MONARCHS,$600,"This Scotsman, the first Stuart king of England, was called ""The Wisest Fool in Christendom""",James I
13454,4726,2005-03-07,Jeopardy!,A NUMBER FROM 1 TO 10,$1000,It's the number that followed the last king of England named William,4
...,...,...,...,...,...,...,...
208295,4621,2004-10-11,Jeopardy!,THE VIKINGS,$600,In 1066 this great-great grandson of Rollo made what some call the last Viking invasion of England,William the Conqueror
208742,4863,2005-11-02,Double Jeopardy!,BEFORE & AFTER,"$3,000",Dutch-born king who ruled England jointly with Mary II & is a tasty New Zealand fish,William of Orange roughy
213870,5856,2010-02-15,Double Jeopardy!,URANUS,$1600,In 1781 William Herschel discovered Uranus & initially named it after this king of England,George III
216021,1881,1992-11-09,Double Jeopardy!,HISTORIC NAMES,$1000,"His nickname was ""Bertie"", but he used this name & number when he became king of England in 1901",Edward VII


### Difficulty of Questions according to filtered condition

In [6]:
jeopardy_df['Value_float'] = jeopardy_df['Value'].str.replace(',','').replace('None','0').str.strip('$')
jeopardy_df['Value_float'] = jeopardy_df['Value_float'].astype('float')
print('The average of all questions is', jeopardy_df.Value_float.mean())

The average of all questions is 739.9884755451067


In [7]:
filtered_king = filter_dataset(jeopardy_df, ['King'], 'Question')
print('The average of questions with king is', filtered_king.Value_float.mean())

The average of questions with king is 771.8833850722094


### Counts of Unique Answers of filtered condition

In [8]:
def unique_answers_count(df):
    return df.Answer.value_counts()

In [9]:
unique_answers_count(filtered_king)

Henry VIII             55
Solomon                35
Richard III            33
Louis XIV              31
David                  30
                       ..
Appalachian Trail       1
Pyramids                1
the Romans              1
soft shoulder           1
Siam (for Thailand)     1
Name: Answer, Length: 5268, dtype: int64

### Extra Questions

In [10]:
jeopardy_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 216930 entries, 0 to 216929
Data columns (total 8 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   Show Number  216930 non-null  int64  
 1   Air Date     216930 non-null  object 
 2   Round        216930 non-null  object 
 3   Category     216930 non-null  object 
 4   Value        216930 non-null  object 
 5   Question     216930 non-null  object 
 6   Answer       216928 non-null  object 
 7   Value_float  216930 non-null  float64
dtypes: float64(1), int64(1), object(6)
memory usage: 13.2+ MB


### Changing the dtype of Air Date to Datetime

In [11]:
jeopardy_df['Air Date'] = pd.to_datetime(jeopardy_df['Air Date'])

In [12]:
filtered_computer = filter_dataset(jeopardy_df, ['Computer'], 'Question')
filtered_computer

Unnamed: 0,Show Number,Air Date,Round,Category,Value,Question,Answer,Value_float
309,5690,2009-05-08,Jeopardy!,OLD FOLKS IN THEIR 30s,$600,Linus Torvalds is the father of this operating system used on cell phones & supercomputers,Linux,600.0
342,5690,2009-05-08,Double Jeopardy!,MATHEM-ATTACK!,$1200,"(<a href=""http://www.j-archive.com/media/2009-05-08_DJ_28.jpg"" target=""_blank"">Kelly of the Clue Crew shows an array of numbers enclosed in brackets on the monitor.</a>) A set of numbers in rows and columns can be used in many ways--for example, to encrypt a code or create 3-D computer graphics; the set shares this name with a 1999 film",a matrix,1200.0
1106,4085,2002-05-10,Double Jeopardy!,"""EN"" THE BEGINNING",$800,"2-word term for the consumer, for whom a computer is ultimately designed",an end user,800.0
1430,4960,2006-03-17,Jeopardy!,RECORD LOSSES IN 2005,$200,"A computer with 98,000 names & SSNs was reported stolen from this oldest campus of the Univ. of Calif.",Berkeley,200.0
2410,3214,1998-07-16,Jeopardy!,PRE-COLUMBIAN CULTURES,$500,Warriors of this Yucatan civilization battle in the computer-enhanced mural seen here:,Mayans,500.0
...,...,...,...,...,...,...,...,...
215116,6036,2010-12-06,Double Jeopardy!,"WHAT HAPPENED TO THE ""MOTHER""s?",$400,The main circuitry of a computer is found here,the motherboard,400.0
215539,4216,2002-12-23,Double Jeopardy!,WORDS WITHIN WORDS,$1600,You may have to do this to a computer disk to enable it to store information,format (in(format)ion),1600.0
216299,5236,2007-05-21,Double Jeopardy!,FORBES' CARS FOR THE RICH,$400,This get personal for a personal computer mogul Michael Dell in a 2005 H2 from this manufacturer,Hummer,400.0
216527,3038,1997-11-12,Jeopardy!,COMPUTERESE,$400,"Like a waiter, a network computer might say ""I'll be your"" this & store files or manage printers",Server,400.0


### Comparing Years that had questions about Computers

In [13]:
filtered_computer.groupby(filtered_computer['Air Date'].dt.year)['Show Number'].count().reset_index()

Unnamed: 0,Air Date,Show Number
0,1984,1
1,1986,1
2,1987,1
3,1988,2
4,1989,1
5,1990,5
6,1991,1
7,1992,1
8,1993,1
9,1995,2


### Comparing Rounds that had questions about Literature

In [14]:
filtered_cat_lit = filter_dataset(jeopardy_df, ['Literature'], 'Category')
filtered_cat_lit.groupby('Round').Category.count().reset_index()

Unnamed: 0,Round,Category
0,Double Jeopardy!,1054
1,Final Jeopardy!,82
2,Jeopardy!,423


### Asking and answering random questions from Jeopardy

In [15]:
import random
def random_question(df):
    continue_stop = 'yes'
    correct_ans = 0
    wrong_ans = 0
    while continue_stop == 'yes':
        print('Your question is ' + df[['Question']].iloc[random.randint(1, len(df))])
        ans = input('Please input your answer: ')
        if ans.lower() == all(df[['Answer']].iloc[random.randint(1, len(df))]):
            correct_ans += 1
            print('Thats a correct answer, you have answered ' + str(correct_ans) + ' out of ' + str(correct_ans + wrong_ans) + ' questions correctly')
        else:
            wrong_ans += 1
            print('Thats a wrong answer, you have answered ' + str(wrong_ans) + ' out of ' + str(correct_ans + wrong_ans) + ' questions wrongly')
        continue_stop = input('Do you want to continue: ')
        if continue_stop.lower() == 'yes':
            continue_stop = 'yes'

In [16]:
random_question(jeopardy_df)

Question    Your question is 39 members of this doomsday cult in California committed suicide in March
Name: 52666, dtype: object
Please input your answer: logan
Thats a wrong answer, you have answered 1 out of 1 questions wrongly
Do you want to continue: yes
Question    Your question is From 1937 to 1954 he directed NBC's Radio Symphony Orchestra
Name: 76320, dtype: object
Please input your answer: stanlee
Thats a wrong answer, you have answered 2 out of 2 questions wrongly
Do you want to continue: yes
Question    Your question is This coral reef dweller seen <a href="http://www.j-archive.com/media/2004-07-06_DJ_30.jpg" target="_blank">here</a> can be vicious & has been known to attack underwater divers
Name: 3634, dtype: object
Please input your answer: no
Thats a wrong answer, you have answered 3 out of 3 questions wrongly
Do you want to continue: no
