In [198]:
import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import display, clear_output

## Import data

In [312]:
# Get data
data_jokes = 'jokes.csv'
data = 'train.csv'

# Initialize dataframes
df = pd.read_csv(data)
df_jokes = pd.read_csv(data_jokes)

df.head()

Unnamed: 0,id,user_id,joke_id,Rating
0,31030_110,31030,110,2.75
1,16144_109,16144,109,5.094
2,23098_6,23098,6,-6.438
3,14273_86,14273,86,4.406
4,18419_134,18419,134,9.375


In [313]:
df_jokes.head()

Unnamed: 0,joke_id,joke_text
0,1,Q. What's O. J. Simpson's web address? A. Slas...
1,2,How many feminists does it take to screw in a ...
2,3,Q. Did you hear about the dyslexic devil worsh...
3,4,They asked the Japanese visitor if they have e...
4,5,Q: What did the blind person say when given so...


In [314]:
df.shape

(1092059, 4)

## Merge dataframes

In [315]:
# Merge dataframes on joke id
df = pd.merge(df, df_jokes, left_on='joke_id', right_on='joke_id', how='left').drop('id', axis=1)
df.head()

Unnamed: 0,user_id,joke_id,Rating,joke_text
0,31030,110,2.75,"Judy was having trouble with her computer, so ..."
1,16144,109,5.094,One day the first grade teacher was reading th...
2,23098,6,-6.438,Q. What is orange and sounds like a parrot? A....
3,14273,86,4.406,Two attorneys went into a diner and ordered tw...
4,18419,134,9.375,A man is driving in the country one evening wh...


## Helper function: Display jokes

In [324]:
# Helper function to display joke text
def joke_text(joke_id):
    joke_text = df.at[joke_id, 'joke_text']
    return joke_text

In [325]:
joke_text(1)

'One day the first grade teacher was reading the story of the Three Little Pigs to her class. She came to the part of the story where the first pig was trying to accumulate the building materials for his home. She read, "...and so the pig went up to the man with the wheelbarrow full of straw and said, \'Pardon me sir, but may I have some of that straw to build my house?\'" The teacher paused then asked the class, "And what do you think that man said?" One little boy raised his hand and said, "I know...he said, \'Holy Shit! A talking pig!\'"'

## Narrow ratings
If you want to narrow the ratings you can use the code below.

In [317]:
# If we want to narrow the ratings we can use this cell

# Users per joke
#users_per_joke = df.joke_id.value_counts()
#users_per_joke.shape

# Jokes per users
#jokes_per_user = df.user_id.value_counts()
#jokes_per_user.shape

# Picking jokes that has been rated by more than 10 users
#df = df[df['joke_id'].isin(users_per_joke[users_per_joke>10].index)]

# User who rated more than 10 jokes
#df = df[df['user_id'].isin(jokes_per_user[jokes_per_user>10].index)]

## Ratings Matrix

In [318]:
# Create ratings matrix
rating_matrix_pivot = pd.pivot_table(df, values='Rating', index=['user_id'], columns=['joke_id']).fillna(0)

#rating_matrix_pivot.tail()

## Sparse Matrix

In [319]:
# Create sparse matrix
from scipy.sparse import csr_matrix
# Create sparse matrix
ratings_matrix = csr_matrix(rating_matrix_pivot.values)
#ratings_matrix

## Initialize Model

In [320]:
# Implement KNN with cosine metric
from sklearn.neighbors import NearestNeighbors

model = NearestNeighbors(metric='cosine', algorithm='brute')
model = model.fit(ratings_matrix)

## Helper function: Recommend jokes

In [322]:
# Helper function to recommend jokes

def recommend_jokes():
    query_index = np.random.choice(rating_matrix_pivot.shape[0])
    distances, indices = model.kneighbors(rating_matrix_pivot.iloc[query_index,:].values.reshape(1, -1), n_neighbors = 5)
    
    joke_list = []
    
    user = ''
    
    for i in range(0, len(distances.flatten())):
        if i == 0:
            user_index = str(query_index) 
            user = user_index

        else:
            try:
                joke_list.append(joke_text(rating_matrix_pivot.index[indices.flatten()[i]]))
                
            except:
                joke_list.append("")
    
    user = 'Recommended jokes for user ' + user
        
    df = pd.DataFrame(joke_list, columns = [user])
    df.set_index(user, inplace=True)
    

    return df

In [323]:
# Try function
recommend_jokes()

Q. What is orange and sounds like a parrot? A. A carrot.
"There was an engineer who had an exceptional gift for fixing all things mechanical. After serving his company loyally for over 30 years, he happily retired. Several years later the company contacted him regarding a seemingly impossible problem they were having with one of their multi-million dollar machines. They had tried everything and everyone else to get the machine fixed, but to no avail. In desperation, they called on the retired engineer who had solved so many of their problems in the past. The engineer reluctantly took the challenge. He spent a day studying the huge machine. At the end of the day, he marked a small ""x"" in chalk on a particular component of the machine and proudly stated: ""This is where your problem is."" The part was replaced and the machine worked perfectly again. The company received a bill for $50,000 from the engineer for his service. They demanded an itemized accounting of his charges. The engineer responded briefly: One chalk mark: $1. Knowing where to put it: $49,999. He was paid in full and the engineer retired again in peace."
"What do you call an American in the finals of the world cup? ""Hey beer man!"""
"A man went to apply for a job. After filling out all of his applications, he waited anxiously for the outcome. The employer read all his applications and said, ""We have an opening for people like you."" ""Oh, great,"" he said. ""What is it?"" ""It's called the door!"""


# Use the button below to get recommendations for jokes
[GitHub Repo](https://github.com/helenabarmer/joke_recommender)


In [310]:
# Create widget button
button = widgets.Button(description="Generate Joke", button_style='success')

# Output
output = widgets.Output(layout={'border': '1px solid black'})

# Display button
display(button, output)

def joke(b):
       with output:
            clear_output(True)
            display(recommend_jokes())
            
            
button.on_click(joke)

Button(button_style='success', description='Generate Joke', style=ButtonStyle())

Output(layout=Layout(border='1px solid black'))