In [15]:

import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from sklearn.preprocessing import MultiLabelBinarizer

In [16]:
def k_means(X, K, max_iterations):
     # Initialize centroids randomly
    centroids = X[np.random.choice(X.shape[0], K, replace=False)]

    for i in range(max_iterations):
        # Assign each data point to the nearest centroid
        distances = np.sqrt(((X - centroids[:, np.newaxis])**2).sum(axis=2))
        labels = np.argmin(distances, axis=0)

        # Update the centroids based on the mean of the data points in each cluster
        for k in range(K):
            centroids[k] = X[labels == k].mean(axis=0)

    return labels, centroids

In [17]:
books=pd.read_csv('OpenBook.csv')
books.head()

books.columns

# books=books[['Book Id', 'Title', 'Author', 'average_rating', 'isbn','ratings_count', 'publisher', 'Genres']]

books.head(10)
# books["Image-URL-M"][0]

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L,Genres
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,"Fiction, Fantasy"
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,"Fantasy, Fiction, Fantasy"
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,"Fantasy, Fiction, Fantasy"
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,"Mystery, Fantasy, Fiction, Fantasy"
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,"Fantasy, Fiction"
5,399135782,The Kitchen God's Wife,Amy Tan,1991,Putnam Pub Group,http://images.amazon.com/images/P/0399135782.0...,http://images.amazon.com/images/P/0399135782.0...,http://images.amazon.com/images/P/0399135782.0...,Fiction
6,425176428,What If?: The World's Foremost Military Histor...,Robert Cowley,2000,Berkley Publishing Group,http://images.amazon.com/images/P/0425176428.0...,http://images.amazon.com/images/P/0425176428.0...,http://images.amazon.com/images/P/0425176428.0...,"Fantasy, Fiction, Fantasy"
7,671870432,PLEADING GUILTY,Scott Turow,1993,Audioworks,http://images.amazon.com/images/P/0671870432.0...,http://images.amazon.com/images/P/0671870432.0...,http://images.amazon.com/images/P/0671870432.0...,"Science Fiction, Fiction, Fantasy"
8,679425608,Under the Black Flag: The Romance and the Real...,David Cordingly,1996,Random House,http://images.amazon.com/images/P/0679425608.0...,http://images.amazon.com/images/P/0679425608.0...,http://images.amazon.com/images/P/0679425608.0...,"Science Fiction, Fiction"
9,074322678X,Where You'll Find Me: And Other Stories,Ann Beattie,2002,Scribner,http://images.amazon.com/images/P/074322678X.0...,http://images.amazon.com/images/P/074322678X.0...,http://images.amazon.com/images/P/074322678X.0...,"Science Fiction, Fiction, Fantasy"


In [18]:
#convert the genre into binary features
books = books.dropna(subset=['Genres'])
genres = books['Genres'].str.split(',')
mlb = MultiLabelBinarizer()
genres_encoded = pd.DataFrame(mlb.fit_transform(genres), columns=mlb.classes_)
genres_encoded

Unnamed: 0,AUTOBIOGRAPHY,Action,Adult,Adult Fiction,American History,Art,Asian Literature,Autobiography,Biblical,Biography,...,Romance,Science Fiction,Short Stories,Sociology,Spirit,Suspense,Thriller,War,World War I,World War II
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49994,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
49995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
49996,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
49997,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
K = 100
max_iterations = 100
labels, centroids = k_means(genres_encoded.values, K, max_iterations)
books['Genre Cluster'] = labels
def recommend_books(book_name):
    book_cluster = books.loc[books['Book-Title'] == book_name, 'Genre Cluster'].values[0]
    recommended_books = books.loc[books['Genre Cluster'] == book_cluster, ['Book-Title','Book-Author', 'Publisher','Image-URL-M']]
    return recommended_books.head(5)

recommend_books('The Lord of the Rings')

In [19]:
import pickle
pickle.dump(books, open('poiuy.pkl', 'wb'))


In [12]:
import pickle
a=pickle.dump(books, open("images10.pkl", "wb"))


In [33]:
import pickle
pickle.dump(books,open("recommend1.pkl", "wb"))



In [34]:
import pickle

pickle.dump(book_name,open("recommend2.pkl", 'wb'))

NameError: name 'book_name' is not defined