In [99]:
# Import functions.
import pandas as pd
import numpy as np
from sklearn.decomposition import TruncatedSVD
from scipy.sparse.linalg import svds
from sklearn.preprocessing import LabelEncoder

In [100]:
# Import dataset
df = pd.read_csv('analyze.csv')

In [101]:
def collaborative_item(dataset):
    df_pod = dataset[['id','title', 'language', 'category', 'subcategory', 'rating']]
    df_pod_test = df_pod[0:10000]

    pod_rating = df_pod_test.pivot_table('rating', index = 'id', columns='title').fillna(0)
    pod_value = pod_rating.values.T
    
    SVD = TruncatedSVD(n_components=12)
    matrix = SVD.fit_transform(pod_value)
    corr = np.corrcoef(matrix)
    title = pod_rating.columns
    title_list = list(title)
    
    return title, corr, title_list
    

In [102]:
def collaborative_recommend(title_column, corr, title_list, title):
    coffey_hands = title_list.index(title)
    corr_coffey_hands  = corr[coffey_hands]
    result = list(title_column[(corr_coffey_hands >= 0.85)])[:10]
    print(coffey_hands)
    return result

In [104]:
def collaborative_author_based(dataset):
    # Make new dataset by select features of rating and other else.
    df_pod_rate = dataset[['author', 'id','created_at', 'rating']]
    df_pod_list1 = dataset[['id','title', 'language', 'category']]
    df_pod_list1 = df_pod_list1[0:7000]
    df_pod_rate = df_pod_rate[0:7000]
    
    # Encoding author and id
    encoder = LabelEncoder()
    label = encoder.fit_transform(df_pod_rate['author'])
    label1 = encoder.fit_transform(df_pod_list1['id'])
    new = pd.DataFrame(label, columns = ['author'])
    new1 = pd.DataFrame(label1, columns = ['id'])

    # Make new dataset by encoding features.
    df_pod_rate = df_pod_rate.drop(['author', 'id'], axis = 1)
    df_pod_list_drop = df_pod_list1.drop(['id'], axis = 1)

    df_pod_rating = pd.concat([new1, new, df_pod_rate], axis=1)
    df_pod_list = pd.concat([new, df_pod_list_drop], axis=1)

    # Make new pivot table by id, author, rating.
    pod_rating_matrix = df_pod_rating.pivot(
        index='id',
        columns='author',
        values='rating'
    ).fillna(0)

    # Make matrix and caulate user rating mean.
    # Also calculate user rating minus rating mean.
    matrix = pod_rating_matrix.values
    user_ratings_mean = np.mean(matrix, axis = 1)
    matrix_user_mean = matrix - user_ratings_mean.reshape(-1, 1)

   
    # Calculate U, sigma, V matrix by using SVD method of matrix factorization.
    U, sigma, Vt = svds(matrix_user_mean, k = 12)
    sigma = np.diag(sigma)

    # Restore original matrix and apply user podcast rating.
    svd_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt) + user_ratings_mean.reshape(-1, 1)

    # Make dataet of matrix and rating column
    df_svd_preds = pd.DataFrame(svd_user_predicted_ratings, columns = pod_rating_matrix.columns)

    return df_svd_preds, df_pod_list, df_pod_rating


In [105]:
def recommend_movies(df_svd_preds, author_id, pod_df, pod_rate_df, num_recommendations=5):
    #Check user id and sort by svd_pred
    user_row_number = author_id - 1 
    sorted_user_predictions = df_svd_preds.iloc[user_row_number].sort_values(ascending=False)
    user_data = pod_rate_df[pod_rate_df.id == author_id] 
    # Get dataset about user history by author
    user_history = user_data.merge(pod_df, on = 'author').sort_values(['rating'], ascending=False)
    #Get recommendation by author.
    recommendations = pod_df[~pod_df['author'].isin(user_history['author'])]
    recommendations = recommendations.merge(pd.DataFrame(sorted_user_predictions).reset_index(), on = 'author')
    recommendations = recommendations.rename(columns = {user_row_number: 'Predictions'}).sort_values('Predictions', ascending = False).iloc[:num_recommendations, :]        
    return user_history, recommendations

In [106]:
def collaborative_main_func1(dataset, title, id, max_print_list, boolean_type):
    if boolean_type == 'title':    
        title_column, corr, title_list = collaborative_item(dataset)
        result = collaborative_recommend(title_column, corr, title_list, title)
        return result
    else:
        df_svd_preds, df_pod_list, df_pod_rating = collaborative_author_based(dataset)
        rated, predictions = recommend_movies(df_svd_preds, id, df_pod_list, df_pod_rating, max_print_list)
        rated = rated.drop(rated[rated['title'].str.contains('NaN', na=True)].index)
        rated = rated.drop(rated[rated['category'].str.contains('NaN', na=True)].index)
        predictions = predictions.drop(predictions[predictions['title'].str.contains('NaN', na=True)].index)
        predictions = predictions.drop(predictions[predictions['category'].str.contains('NaN', na=True)].index) 
        return rated, predictions

In [108]:
result = collaborative_main_func1(df, "The Kyle Warren Radio Show", 3941, 10, 'title')
result

7495


['(HD) Wasatch Mountain Podcast',
 'Classical Gasp with Duke Pesta',
 'CogKNITive Podcast',
 'Deep House Episodes',
 'Desert Island Discs',
 'Down Memory Lane',
 'Guilty Pleasures Podcast',
 "Insight for Living's Reframing Ministries Interviews",
 'Inspiring Innovation Podcast - Entrepreneur Success Stories, Case Studies, Lessons and Lifestyle Weekly',
 'Jones & Bartlett Learning - Health']

In [109]:
rated, predictions = collaborative_main_func1(df, "The Kyle Warren Radio Show", 3941, 10, 'id')

In [110]:
rated

Unnamed: 0,id,author,created_at,rating,title,language,category
0,3941,4567,2016-11-27T04:40:40,4,The Readers,en-us,Arts/Literature


In [111]:
predictions


Unnamed: 0,author,title,language,category,Predictions
678,5007,WDR 2 Quintessenz,de,Business,0.000372
679,5007,WDR 2 Kritiker,de,Arts,0.000372
674,5007,Funkhaus Europa Radio Colonia,de,Society & Culture,0.000372
672,5007,Funkhaus Europa Radio Forum,de,Society & Culture,0.000372
675,5007,WDR Lebenszeichen,de,Religion & Spirituality,0.000372
