In [None]:
!pip install streamlit==1.15.2 pgeocode matplotlib==3.6.2 scikit-surprise

In [None]:
import os
from urllib.request import urlopen
from zipfile import ZipFile
from io import BytesIO

dir_name = 'ml-100k'
url = 'http://files.grouplens.org/datasets/movielens/ml-100k.zip'

# Fetch and unpack the archive only when the dataset folder is missing, so
# re-running this cell does not download it again.
if not os.path.exists(dir_name):
    with urlopen(url) as zipresp:
        archive_bytes = BytesIO(zipresp.read())
    with ZipFile(archive_bytes) as zfile:
        # Extracts into the current working directory.
        zfile.extractall()

print(f'{dir_name} is in {os.getcwd()}')

In [None]:
%%writefile app.py
# Entry script passed to `streamlit run`; it only renders the app title.
import streamlit as st

st.title('MovieLens 100K')

In [None]:
!mkdir pages

In [None]:
import os

# Export SURPRISE_DATA_FOLDER in this kernel's environment and echo it back.
# NOTE(review): presumably read by scikit-surprise's Dataset.load_builtin in
# the page scripts to locate its dataset directory — confirm.
os.environ.update(SURPRISE_DATA_FOLDER='ml-100k')
print(os.environ.get('SURPRISE_DATA_FOLDER'))

In [None]:
%%writefile ./pages/movie.py
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go
from matplotlib.ticker import MaxNLocator
from collections import Counter

st.title("電影 (1682部)")

# Genre names: first '|'-separated field of every row in u.genre.
path = 'ml-100k/u.genre'
genre = [g[0] for g in pd.read_csv(path, sep='|', header=None).values]

path = 'ml-100k/u.user'
header = ['user_id', 'age', 'gender', 'occupation', 'zip_code']
user = pd.read_csv(path, sep='|', names=header)

# One row per rating, joined with the rater's columns from u.user and with the
# epoch-seconds timestamp converted to a datetime.
path = 'ml-100k/u.data'
header = ['user_id', 'item_id', 'rating', 'timestamp']
rating = pd.read_csv(path, sep='\t', names=header)
rating = rating.merge(user, on='user_id')
rating['timestamp'] = pd.to_datetime(rating['timestamp'], unit='s')

# Mean rating and number of ratings per movie, indexed by item_id.
rating_stats = rating.groupby('item_id')['rating'].agg(['mean', 'count'])

path = 'ml-100k/u.item'
header = ['item_id', 'movie_title', 'release_date', 'video_release_date', 'IMDb_URL'] + genre
movie = pd.read_csv(path, sep='|', names=header, encoding='latin-1')
movie['release_date'] = pd.to_datetime(movie['release_date'])
# Attach the statistics by item_id rather than by row position, so the values
# stay correct even if a movie has no ratings or u.item is not sorted by id.
movie['avg_rating'] = movie['item_id'].map(rating_stats['mean'])
movie['rating_count'] = movie['item_id'].map(rating_stats['count']).fillna(0).astype(int)

# Overview charts. The content of each tab follows the order of the labels:
# releases per year, average-rating histogram, rating-count histogram,
# genre share, average rating per genre.
tabs = st.tabs(['每年發行電影數量', '評分統計', '評分人數統計', '類型統計', '類型平均評分統計'])

# Tab 0: number of movies released per year.
with tabs[0]:
    # value_counts() ignores missing release dates, so no NaN bar is drawn.
    per_year = movie['release_date'].dt.year.value_counts().sort_index()
    year = [int(v) for v in per_year.index]
    y = per_year.tolist()
    fig = go.Figure(data=[go.Bar(x=year, y=y, text=y, textposition='auto')])
    fig.update_layout(
        xaxis_title='Year',
        yaxis_title='Number of movies'
    )
    st.plotly_chart(fig)

# Tab 1: distribution of the movies' average rating.
with tabs[1]:
    fig = go.Figure(data=[go.Histogram(x=movie['avg_rating'], texttemplate='%{y}')])
    fig.update_layout(
        xaxis_title='Rating',
        yaxis_title='Number of movies'
    )
    st.plotly_chart(fig)

# Tab 2: distribution of how many ratings each movie received.
with tabs[2]:
    fig = go.Figure(data=[go.Histogram(x=movie['rating_count'], texttemplate='%{y}')])
    fig.update_layout(
        xaxis_title='Rating count',
        yaxis_title='Number of movies'
    )
    st.plotly_chart(fig)

# Tab 3: number of movies flagged with each genre.
with tabs[3]:
    genre_count = [int((movie[g] == 1).sum()) for g in genre]
    fig = go.Figure(data=[go.Pie(labels=genre, values=genre_count)])
    st.plotly_chart(fig)

# Tab 4: mean of the movies' average rating within each genre.
with tabs[4]:
    genre_avg_rating = [
        round(movie.loc[movie[g] == 1, 'avg_rating'].mean(), 2) for g in genre
    ]
    fig = go.Figure(data=[go.Bar(x=genre, y=genre_avg_rating, text=genre_avg_rating, textposition='auto')])
    fig.update_layout(
        xaxis_title='Genre',
        yaxis_title='Average rating'
    )
    st.plotly_chart(fig)

# Named year_range (not `range`) so the builtin is not shadowed.
year_range = st.slider('發行時間', 1922, 1998, (1922, 1966))
st.write('發行時間：', year_range[0], '年 ~ ', year_range[1], '年')

genre_options = st.multiselect('類型', genre)
rating_options = st.slider('評分', 1, 5, (2, 3))
st.write('評分範圍：', rating_options[0], '分 ~ ', rating_options[1], '分')

# A movie must carry every selected genre.
for g in genre_options:
    movie = movie[movie[g] == 1]

# Movies without a release date are kept regardless of the year slider.
release_year = movie['release_date'].dt.year
movie = movie[movie['release_date'].isnull() | release_year.between(year_range[0], year_range[1])]
# assign() builds a new frame, which avoids writing into a filtered slice
# (SettingWithCopyWarning).
movie = movie.assign(release_date=movie['release_date'].dt.date)
movie = movie[movie['avg_rating'].between(rating_options[0], rating_options[1])]
st.write('共有 ', len(movie), ' 部電影')

# Select-box label -> (column to sort by, ascending?).
sort_choices = {
    '發行時間 (舊 → 新)': ('release_date', True),
    '發行時間 (新 → 舊)': ('release_date', False),
    '評分 (高 → 低)': ('avg_rating', False),
    '評分 (低 → 高)': ('avg_rating', True),
    '評分人數 (多 → 少)': ('rating_count', False),
    '評分人數 (少 → 多)': ('rating_count', True),
}
option = st.selectbox('排序依據', list(sort_choices), index=0)

sort_column, sort_ascending = sort_choices[option]
movie = movie.sort_values(by=sort_column, ascending=sort_ascending)

# The year of each rating does not depend on the movie being rendered, so it
# is derived once here instead of inside every card.
rating_year = rating['timestamp'].dt.year
year = sorted(set(rating_year))

# One card per movie that survived the filters, with detail charts in an expander.
for _, row in movie.iterrows():
    container = st.container()
    container.title(f"{row['movie_title']}")
    container.markdown(f"發行時間 : {row['release_date']}")
    container.markdown(f"類型 : {', '.join([g for g in genre if row[g] == 1])}")
    container.markdown(f"平均評分 : {row['avg_rating']:0.2f} (評分人數 : {row['rating_count']})")
    expander = container.expander('詳細資料')
    tabs = expander.tabs(['評分分布', '每年平均評分', '每年評分人數', '評分者職業統計'])

    # Ratings of the current movie and the year each of them was given.
    is_current = rating['item_id'] == row['item_id']
    movie_ratings = rating[is_current]
    movie_rating_year = rating_year[is_current]

    # Tab 0: how many 1..5 ratings the movie received.
    with tabs[0]:
        scores = movie_ratings['rating'].tolist()
        r = [scores.count(s) for s in [1, 2, 3, 4, 5]]
        fig, ax = plt.subplots()
        ax.set_xlabel('Rating')
        ax.set_ylabel('Number of ratings')
        ax.set_xticks(np.arange(1, 6, 1))
        # Leave headroom above the tallest bar for its label.
        ax.set_ylim(top=(max(r) // 10 + 2) * 10)
        ax.yaxis.set_major_locator(MaxNLocator(integer=True))
        p = ax.bar(np.arange(1, 6, 1), r)
        ax.bar_label(p)
        st.pyplot(fig)
        # Release the figure; otherwise every card leaks open figures.
        plt.close(fig)

    # Tab 1: average rating per year (0 for years without ratings).
    with tabs[1]:
        y = []
        for x in year:
            val = movie_ratings.loc[movie_rating_year == x, 'rating'].mean()
            y.append(val if not np.isnan(val) else 0)

        fig, ax = plt.subplots()
        ax.set_xlabel('Year')
        ax.set_ylabel('Average rating')
        ax.set_xticks(year)
        ax.set_ylim(1, 5)
        ax.set_yticks(np.arange(0, 5.2, 0.2))

        ax.plot(year, y, 'go-', label='rating')
        ax.legend()

        for idx, v in enumerate(y):
            ax.annotate(f'{v:0.2f}', (year[idx], v), xytext=(year[idx], v+0.1), ha='center')
        st.pyplot(fig)
        plt.close(fig)

    # Tab 2: ratings per year, stacked by the rater's gender.
    with tabs[2]:
        y2 = [int(((movie_ratings['gender'] == 'M') & (movie_rating_year == x)).sum()) for x in year]
        y3 = [int(((movie_ratings['gender'] == 'F') & (movie_rating_year == x)).sum()) for x in year]

        fig, ax = plt.subplots()
        ax.set_xlabel('Year')
        ax.set_ylabel('Number of ratings')
        ax.set_xticks(year)
        mx = max(m_cnt + f_cnt for m_cnt, f_cnt in zip(y2, y3))
        ax.set_ylim(top=(mx // 10 + 2) * 10)
        ax.yaxis.set_major_locator(MaxNLocator(integer=True))

        p1 = ax.bar(year, y2, label='Male')
        p2 = ax.bar(year, y3, bottom=y2, label='Female')
        ax.legend()

        # Per-segment counts in the middle of each segment (blank for zero) ...
        for c in ax.containers:
            labels = [v if v != 0 else "" for v in c.datavalues]
            ax.bar_label(c, labels=labels, label_type="center")
        # ... plus an edge label on the upper (female) segment of each stack.
        ax.bar_label(p2)
        st.pyplot(fig)
        plt.close(fig)

    # Tab 3: occupations of the users who rated the movie.
    with tabs[3]:
        cnt = Counter(movie_ratings['occupation'].tolist())
        jobs = list(cnt.keys())
        jobs_cnt = [cnt[j] for j in jobs]
        fig = go.Figure(data=[go.Pie(labels=jobs, values=jobs_cnt)])
        st.plotly_chart(fig)

In [None]:
%%writefile ./pages/user.py
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import pgeocode
from matplotlib.ticker import MaxNLocator

st.title("影迷 (943人)")

path = 'ml-100k/u.user'
header = ['user_id', 'age', 'gender', 'occupation', 'zip_code']
user = pd.read_csv(path, sep='|', names=header)

# Occupation names: first field of every row in u.occupation.
path = 'ml-100k/u.occupation'
jobs = [j[0] for j in pd.read_csv(path, header=None).values]

path = 'ml-100k/u.data'
header = ['user_id', 'item_id', 'rating', 'timestamp']
rating = pd.read_csv(path, sep='\t', names=header)
rating['timestamp'] = pd.to_datetime(rating['timestamp'], unit='s')
# Sorted list of the calendar years in which ratings were given.
year = sorted(set(rating['timestamp'].dt.year))

# Mean rating and number of ratings per user, indexed by user_id. They are
# attached by user_id rather than by row position, so the values stay correct
# even if a user has no ratings or u.user is not sorted by id.
user_stats = rating.groupby('user_id')['rating'].agg(['mean', 'count'])
user['avg_rating'] = user['user_id'].map(user_stats['mean'])
user['rating_count'] = user['user_id'].map(user_stats['count']).fillna(0).astype(int)

path = 'ml-100k/u.genre'
genre = [g[0] for g in pd.read_csv(path, sep='|', header=None).values]

path = 'ml-100k/u.item'
header = ['item_id', 'movie_title', 'release_date', 'video_release_date', 'IMDb_URL'] + genre
movie = pd.read_csv(path, sep='|', names=header, encoding='latin-1')
movie['release_date'] = pd.to_datetime(movie['release_date'])

tabs = st.tabs(['性別分布', '年齡分布', '職業分布', '職業平均評分統計', '地區分布'])

# Tab 0: gender split.
with tabs[0]:
    # Count rows directly; DataFrame.value_counts() counts distinct rows and
    # drops rows containing NaN, so it is not a reliable row count.
    male = int((user['gender'] == 'M').sum())
    female = int((user['gender'] == 'F').sum())
    labels = ['Male', 'Female']
    values = [male, female]
    fig = go.Figure(data=[go.Pie(labels=labels, values=values)])
    st.plotly_chart(fig)

# Tab 1: age histogram coloured by gender.
with tabs[1]:
    # The relabelling is done in place: the gender filter and the user cards
    # later in this script compare against 'Male' / 'Female'.
    user.loc[user['gender'] == 'M', 'gender'] = 'Male'
    user.loc[user['gender'] == 'F', 'gender'] = 'Female'
    fig = px.histogram(user, x='age', color='gender', text_auto=True)
    st.plotly_chart(fig)

# Tab 2: number of users per occupation, split by gender.
with tabs[2]:
    path = 'ml-100k/u.occupation'
    header = ['occupation']
    occupation = pd.read_csv(path, sep='|', names=header)
    jobs = occupation['occupation'].tolist()
    is_male = user['gender'] == 'Male'
    is_female = user['gender'] == 'Female'
    male = [int(((user['occupation'] == j) & is_male).sum()) for j in jobs]
    female = [int(((user['occupation'] == j) & is_female).sum()) for j in jobs]
    fig = go.Figure(data=[
        go.Bar(name='Male', x=jobs, y=male, text=male, textposition='auto'),
        go.Bar(name='Female', x=jobs, y=female, text=female, textposition='auto'),
    ])
    fig.update_xaxes(tickangle=315)
    fig.update_layout(
        xaxis_title='Occupation',
        yaxis_title='Count',
    )
    st.plotly_chart(fig)

# Tab 3: mean of the users' average rating within each occupation.
with tabs[3]:
    jobs_avg_rating = [
        round(user.loc[user['occupation'] == j, 'avg_rating'].mean(), 2) for j in jobs
    ]
    fig = go.Figure(data=go.Bar(x=jobs, y=jobs_avg_rating, text=jobs_avg_rating, textposition='auto'))
    fig.update_xaxes(tickangle=315)
    fig.update_layout(
        xaxis_title='Occupation',
        yaxis_title='Average rating',
    )
    st.plotly_chart(fig)

@st.cache(allow_output_mutation=True)
def get_address():
    """Return (latitude, longitude) pairs for the users' zip codes.

    Reads the module-level ``user`` frame and resolves its ``zip_code``
    column with pgeocode's US index. Zip codes for which pgeocode returns no
    latitude or longitude are skipped, so the result may be shorter than
    ``user``.

    Returns:
        list[tuple[float, float]]: one (latitude, longitude) pair per
        resolvable zip code, in the order of ``user``.
    """
    zip_codes = user['zip_code'].astype(str).tolist()
    if not zip_codes:
        return []
    nomi = pgeocode.Nominatim('us')
    # A single call with the whole list returns one result row per code,
    # instead of issuing one lookup per user.
    located = nomi.query_postal_code(zip_codes)
    located = located.dropna(subset=['latitude', 'longitude'])
    return list(zip(located['latitude'], located['longitude']))

# Tab 4: one marker per resolved user location on a US map.
with tabs[4]:
    coords = pd.DataFrame(get_address(), columns=['lat', 'lon'])
    markers = go.Scattergeo(
        locationmode='USA-states',
        lat=coords['lat'],
        lon=coords['lon'],
        mode='markers',
    )
    fig = go.Figure(data=markers)
    fig.update_layout(geo_scope='usa')
    st.plotly_chart(fig)

age_option = st.slider('年齡', 7, 73, (7, 15))
st.write('年齡範圍：', age_option[0], '歲 ~ ', age_option[1], '歲')
gender_option = st.multiselect('性別', ['男', '女'])
job_option = st.multiselect('職業', jobs)

# Age bounds are inclusive on both ends.
youngest, oldest = age_option
user = user[user['age'].between(youngest, oldest)]

# The gender filter applies only when exactly one gender is picked; an empty
# selection or both genders leaves the list unfiltered.
gender_value = {'男': 'Male', '女': 'Female'}
if len(gender_option) == 1:
    user = user[user['gender'] == gender_value[gender_option[0]]]

if job_option:
    user = user[user['occupation'].isin(job_option)]

st.write('共有', len(user), '人')

# Select-box label -> (column to sort by, ascending?).
sort_choices = {
    '年齡 (小 → 大)': ('age', True),
    '年齡 (大 → 小)': ('age', False),
    '評分 (高 → 低)': ('avg_rating', False),
    '評分 (低 → 高)': ('avg_rating', True),
    '評分數 (多 → 少)': ('rating_count', False),
    '評分數 (少 → 多)': ('rating_count', True),
}
sort_option = st.selectbox('排序依據', list(sort_choices), index=0)

sort_column, sort_ascending = sort_choices[sort_option]
user = user.sort_values(by=[sort_column], ascending=sort_ascending)

# The year of each rating does not depend on the user being rendered, so it
# is derived once here instead of inside every card.
rating_year = rating['timestamp'].dt.year

# One card per user that survived the filters, with detail charts in an expander.
for _, row in user.iterrows():
    container = st.container()
    container.title(f'No. {row["user_id"]}')
    container.markdown(f'年齡 ： {row["age"]} 歲')
    if row['gender'] == 'Male':
        container.write('性別 ： 男性')
    else:
        container.write('性別 ： 女性')
    container.markdown(f'職業 ： {row["occupation"]}')
    container.markdown(f'郵遞區號 ： {row["zip_code"]}')
    container.markdown(f'平均評分 ： {row["avg_rating"]:.2f}')
    container.markdown(f'評分數量 ： {row["rating_count"]}')
    expander = container.expander('詳細資料')
    tabs = expander.tabs(['評分分布', '每年平均評分', '每年評分數量', '評分類型統計'])

    # Ratings given by the current user and the year each of them was given.
    is_current = rating['user_id'] == row['user_id']
    user_ratings = rating[is_current]
    user_rating_year = rating_year[is_current]

    # Tab 0: how many 1..5 ratings the user gave.
    with tabs[0]:
        scores = user_ratings['rating'].tolist()
        r = [scores.count(s) for s in [1, 2, 3, 4, 5]]
        fig, ax = plt.subplots()
        ax.set_xlabel('Rating')
        ax.set_ylabel('Number of ratings')
        ax.set_xticks(np.arange(1, 6, 1))
        # Leave headroom above the tallest bar for its label.
        ax.set_ylim(top=(max(r) // 10 + 2) * 10)
        ax.yaxis.set_major_locator(MaxNLocator(integer=True))
        p = ax.bar(np.arange(1, 6, 1), r)
        ax.bar_label(p)
        st.pyplot(fig)
        # Release the figure; otherwise every card leaks open figures.
        plt.close(fig)

    # Tab 1: average rating per year (0 for years without ratings).
    with tabs[1]:
        avg_rating = []
        for y in year:
            val = user_ratings.loc[user_rating_year == y, 'rating'].mean()
            avg_rating.append(val if not pd.isnull(val) else 0)

        fig, ax = plt.subplots()
        ax.set_xlabel('Year')
        ax.set_ylabel('Average rating')
        ax.set_xticks(year)
        ax.set_ylim(1, 5)
        ax.set_yticks(np.arange(0, 5.2, 0.2))

        ax.plot(year, avg_rating, 'go-', label='rating')
        ax.legend()

        for idx, v in enumerate(avg_rating):
            ax.annotate(f'{v:0.2f}', (year[idx], v), xytext=(year[idx], v+0.1), ha='center')
        st.pyplot(fig)
        plt.close(fig)

    # Tab 2: number of ratings per year.
    with tabs[2]:
        rating_count = [int((user_rating_year == y).sum()) for y in year]

        fig, ax = plt.subplots()
        ax.set_xlabel('Year')
        ax.set_ylabel('Number of ratings')
        ax.set_xticks(year)
        mx = max(rating_count)
        ax.set_ylim(top=(mx // 10 + 2) * 10)
        ax.yaxis.set_major_locator(MaxNLocator(integer=True))

        p = ax.bar(year, rating_count)
        ax.bar_label(p)
        st.pyplot(fig)
        plt.close(fig)

    # Tab 3: genres of the movies the user rated (genres with no hits omitted).
    with tabs[3]:
        df = pd.merge(user_ratings, movie, on='item_id')
        per_genre = [(g, int((df[g] == 1).sum())) for g in genre]
        per_genre = [(g, c) for g, c in per_genre if c != 0]
        genre_copy = [g for g, _ in per_genre]
        genre_count = [c for _, c in per_genre]
        fig = go.Figure(data=[go.Pie(labels=genre_copy, values=genre_count)])
        st.plotly_chart(fig)

In [None]:
%%writefile ./pages/knn_item.py
import os
import streamlit as st
import pandas as pd
from surprise import *
from surprise.model_selection import *

# Page heading and sub-heading, rendered as Markdown.
st.markdown('# 前 K 部最相似的電影')
st.markdown('## (Item-base) (KNN)')

def get_movie_id():
    """Build lookup tables between raw movie ids and titles from u.item.

    Each line of 'ml-100k/u.item' is split on '|'; the first field is used as
    the raw id and the second as the title (both kept as strings). Blank or
    truncated lines are skipped instead of raising IndexError.

    Returns:
        tuple[dict, dict]: ``(rid_to_name, name_to_rid)``. When a title occurs
        on several lines, ``name_to_rid`` keeps the id of the last one.
    """
    file_name = 'ml-100k/u.item'
    rid_to_name = {}
    name_to_rid = {}
    with open(file_name, encoding='latin-1') as f:
        for line in f:
            fields = line.rstrip('\n').split("|")
            # Fewer than two fields means there is no title to record.
            if len(fields) < 2:
                continue
            rid_to_name[fields[0]] = fields[1]
            name_to_rid[fields[1]] = fields[0]
    return rid_to_name, name_to_rid

raw_id_to_name, name_to_raw_id = get_movie_id()
knn = st.selectbox('選擇方法', ['KNNBasic', 'KNNWithMeans', 'KNNWithZScore', 'KNNBaseline'], index=3)
sim = st.selectbox('相似度計算方法', ['cosine', 'msd', 'pearson', 'pearson_baseline'], index=3)
movie_name = st.selectbox('選擇電影', list(raw_id_to_name.values()))
number = st.text_input('輸入前幾部 (1 ~ 1682)', '10')

# Select-box label -> algorithm class; every variant is item-based.
knn_classes = {
    'KNNBasic': KNNBasic,
    'KNNWithMeans': KNNWithMeans,
    'KNNWithZScore': KNNWithZScore,
    'KNNBaseline': KNNBaseline,
}
algo = knn_classes[knn](sim_options={'name': sim, 'user_based': False})

# prompt=False: a Streamlit script has no console to answer a download prompt.
data = Dataset.load_builtin('ml-100k', prompt=False)
trainset = data.build_full_trainset()
algo.fit(trainset)

movie_id = name_to_raw_id[movie_name]
movie_inner_id = algo.trainset.to_inner_iid(movie_id)
movie_neighbors = algo.get_neighbors(movie_inner_id, k=int(number))

st.write('前', number, '名和', movie_name, '最相似的電影')
# Raw item ids of the neighbours, in the order get_neighbors returned them.
neighbor_ids = [int(algo.trainset.to_raw_iid(inner_id)) for inner_id in movie_neighbors]

path = 'ml-100k/u.genre'
genre = [g[0] for g in pd.read_csv(path, sep='|', header=None).values]

path = 'ml-100k/u.item'
header = ['item_id', 'movie_title', 'release_date', 'video_release_date', 'IMDb_URL'] + genre
df = pd.read_csv(path, sep='|', names=header, encoding='latin-1')
df['release_date'] = pd.to_datetime(df['release_date'])
df['release_date'] = df['release_date'].dt.date

# Select rows by item id, not by title: several ids can share one title.
df_copy = df[df['item_id'] == int(movie_id)]
df_copy = df_copy[['movie_title', 'release_date'] + genre]
st.write(df_copy)

# Keep the neighbours in the order returned by get_neighbors instead of the
# file order that a plain isin() filter would produce.
rank = {item_id: pos for pos, item_id in enumerate(neighbor_ids)}
df = df[df['item_id'].isin(neighbor_ids)]
df = df.loc[df['item_id'].map(rank).sort_values().index]
df = df[['movie_title', 'release_date'] + genre]
st.write(df)

In [None]:
%%writefile ./pages/knn_user.py
import streamlit as st
import pandas as pd
from surprise import *
from surprise.model_selection import *

# Page heading and sub-heading, rendered as Markdown.
st.markdown('# 前 K 個最相似的影評者')
st.markdown('## (User-based) (KNN)')

knn = st.selectbox('選擇方法', ['KNNBasic', 'KNNWithMeans', 'KNNWithZScore', 'KNNBaseline'], index=3)
sim = st.selectbox('相似度計算方法', ['cosine', 'msd', 'pearson', 'pearson_baseline'], index=3)
user_id = st.text_input('選擇影評者 (1 ~ 943)', '24')
number = st.text_input('輸入前幾個', '10')

# Select-box label -> algorithm class. No 'user_based' key is passed, as in
# the original code, so the library default applies.
knn_classes = {
    'KNNBasic': KNNBasic,
    'KNNWithMeans': KNNWithMeans,
    'KNNWithZScore': KNNWithZScore,
    'KNNBaseline': KNNBaseline,
}
algo = knn_classes[knn](sim_options={'name': sim})

# prompt=False: a Streamlit script has no console to answer a download prompt.
data = Dataset.load_builtin('ml-100k', prompt=False)
trainset = data.build_full_trainset()
algo.fit(trainset)

# Neighbours here are users, so raw/inner ids must go through the *user* id
# mappers (to_inner_uid / to_raw_uid), not the item ones.
user_inner_id = algo.trainset.to_inner_uid(user_id)
user_neighbors = algo.get_neighbors(user_inner_id, k=int(number))

path = 'ml-100k/u.user'
header = ['user_id', 'age', 'gender', 'occupation', 'zip_code']
df = pd.read_csv(path, sep='|', names=header)

st.write('前', number, '名和', 'No.', user_id, '最相似的影評者')
df_copy = df[df['user_id'] == int(user_id)]
st.write(df_copy)

# Raw user ids of the neighbours, in the order get_neighbors returned them.
user_ids = [int(algo.trainset.to_raw_uid(inner_id)) for inner_id in user_neighbors]

# Keep the neighbours in that order instead of the file order that a plain
# isin() filter would produce.
rank = {uid: pos for pos, uid in enumerate(user_ids)}
df = df[df['user_id'].isin(user_ids)]
df = df.loc[df['user_id'].map(rank).sort_values().index]
st.write(df)

In [None]:
%%writefile ./pages/recommend_movie.py
import streamlit as st
import pandas as pd
from surprise import *
from surprise.model_selection import *

# Page title.
st.title('電影推薦 (SVD)')

def get_top_n(predictions, user_id, n=10):
    """Return the ``n`` highest-estimated items for one user.

    Args:
        predictions: iterable of 5-tuples ``(uid, iid, true_r, est, details)``.
        user_id: only predictions whose ``uid`` equals this value are kept.
        n: maximum number of entries to return (applied as a slice).

    Returns:
        list of ``(iid, est)`` pairs sorted by ``est`` in descending order;
        entries with equal estimates keep their input order.
    """
    candidates = [
        (iid, est)
        for uid, iid, true_r, est, _ in predictions
        if uid == user_id
    ]
    ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)
    return ranked[:n]

def get_data():
    """Fit SVD on the full ml-100k trainset and predict the anti-testset.

    Returns:
        The value returned by ``algo.test`` for the trainset's anti-testset.
    """
    full_trainset = Dataset.load_builtin(name='ml-100k', prompt=False).build_full_trainset()
    model = SVD()
    model.fit(full_trainset)
    # The anti-testset is built from the trainset itself, after fitting.
    return model.test(full_trainset.build_anti_testset())

user_id = st.text_input('請輸入使用者 ID (1 ~ 943)', '196')
num_movies = st.text_input('請輸入推薦電影數量 (1 ~ 1682)', '10')

# get_data() fits a model and predicts its whole anti-testset; memoise it so
# that changing an input does not repeat that work on every rerun.
# allow_output_mutation=True skips hashing the (large) cached result.
cached_get_data = st.cache(allow_output_mutation=True)(get_data)
predictions = cached_get_data()
top_n = get_top_n(predictions, user_id, n=int(num_movies))


path = 'ml-100k/u.genre'
genre = [g[0] for g in pd.read_csv(path, sep='|', header=None).values]

path = 'ml-100k/u.item'
header = ['item_id', 'movie_title', 'release_date', 'video_release_date', 'IMDb_URL'] + genre
df = pd.read_csv(path, sep='|', names=header, encoding='latin-1')
df['release_date'] = pd.to_datetime(df['release_date'])
df['release_date'] = df['release_date'].dt.date

st.write('為使用者', user_id, '推薦的', num_movies, '部電影為 : ')
# Index the catalogue by item_id once instead of filtering the frame twice
# for every recommended movie.
catalog = df.set_index('item_id')
movie_name = []
release_date = []
pred_rating = []
for movie_id, rating in top_n:
    item_id = int(movie_id)
    movie_name.append(catalog.at[item_id, 'movie_title'])
    release_date.append(catalog.at[item_id, 'release_date'])
    pred_rating.append(rating)
df = pd.DataFrame({'電影名稱': movie_name, '發行日期': release_date, '預測評分': pred_rating})
st.write(df)

In [None]:
!streamlit run app.py & npx localtunnel --port 8501