In [None]:
!pip install streamlit transformers scikit-learn pandas


Collecting streamlit
  Downloading streamlit-1.39.0-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting watchdog<6,>=2.1.5 (from streamlit)
  Downloading watchdog-5.0.3-py3-none-manylinux2014_x86_64.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.9/41.9 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
Downloading streamlit-1.39.0-py2.py3-none-any.whl (8.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.7/8.7 MB[0m [31m44.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m45.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-5.0.3-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.3/79.3 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[

In [None]:
!pip install streamlit pyngrok


Collecting pyngrok
  Downloading pyngrok-7.2.0-py3-none-any.whl.metadata (7.4 kB)
Downloading pyngrok-7.2.0-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.0


In [None]:

%%writefile app.py

import streamlit as st
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity
import torch
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
from pyngrok import ngrok

# Conversational model (DialoGPT) used by the "Chat with Bot" mode
@st.cache_resource
def load_chatbot():
    """Load the DialoGPT-medium tokenizer and causal language model.

    Decorated with ``st.cache_resource`` so the pair is created once and the
    same objects are handed back on later calls instead of being reloaded.

    Returns:
        tuple: ``(tokenizer, model)`` for the "microsoft/DialoGPT-medium"
        checkpoint.
    """
    checkpoint = "microsoft/DialoGPT-medium"
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForCausalLM.from_pretrained(checkpoint),
    )


tokenizer, model = load_chatbot()

# Anime catalogue (already-cleaned CSV)
@st.cache_data
def load_data():
    """Read the cleaned anime dataset from its CSV file.

    Decorated with ``st.cache_data`` so the file is parsed once and later
    calls are served from Streamlit's cache.

    Returns:
        pandas.DataFrame: contents of ``/content/cleaned_anime_data.csv``.
    """
    return pd.read_csv('/content/cleaned_anime_data.csv')


df = load_data()

# Columns that make up the content-based feature space
numerical_features = ['Favorites', 'Scored By', 'Members', 'Episodes']
categorical_features = ['Genres']


@st.cache_resource
def build_content_features(_frame):
    """Build the feature table and pairwise cosine-similarity matrix.

    The work is wrapped in a cached function because Streamlit re-executes the
    script on every widget interaction; without caching the scaler, the genre
    encoding and the full n x n similarity matrix would be recomputed each
    time. ``st.cache_resource`` hands back the same objects on every call, so
    the returned values must be treated as read-only. The leading underscore
    on ``_frame`` tells Streamlit not to hash the argument, which means the
    cached result is reused until the cache is cleared.

    Args:
        _frame: DataFrame containing the columns named in
            ``numerical_features`` and ``categorical_features``. It is not
            modified.

    Returns:
        tuple: ``(features, similarity)`` where ``features`` is the combined
        feature DataFrame (indexed like ``_frame``) and ``similarity`` is the
        square cosine-similarity array over its rows.
    """
    # Scale into a NEW frame instead of writing back into the source data:
    # recommend_by_genre reads and displays the 'Scored By' column, and it
    # should see the raw counts, not the 0-1 scaled values.
    scaled = pd.DataFrame(
        MinMaxScaler().fit_transform(_frame[numerical_features]),
        columns=numerical_features,
        index=_frame.index,
    ).fillna(0.0)  # the scaler passes NaN through; cosine_similarity rejects NaN

    # 'Genres' holds comma-separated labels, so encode one indicator column per
    # individual genre. Encoding the whole string as a single category would
    # give titles that share only some genres no genre overlap at all.
    indicator_blocks = []
    for column in categorical_features:
        labels = (
            _frame[column]
            .fillna('')
            .astype(str)
            .str.replace(r'\s*,\s*', ',', regex=True)  # drop spaces around commas
            .str.strip()
        )
        indicator_blocks.append(
            labels.str.get_dummies(sep=',').add_prefix(f'{column}_')
        )

    # Every piece shares _frame's index, so the column-wise concat aligns rows.
    feature_frame = pd.concat([scaled, *indicator_blocks], axis=1)

    # NOTE: this is a dense n x n array; memory grows quadratically with the
    # number of rows in the dataset.
    return feature_frame, cosine_similarity(feature_frame)


# Combined features and similarity matrix for Content-Based Filtering
features, similarity_matrix = build_content_features(df)

# Function to get content-based recommendations
def get_content_recommendations(anime_index, top_n=5, similarity=None, frame=None):
    """Return the names of the anime most similar to the one at ``anime_index``.

    Args:
        anime_index: Row position of the reference anime, used both as a row
            of the similarity matrix and as the entry to exclude.
        top_n: Maximum number of names to return. Values <= 0 yield ``[]``.
        similarity: Optional square similarity matrix. Defaults to the
            module-level ``similarity_matrix``.
        frame: Optional DataFrame with a ``'Name'`` column whose row order
            matches ``similarity``. Defaults to the module-level ``df``.

    Returns:
        list: Up to ``top_n`` names ordered from most to least similar, never
        including the reference anime itself.
    """
    if similarity is None:
        similarity = similarity_matrix
    if frame is None:
        frame = df
    if top_n <= 0:
        return []

    # Exclude the reference row by index rather than by dropping the first
    # sorted entry: when another row ties with it (e.g. identical features,
    # similarity 1.0) the reference is not guaranteed to sort first, and
    # slicing [1:] would then recommend the selected title back to the user.
    candidates = [
        (row, score)
        for row, score in enumerate(similarity[anime_index])
        if row != anime_index
    ]
    # list.sort is stable, so ties keep their original row order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    names = frame['Name']
    return [names.iloc[row] for row, _ in candidates[:top_n]]

# Chatbot function
def chat_with_bot(input_text):
    """Generate a reply to ``input_text`` with the loaded model.

    Uses the module-level ``tokenizer`` and ``model``. Each call encodes only
    the given text; no earlier conversation turns are passed in.

    Args:
        input_text: The user's message.

    Returns:
        str: The decoded continuation produced by the model, with special
        tokens removed.
    """
    # Append the tokenizer's EOS token to the user text before encoding.
    prompt_ids = tokenizer.encode(input_text + tokenizer.eos_token, return_tensors='pt')
    prompt_length = prompt_ids.shape[-1]

    generated = model.generate(
        prompt_ids,
        max_length=1000,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Skip the first ``prompt_length`` positions (the prompt) and decode what
    # follows in the first returned sequence.
    reply_ids = generated[:, prompt_length:][0]
    return tokenizer.decode(reply_ids, skip_special_tokens=True)


def recommend_by_genre(selected_genres):
    """Display the top 20 anime matching any of the selected genres.

    Rows of the module-level ``df`` whose ``'Genres'`` text contains at least
    one of ``selected_genres`` (case-insensitive) are ranked by their share of
    the matched rows' total ``'Scored By'`` value, and the first 20 are shown
    with ``st.dataframe``. If nothing matches, a message is written instead.

    Args:
        selected_genres: Iterable of genre names. An empty iterable produces
            an empty pattern, which matches every row with a non-null genre.

    Returns:
        None. Output is rendered through Streamlit.
    """
    import re  # local import: only this function needs it

    # Escape each name so it is matched literally; unescaped, any regex
    # metacharacter in a genre name would change what the pattern matches.
    pattern = '|'.join(re.escape(genre) for genre in selected_genres)
    matches = df[df['Genres'].str.contains(pattern, case=False, na=False)]
    if matches.empty:
        st.write("No anime found matching your selected genres.")
        return

    # Calculate probability score based on the number of scored by for each anime
    total_scored_by = matches['Scored By'].sum()
    if total_scored_by:
        probability = matches['Scored By'] / total_scored_by
    else:
        # Every matched row has a zero count; avoid 0/0 producing NaN.
        probability = 0.0

    # assign() returns a new frame, so the filtered slice of df is never
    # written to (no SettingWithCopyWarning, no accidental change to df).
    ranked = matches.assign(Probability=probability).sort_values(
        'Probability', ascending=False
    )

    # Get top 20 anime with highest probability score
    top_20_anime = ranked[['Name', 'Genres', 'Scored By', 'Probability']].head(20)
    st.dataframe(top_20_anime)


# Streamlit UI for Anime Recommendation by Genre
st.title("Anime Recommendation Chatbot with Genre Selection")

# Genre options: every distinct label found in the comma-separated 'Genres'
# column, whitespace-trimmed and sorted
unique_genres = sorted({
    label.strip()
    for genre_list in df['Genres'].dropna().str.split(',')
    for label in genre_list
})

# The multiselect is given an explicit unique key
selected_genres = st.multiselect(
    "Select your preferred genres:",
    unique_genres,
    key="genre_selector",
)

# Only render the genre-based table once at least one genre is chosen
if selected_genres:
    st.subheader("Recommended Anime Based on Your Genre Selection")
    recommend_by_genre(selected_genres)

# Streamlit app UI
st.title("Anime Recommendation & ChatGPT-like Chatbot")

# Sidebar selector that switches between the two modes
tab = st.sidebar.selectbox("Choose Mode", ["Anime Recommendation", "Chat with Bot"])

# Anime Recommendation System
if tab == "Anime Recommendation":
    st.header("Content-Based Anime Recommendation System")

    # Reference title chosen by the user
    selected_anime = st.selectbox("Select an Anime", df['Name'])

    # Index of the first row whose name equals the selection
    anime_index = df.index[df['Name'] == selected_anime][0]

    # Compute the recommendations first, then render them as a bullet list
    recommendations = get_content_recommendations(anime_index)
    st.write(f"Because you selected **{selected_anime}**, we recommend:")
    for anime in recommendations:
        st.write(f"- {anime}")

# ChatGPT-like Chatbot
elif tab == "Chat with Bot":
    st.header("ChatGPT-like Conversational Bot")

    user_input = st.text_input("Ask the chatbot anything:")
    # Nothing is sent to the model while the text box is empty
    if user_input:
        st.write(f"Chatbot: {chat_with_bot(user_input)}")



Overwriting app.py


In [None]:
from pyngrok import ngrok
!pkill ngrok

# Set your authtoken
ngrok.set_auth_token("Your_Token")

# Start ngrok using addr keyword for port 8501
public_url = ngrok.connect(addr="8501")  # Use addr="8501"
print(f"Streamlit app URL: {public_url}")

# Run the Streamlit app
!streamlit run app.py

Streamlit app URL: NgrokTunnel: "https://091e-34-73-49-13.ngrok-free.app" -> "http://localhost:8501"

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.73.49.13:8501[0m
[0m
2024-10-28 18:49:28.428925: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-28 18:49:28.479573: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-28 18:49:28.496337: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to

In [None]:


# NOTE(review): this cell repeats the recommend_by_genre definition that the
# %%writefile cell already puts into app.py. It relies on `df` and `st`, which
# no notebook cell visible here defines in the kernel - TODO confirm whether
# this cell is still needed.
def recommend_by_genre(selected_genres):
    """Display the top 20 anime matching any of the selected genres.

    Rows of ``df`` whose ``'Genres'`` text contains at least one of
    ``selected_genres`` (case-insensitive) are ranked by their share of the
    matched rows' total ``'Scored By'`` value, and the first 20 are shown with
    ``st.dataframe``. If nothing matches, a message is written instead.

    Args:
        selected_genres: Iterable of genre names. An empty iterable produces
            an empty pattern, which matches every row with a non-null genre.

    Returns:
        None. Output is rendered through Streamlit.
    """
    import re  # local import: only this function needs it

    # Escape each name so it is matched literally; unescaped, any regex
    # metacharacter in a genre name would change what the pattern matches.
    pattern = '|'.join(re.escape(genre) for genre in selected_genres)
    matches = df[df['Genres'].str.contains(pattern, case=False, na=False)]
    if matches.empty:
        st.write("No anime found matching your selected genres.")
        return

    # Calculate probability score based on the number of scored by for each anime
    total_scored_by = matches['Scored By'].sum()
    if total_scored_by:
        probability = matches['Scored By'] / total_scored_by
    else:
        # Every matched row has a zero count; avoid 0/0 producing NaN.
        probability = 0.0

    # assign() returns a new frame, so the filtered slice of df is never
    # written to (no SettingWithCopyWarning, no accidental change to df).
    ranked = matches.assign(Probability=probability).sort_values(
        'Probability', ascending=False
    )

    # Get top 20 anime with highest probability score
    top_20_anime = ranked[['Name', 'Genres', 'Scored By', 'Probability']].head(20)
    st.dataframe(top_20_anime)