In [2]:
from google.colab import drive

# Mount Google Drive at /content/drive/, forcing a remount if it is already mounted.
drive.mount("/content/drive/", force_remount=True)

Mounted at /content/drive/


In [3]:
!pip install -q streamlit

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.6/9.6 MB[0m [31m63.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 KB[0m [31m21.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m164.8/164.8 KB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m80.6/80.6 KB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.7/4.7 MB[0m [31m98.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m238.7/238.7 KB[0m [31m25.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 KB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.5/84.5 KB[0m [31m11.0 MB/s[0m

In [4]:

!pip install pyspark

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyspark
  Downloading pyspark-3.3.2.tar.gz (281.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m281.4/281.4 MB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting py4j==0.10.9.5
  Downloading py4j-0.10.9.5-py2.py3-none-any.whl (199 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.7/199.7 KB[0m [31m22.0 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: pyspark
  Building wheel for pyspark (setup.py) ... [?25l[?25hdone
  Created wheel for pyspark: filename=pyspark-3.3.2-py2.py3-none-any.whl size=281824025 sha256=4f76a2cbd3c9d976691f3efe338d528ee945b0a15ca94ad715fc499a54d6ceaf
  Stored in directory: /root/.cache/pip/wheels/6c/e3/9b/0525ce8a69478916513509d43693511463c6468db0de237c86
Successfully built pyspark
Installing collected packages: py4j, pyspa

## Create a Streamlit app


In [33]:
%%writefile app.py

import streamlit as st
from pyspark.sql import SparkSession
from pyspark.sql.functions import desc, col
from pyspark.ml.recommendation import ALSModel
from collections.abc import Iterable
from pyspark.sql.functions import explode
from pyspark.sql.types import DoubleType, IntegerType
from pyspark.ml.feature import StringIndexer
import altair as alt
import pandas as pd

# Start (or reuse) the Spark session and restore the persisted ALS model
spark = (
    SparkSession.builder
    .appName("RecommendationSystem")
    .getOrCreate()
)
model = ALSModel.load("/content/drive/MyDrive/Colab Notebooks/PRS/best_model")


def _read_csv(path):
    """Read a CSV with a header row, letting Spark infer the column types."""
    return spark.read.csv(path, header=True, inferSchema=True)


# df2 = products, df3 = ratings, df1 = customers
df2 = _read_csv("/content/drive/MyDrive/Colab Notebooks/PRS/cleanedata/products.csv")
df3 = _read_csv("/content/drive/MyDrive/Colab Notebooks/PRS/cleanedata/ratings.csv")
df1 = _read_csv("/content/drive/MyDrive/Colab Notebooks/PRS/cleanedata/customers.csv")

# Attach the customer row, then the product row, to every rating; the columns
# named "id" are dropped after each join
df4 = df3.join(df1, df3.customerid == df1.id).drop("id")
df = df4.join(df2, df3.productid == df2.id).drop("id")

# Ratings are cast to double
df = df.withColumn("rate", df["rate"].cast(DoubleType()))

# Encode customerid as an integer index
user_indexer = StringIndexer(inputCol="customerid", outputCol="customer_id_index")
user_model = user_indexer.fit(df)
indexed = user_model.transform(df)
indexed_df = indexed.select(
    indexed["customer_id_index"].cast(IntegerType()).alias("customerid"),
    indexed["productid"],
    indexed["rate"],
)

# Encode productid as an integer index
prod_indexer = StringIndexer(inputCol="productid", outputCol="prod_id_index")
prod_model = prod_indexer.fit(indexed_df)
p_indexed = prod_model.transform(indexed_df)
als_df = p_indexed.select(
    p_indexed["customerid"],
    p_indexed["prod_id_index"].cast(IntegerType()).alias("productid"),
    p_indexed["rate"],
)



# Create a function that generates recommendations based on popularity
def generate_popular_recommendations():
    """Chart the ten most-rated products and return their ids.

    Popularity is the number of rows each product has in the ratings data
    (``df3``); display names are looked up in the product catalogue (``df2``).
    As a side effect, a bar chart of the result is rendered on the page.

    Returns:
        list: Up to ten product ids, most-rated first.
    """
    # Rank by how often each product was rated. Grouping the catalogue by its
    # own id would only count catalogue rows per product, not ratings.
    top_rows = (
        df3.groupBy("productid")
        .count()
        .orderBy(desc("count"))
        .take(10)
    )
    recommendations = [row[0] for row in top_rows]
    rating_counts = [row[1] for row in top_rows]

    # Resolve every name in one query instead of two Spark jobs per product.
    name_rows = (
        df2.filter(col("id").isin(recommendations))
        .select("id", "name")
        .collect()
    )
    names_by_id = {row[0]: row[1] for row in name_rows}
    # Products missing from the catalogue get an empty label.
    product_names = [names_by_id.get(product_id, "") for product_id in recommendations]

    chart_data = pd.DataFrame({"Product Name": product_names, "Number of Ratings": rating_counts})

    # Display bar chart
    st.subheader("Top 10 Most Popular Products")
    chart = alt.Chart(chart_data).mark_bar().encode(x="Product Name", y="Number of Ratings")
    st.altair_chart(chart, use_container_width=True)

    return recommendations


def generate_collaborative_recommendations(customer_id):
    """Return the ALS model's top ten products for a single customer.

    Args:
        customer_id: Customer id in the numbering used by the model's
            ``customerid`` column. Coerced to ``int`` because Streamlit's
            number input can hand back a float such as ``5.0``.

    Returns:
        A Spark DataFrame with ``name`` and ``unitprice`` columns, ordered by
        predicted rating, highest first.
    """
    user_subset = spark.createDataFrame([(int(customer_id),)], ["customerid"])
    per_user = model.recommendForUserSubset(user_subset, numItems=10)

    # One row per recommended product instead of one array per customer.
    exploded = (
        per_user
        .withColumn("rec_exp", explode("recommendations"))
        .select("customerid", col("rec_exp.productid"), col("rec_exp.rating"))
    )

    # NOTE(review): the model's productid values are joined directly against
    # df2.id. If the model was trained on StringIndexer-encoded product ids
    # (the way als_df is built in this file), they would first need mapping
    # back to raw ids via prod_model.labels -- confirm against the training
    # notebook.
    #
    # Sort while the rating column is still present, then project; the column
    # is referenced through `exploded` so a same-named catalogue column cannot
    # make it ambiguous.
    return (
        exploded.join(df2, exploded.productid == df2.id)
        .orderBy(exploded["rating"].desc())
        .select("name", "unitprice")
    )







# Page-level settings: browser tab title, icon and wide layout
st.set_page_config(
    page_title="Product Recommendation System",
    page_icon=":guardsman:",
    layout="wide",
)

# Stylesheet injected into the page to restyle the body, buttons and tables
CUSTOM_CSS = """
<style>
    body {
        background-color: #f5f5f5;
    }
    .stButton button {
        background-color: #009688;
        color: white;
        font-weight: bold;
    }
    .stTable td {
        font-size: 18px;
        background-color: #ffffff;
        border: 1px solid #cccccc;
    }
    .stTable th {
        font-size: 20px;
        background-color: #009688;
        color: white;
        border: 1px solid #cccccc;
    }
</style>
"""
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)

# Add title and description
st.title("Product Recommendation System")
st.markdown("This app recommends products to customers using a collaborative filtering algorithm.")

placeholder = st.empty()

with placeholder.container():
    # Row A   Some Basic information about data
    st.markdown('### General Details')
    col1, col2, col3 = st.columns(3)

    # Count on the cluster rather than collecting every id to the driver.
    col1.metric("Number of Products", df2.select("id").distinct().count())
    col2.metric("Number of Ratings", str(df3.count()))

    # avg() yields None when there are no ratings; round(None, 2) would raise.
    average_rate = df3.agg({"rate": "avg"}).collect()[0][0]
    col3.metric("Average Rating", "N/A" if average_rate is None else str(round(average_rate, 2)))


# Add sidebar with filters and metrics
st.sidebar.image("/content/drive/MyDrive/Colab Notebooks/PRS/prs.png", use_column_width=True)
st.sidebar.title("Filters")
recommendation_type = st.sidebar.radio("Select Recommendation Type:", ["Popularity-based", "Collaborative Filtering"])
if recommendation_type == "Collaborative Filtering":
    # Integer min_value/step make the widget return an int, so the id is not
    # shown or passed on as a float such as 5.0.
    customer_id = st.number_input(
        "Enter Customer ID (for Collaborative Filtering only):",
        min_value=0,
        step=1,
    )
    # als_df holds the StringIndexer-encoded customer ids, so the value entered
    # here is matched against that encoded index.
    user_exists = als_df.filter(col("customerid") == customer_id).count() > 0
    if not user_exists:
        # Unknown customers fall back to the popularity-based list.
        st.warning("New user detected. Generating popularity-based recommendations.")
        recommendation_type = "Popularity-based"

# Generate recommendations
if st.button("Generate Recommendations"):
    if recommendation_type == "Popularity-based":
        recommendations = generate_popular_recommendations()
        # toPandas() keeps the column headers; a list of Rows renders without them.
        product_info = df2.filter(col("id").isin(recommendations)).select("name", "unitprice").toPandas()
        st.subheader("Recommendations:")
        st.table(product_info)
    else:
        recommendations = generate_collaborative_recommendations(customer_id)
        st.subheader(f"Recommendations for customer ID {customer_id}:")
        st.table(recommendations.toPandas())


Overwriting app.py


## Install localtunnel

In [34]:
# Install the localtunnel npm package; a later cell runs it via npx to expose the app.
!npm install localtunnel

[K[?25h[37;40mnpm[0m [0m[30;43mWARN[0m [0m[35msaveError[0m ENOENT: no such file or directory, open '/content/package.json'
[0m[37;40mnpm[0m [0m[30;43mWARN[0m [0m[35menoent[0m ENOENT: no such file or directory, open '/content/package.json'
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No description
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No repository field.
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No README data
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No license field.
[0m
[K[?25h+ localtunnel@2.0.2
updated 1 package and audited 36 packages in 0.461s

3 packages are looking for funding
  run `npm fund` for details

found [92m0[0m vulnerabilities



## Run Streamlit in the background

In [35]:
# Launch the app as a background job (trailing &); stdout and stderr are both
# redirected to /content/logs.txt.
!streamlit run /content/app.py &>/content/logs.txt &

## Expose the port 8501
Then click the URL shown in the cell output.

A `logs.txt` file will be created in `/content/`.

In [36]:
# Start localtunnel for local port 8501; it prints the public URL to open.
!npx localtunnel --port 8501

[K[?25hnpx: installed 22 in 2.061s
your url is: https://nice-pets-share-35-184-192-197.loca.lt
^C
