In [30]:
from pyspark.sql import SparkSession

In [31]:
# Create Spark session
spark = SparkSession.builder.appName('fma_recommendation_system').getOrCreate()

In [32]:
from pymongo import MongoClient

In [33]:
# Set up MongoDB connection
client = MongoClient("mongodb://localhost:27017")

In [34]:
db = client['mfcc_database']
collection = db['mfcc_collection']

In [35]:
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, ArrayType, FloatType

# Define the schema for the dataframe
schema = StructType([
    StructField('_id', StringType(), True),
    StructField('artist_name', StringType(), True),
    StructField('tags', ArrayType(StringType()), True),  # Changed tags to ArrayType(StringType())
    StructField('genre', StringType(), True),
    StructField('plays', IntegerType(), True),
    StructField('title', StringType(), True),
    StructField('mfcc_features', ArrayType(FloatType(), True))
])


In [36]:
# Get data from collection, limited to 1000 documents
data = collection.find().limit(10000)

In [37]:
# Convert data into a Spark dataframe using the defined schema
df = spark.createDataFrame(list(data), schema=schema)

In [38]:
df.show()

+------+--------------------+------+----------------+-----+--------------------+--------------------+
|   _id|         artist_name|  tags|           genre|plays|               title|       mfcc_features|
+------+--------------------+------+----------------+-----+--------------------+--------------------+
|002112|       Lucky Dragons|[[, ]]|   Audio Collage|  140|            Untitled|[-115.737465, 158...|
|002074|      Thomas Dimuzio|[[, ]]|      Electronic|   52|             Poctoth|[-450.57495, 178....|
|002012|          White Mice|[[, ]]|           Noise| 1383|      The White Mice|[-13.433411, 136....|
|002073|      Thomas Dimuzio|[[, ]]|      Electronic|   61|           Skullshop|[-368.97177, 77.6...|
|002071|      Thomas Dimuzio|[[, ]]|      Electronic|  140|          Blind Lion|[-371.8308, 170.4...|
|002008|Weather (from Chi...|[[, ]]|Field Recordings|  187|            Track 12|[-426.25522, 150....|
|002105|       Lucky Dragons|[[, ]]|   Audio Collage| 7027|          Untitled 6|[-

In [39]:
# Assuming df is your Spark DataFrame
row_count = df.count()
print("Number of rows in DataFrame:", row_count)

Number of rows in DataFrame: 5000


In [40]:
# Drop rows with missing values in necessary columns
df = df.dropna(subset=["_id", "artist_name", "tags","genre","plays","title","mfcc_features"])

In [41]:
df.show()

+------+--------------------+------+----------------+-----+--------------------+--------------------+
|   _id|         artist_name|  tags|           genre|plays|               title|       mfcc_features|
+------+--------------------+------+----------------+-----+--------------------+--------------------+
|002112|       Lucky Dragons|[[, ]]|   Audio Collage|  140|            Untitled|[-115.737465, 158...|
|002074|      Thomas Dimuzio|[[, ]]|      Electronic|   52|             Poctoth|[-450.57495, 178....|
|002012|          White Mice|[[, ]]|           Noise| 1383|      The White Mice|[-13.433411, 136....|
|002073|      Thomas Dimuzio|[[, ]]|      Electronic|   61|           Skullshop|[-368.97177, 77.6...|
|002071|      Thomas Dimuzio|[[, ]]|      Electronic|  140|          Blind Lion|[-371.8308, 170.4...|
|002008|Weather (from Chi...|[[, ]]|Field Recordings|  187|            Track 12|[-426.25522, 150....|
|002105|       Lucky Dragons|[[, ]]|   Audio Collage| 7027|          Untitled 6|[-

In [42]:
# Assuming df is your Spark DataFrame
row_count = df.count()
print("Number of rows in DataFrame:", row_count)

Number of rows in DataFrame: 5000


In [43]:
from annoy import AnnoyIndex

In [75]:
# Initialize Annoy index
num_features = len(df.first()['mfcc_features'])
annoy_index = AnnoyIndex(num_features, 'angular')  # 'angular' distance works well with cosine similarity

In [46]:
# Initialize Annoy index
num_features = len(df_pandas['mfcc_features'][0])
annoy_index = AnnoyIndex(num_features, 'angular')  # 'angular' distance works well with cosine similarity

##### In our case 'angular' specifies the distance metric used by the index. In this case, 'angular' refers to the cosine similarity metric, which is well-suited for high-dimensional vector spaces. Cosine similarity measures the cosine of the angle between two vectors and is commonly used in recommendation systems to find similar items based on their feature vectors.

In [76]:
num_features

13

In [77]:
# Add items to Annoy index
for i, row in enumerate(df.collect()):
    audio_features = row['mfcc_features']
    annoy_index.add_item(i, audio_features)

In [78]:
# Build Annoy index
annoy_index.build(50)  # 50 trees for the index (adjust as needed)

True

In [83]:
def find_similar_items(audio_features, n=10):
    similar_items = annoy_index.get_nns_by_vector(audio_features, n)
    return [df.collect()[idx] for idx in similar_items]

In [86]:
first_audio_features = df.first()['mfcc_features']
similar_items = find_similar_items(first_audio_features)
for item in similar_items:
    print(item[0], item[5])
    print()

002112 Untitled

001979 WFMU v WFMU A

001771 Seasons of Swarm

001073 Onda Tocadisco

009705 Mud On The Turtle

014335 Relic

004162 Live at WFMU (Full set)

014339 Two Invitations

014892 Rosalie

003984 We Move in Waves



In [87]:
import random

# Function to show 10 random music IDs and titles
def show_random_music():
    random_selection = random.sample(df.collect(), 10)
    for item in random_selection:
        print("ID:", item[0], "| Title:", item[5])

# Show 10 random music IDs and titles initially
show_random_music()

# Function to prompt user for input and get nearest recommendations
def get_nearest_recommendations():
    while True:
        user_input = input("Enter music ID to get nearest recommendations (enter 's' to stop): ")
        if user_input == 's':
            break
        else:
            # Find similar items for the input music ID
            audio_features = df.filter(df._id == user_input).select("mfcc_features").collect()[0][0]
            similar_items = find_similar_items(audio_features)
            print("Nearest recommendations for music ID", user_input, ":")
            for item in similar_items:
                print("ID:", item[0], "| Title:", item[5])

# Prompt user for input and get nearest recommendations
get_nearest_recommendations()


ID: 004275 | Title: Magic Fairy Poof Dust
ID: 004456 | Title: St Jude Boys Choir
ID: 009281 | Title: A fading Pale Face
ID: 016008 | Title: This will work somehow
ID: 003782 | Title: Blond and Golden Johns
ID: 001802 | Title: Start from Scratch
ID: 001705 | Title: Underscore
ID: 016113 | Title: Damp aaf
ID: 009136 | Title: Into Infinity "ear" loop
ID: 004111 | Title: Bad Vibrations


Enter music ID to get nearest recommendations (enter 's' to stop):  003782


Nearest recommendations for music ID 003782 :
ID: 003782 | Title: Blond and Golden Johns
ID: 014758 | Title: Axis Mundi
ID: 003787 | Title: The Most Excruciating Vibe
ID: 008882 | Title: In the Forrest
ID: 008859 | Title: In the Forrest
ID: 007487 | Title: The Robot's Heel
ID: 003464 | Title: She
ID: 003842 | Title: Shes Too Fat
ID: 004225 | Title: Undertow
ID: 003781 | Title: They Were Wrong


Enter music ID to get nearest recommendations (enter 's' to stop):  008882


Nearest recommendations for music ID 008882 :
ID: 008882 | Title: In the Forrest
ID: 008859 | Title: In the Forrest
ID: 007867 | Title: Strawberry > I Dig You
ID: 003787 | Title: The Most Excruciating Vibe
ID: 004209 | Title: Random Rules
ID: 007122 | Title: Cover Art/Curragh of Kildare
ID: 014758 | Title: Axis Mundi
ID: 003590 | Title: 1000 Tears
ID: 003876 | Title: Manouche
ID: 003285 | Title: All Myself


Enter music ID to get nearest recommendations (enter 's' to stop):  003781


Nearest recommendations for music ID 003781 :
ID: 003781 | Title: They Were Wrong
ID: 004044 | Title: Little Lambs
ID: 014479 | Title: Soberbia Espiritual
ID: 007872 | Title: Saved
ID: 016217 | Title: When I'm 54
ID: 004097 | Title: Clammy Hands
ID: 003782 | Title: Blond and Golden Johns
ID: 003732 | Title: Blue Lambency Downward
ID: 007867 | Title: Strawberry > I Dig You
ID: 014268 | Title: Disciplina


Enter music ID to get nearest recommendations (enter 's' to stop):  s


In [90]:
from flask import Flask, render_template, request
import random

app = Flask(__name__)

# Function to show 10 random music IDs and titles
def show_random_music():
    random_selection = random.sample(df.collect(), 10)
    return random_selection

# Function to get nearest recommendations for a given music ID
def get_nearest_recommendations(music_id):
    # Find similar items for the input music ID
    audio_features = df.filter(df._id == music_id).select("mfcc_features").collect()[0][0]
    similar_items = find_similar_items(audio_features)
    return similar_items

@app.route('/')
def index():
    random_music = show_random_music()
    return render_template('index.html', random_music=random_music)

@app.route('/recommendations', methods=['POST'])
def recommendations():
    music_id = request.form['music_id']
    nearest_recommendations = get_nearest_recommendations(music_id)
    return render_template('recommendations.html', music_id=music_id, nearest_recommendations=nearest_recommendations)

if __name__ == '__main__':
    app.run(debug=True)

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
 * Restarting with stat
Traceback (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/muhammad/.local/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/home/muhammad/.local/lib/python3.10/site-packages/traitlets/config/application.py", line 1074, in launch_instance
    app.initialize(argv)
  File "/home/muhammad/.local/lib/python3.10/site-packages/traitlets/config/application.py", line 118, in inner
    return method(app, *args, **kwargs)
  File "/home/muhammad/.local/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 692, in initialize
    self.init_sockets()
  File "/home/muhammad/.local/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 331, in init_sockets


SystemExit: 1