In [None]:
'''to make the chromadb'''

import asyncio
import ollama
import chromadb
import pandas as pd
import time
import os
from chromadb.utils import embedding_functions

# Load dataset
df = pd.read_csv("final_final.csv")
print(f"Loaded dataset with {len(df)} songs.")

# Load previously processed songs
PROGRESS_FILE = "processed_songs.txt"
processed_songs = set()
if os.path.exists(PROGRESS_FILE):
    with open(PROGRESS_FILE, "r") as f:
        processed_songs = set(f.read().splitlines())

# Filter out already processed songs
df = df[~df["Track ID"].astype(str).isin(processed_songs)]
print(f"Skipping {len(processed_songs)} already processed songs. {len(df)} remaining.")

# Setup ChromaDB
chroma_client = chromadb.PersistentClient(path="./song_db4")
collection = chroma_client.get_or_create_collection(
    name="songs",
    embedding_function=embedding_functions.SentenceTransformerEmbeddingFunction(
        model_name="all-MiniLM-L6-v2"
    )
)

# Cache for storing processed results
song_cache = {}

# Create client pool for faster processing
async def create_client_pool(pool_size=10):
    return [ollama.AsyncClient() for _ in range(pool_size)]

# Get a client from the pool using round-robin
def get_client(client_pool, index):
    return client_pool[index % len(client_pool)]

# Faster summary and emotion extraction
async def get_song_info_combined(song_title, artist, language, client, max_retries=3):
    cache_key = f"{song_title}|{artist}|{language}"
    if cache_key in song_cache:
        print(f"✔️ [CACHE HIT] {song_title} - {artist}")
        return song_cache[cache_key]

    prompt = f"""
    '{language}' song '{song_title}' by '{artist}'. 
    Provide:
    1. Brief summary of the song and its meaning and feeling.(max 2-3 sentences).
    2. Primary emotion (Very Sad/Moderately Sad/Little Sad/Okayish/Giddy/Pleasant/Party!!/Yikes/Angry/Spooky).Choose ONLY from this list.
    Format: Summary: [summary] Emotion: [emotion]
    """
    
    print(f"🔄 Querying Ollama for: {song_title} - {artist}")

    for attempt in range(max_retries):
        try:
            response = await client.chat(model="mistral", messages=[{"role": "user", "content": prompt}])
            content = response["message"]["content"]

            # Extract summary & emotion
            parts = content.split("Emotion:")
            summary = parts[0].replace("Summary:", "").strip()
            emotion = parts[1].strip() if len(parts) > 1 else "Unknown"

            # Store in cache
            result = (summary, emotion)
            song_cache[cache_key] = result
            print(f"✅ Processed: {song_title} - {artist} | Emotion: {emotion}")

            return result
        except Exception as e:
            print(f"⚠️ Error processing {song_title} - {artist}: {e}")
            await asyncio.sleep(2 ** attempt)  # Exponential backoff
    
    return "Error processing", "Unknown"

# Optimized song processing
async def process_songs(df):
    batch_size = 500  # Increased batch size
    concurrency_limit = 100  # Increased concurrency
    client_pool_size = 10  # More Ollama clients
    accumulation_threshold = 200  # ChromaDB batch insert every 1000 songs
    max_runtime = 7 * 60 * 60  # 7 hours in seconds
    start_time = time.time()

    # Create client pool
    client_pool = await create_client_pool(client_pool_size)
    semaphore = asyncio.Semaphore(concurrency_limit)

    # Accumulators for batch insertion
    accumulated_docs, accumulated_metas, accumulated_ids = [], [], []

    async def process_song(idx, row):
        nonlocal accumulated_docs, accumulated_metas, accumulated_ids

        # Stop processing after 7 hours
        if time.time() - start_time > max_runtime:
            print("⏳ 7 hours reached. Stopping execution...")
            return

        async with semaphore:
            client = get_client(client_pool, idx)
            summary, emotion = await get_song_info_combined(row["song_title"], row["artist"], row["language"], client)

            # Prepare ChromaDB entry
            accumulated_docs.append(summary)
            accumulated_metas.append({"song_title": row["song_title"], "artist": row["artist"], "emotion": emotion})
            accumulated_ids.append(row["Track ID"])

            # Batch insert into ChromaDB
            if len(accumulated_docs) >= accumulation_threshold:
                print(f"💾 Inserting {len(accumulated_docs)} records into ChromaDB...")
                collection.add(documents=accumulated_docs, metadatas=accumulated_metas, ids=accumulated_ids)
                
                # Save progress
                with open(PROGRESS_FILE, "a") as f:
                    for song_id in accumulated_ids:
                        f.write(str(song_id) + "\n")

                accumulated_docs, accumulated_metas, accumulated_ids = [], [], []

        # Print progress every 500 songs
        if idx % 500 == 0:
            print(f"🚀 Processed {idx}/{len(df)} songs...")

    print("🚀 Starting processing...")
    tasks = [process_song(idx, row) for idx, row in df.iterrows()]
    await asyncio.gather(*tasks)

    # Insert remaining records
    if accumulated_docs:
        print(f"💾 Final insert of {len(accumulated_docs)} remaining songs...")
        collection.add(documents=accumulated_docs, metadatas=accumulated_metas, ids=accumulated_ids)

        # Save progress
        with open(PROGRESS_FILE, "a") as f:
            for song_id in accumulated_ids:
                f.write(str(song_id) + "\n")

    elapsed_time = (time.time() - start_time) / 60
    print(f"✅ Completed processing {len(df)} songs in {elapsed_time:.2f} minutes.")

# Run processing
await process_songs(df)


Loaded dataset with 25057 songs.
Skipping 14000 already processed songs. 11057 remaining.


  from .autonotebook import tqdm as notebook_tqdm


🚀 Starting processing...
🔄 Querying Ollama for: Varuvaalo Devi - P. Jayachandran|D. Kousalya|S. H. Subramanyam
🔄 Querying Ollama for: Thaai Thantha Pichchaiyile - P. Susheela
🔄 Querying Ollama for: Thanjavooru Melam - S. P. Balasubrahmanyam
🔄 Querying Ollama for: Dhaagam Theernthathadi - T.M. Soundararajan|P. Susheela
🔄 Querying Ollama for: En Kathalin Raja - Jency Antony|Jolly Abraham
🔄 Querying Ollama for: Adi Aatthi - T.M. Soundararajan
🔄 Querying Ollama for: Thiru Muruga - Sirkazhi Govindarajan
🔄 Querying Ollama for: Billa Billa - Vani Jairam
🔄 Querying Ollama for: Vetkamai Irukkuthadi - P Leela|S. Rajalakshmi
🔄 Querying Ollama for: Pethavale Perivavale - Jesudas
🔄 Querying Ollama for: Naan Pesa Ninaippathelam - T.M. Soundararajan|P. Susheela
🔄 Querying Ollama for: Isaikkavo - S. Janaki|P. Jayachandran
🔄 Querying Ollama for: Vaazhutharum Varththaigal - B.S. Sasirekha
🔄 Querying Ollama for: Ododi Vilayadu - S. P. Balasubrahmanyam|S. Janaki|Dinesh
🔄 Querying Ollama for: Devathai Our 

In [None]:
'''code to summarise diary entry'''

import ollama

def summarize_diary_entry(diary_entry: str) -> str:
    """Summarizes a diary entry using Ollama."""
    prompt = f"""
    Summarize the following diary entry in a short, meaningful way. The user will mention what language they want to listen to music in. Add that line in the summary/
    Keep the key emotions and main points but make it concise:
    
    {diary_entry}
    """
    response = ollama.chat(model="llama3.2",messages=[{"role": "user", "content": prompt}])
    return response['message']['content']  # Extracting the summary from Ollama's response

# Example Usage:
# diary_text = """
# We had a party today, but it went terrible. Firstly, someone spilled a drink on me. And then my ex was there, completely ignroing me, I wanted to die, UGH.
# I hate him why was he there?! You know what's worse, he was there with some new girl ! As if we didn't just break up two days ago, how could he ? Was he
# always interested in her ? Was he cheating one me?? This sucks so bad oh my god. And yet I have this yearning and miss  him so much, I feel like crying. I want to listen to some hindi songs."""

diary_text = """
Today was a great day ! I aced all my tests, and had lots of good food, and then my crush asked me out too!! What a day, I wish it would never end. I want to listen to some Telegu songs which will match my mood"""

summary = summarize_diary_entry(diary_text)
print("Diary Summary:", summary)



Diary Summary: Here is a concise summary of the diary entry:

Today was an amazing day! The user aced their tests, had delicious food, and received a romantic invitation from their crush. They're feeling on top of the world and want to listen to some upbeat Telegu music to match their joyful mood.


In [1]:
# import chromadb

# # Connect to ChromaDB
# client = chromadb.PersistentClient(path="./song_db4")  # Change to your actual path
# collection = client.get_collection("songs")  # Ensure this is your collection name

# # Get all songs in the collection
# all_songs = collection.get(include=['metadata'])  # Retrieve stored metadata

# # Count songs per emotion
# emotion_counts = {}

# for metadata in all_songs['metadatas']:
#     emotion = metadata.get("emotion", "Unknown")  # Ensure the key exists
#     emotion_counts[emotion] = emotion_counts.get(emotion, 0) + 1

# # Print results
# for emotion, count in emotion_counts.items():
#     print(f"{emotion}: {count} songs")


In [7]:
# import chromadb

# client = chromadb.PersistentClient(path="./song_db4")  # Ensure the correct path
# collection = client.get_collection("songs")  # Ensure this is your collection name

# # Define your desired language and emotion
# desired_language = "Kannada"
# desired_emotion = "Very Sad"

# # Step 1: Retrieve Kannada songs matching the emotion
# kannada_songs = collection.query(
#     query_texts=[desired_emotion],
#     n_results=15,
#     where={"language": desired_language}
# )

# # Extract relevant details safely
# kannada_results = []
# if kannada_songs.get("documents") and kannada_songs.get("metadatas"):
#     for song, meta in zip(kannada_songs["documents"], kannada_songs["metadatas"]):
#         if song:  # Check if song list is not empty
#             kannada_results.append({
#                 "title": song[0],  # Extract the first song title
#                 "artist": meta.get("artist", "Unknown Artist"),
#                 "language": desired_language
#             })

# # Step 2: If Kannada songs < 10, fetch English songs to fill the gap
# if len(kannada_results) < 10:
#     remaining_needed = 15 - len(kannada_results)
    
#     english_songs = collection.query(
#         query_texts=[desired_emotion],
#         n_results=remaining_needed,
#         where={"language": "English"}
#     )

#     english_results = []
#     if english_songs.get("documents") and english_songs.get("metadatas"):
#         for song, meta in zip(english_songs["documents"], english_songs["metadatas"]):
#             if song:  # Ensure song list is not empty
#                 english_results.append({
#                     "title": song[0],
#                     "artist": meta.get("artist", "Unknown Artist"),
#                     "language": "English"
#                 })

#     # Combine results, Kannada first
#     final_results = kannada_results + english_results
# else:
#     final_results = kannada_results

# # Display the results
# if final_results:
#     print("\n🎵 Recommended Songs:\n")
#     for idx, song in enumerate(final_results, 1):
#         print(f"{idx}. {song['title']} - {song['artist']} ({song['language']})")
# else:
#     print("No matching songs found.")


No matching songs found.


In [30]:
# # add language


# import pandas as pd
# import chromadb

# # Load the CSV file
# csv_path = "final_final.csv"  # Change to your actual path
# df = pd.read_csv(csv_path)

# # Initialize ChromaDB client
# client = chromadb.PersistentClient(path="./song_db4")  # Ensure path is correct
# collection = client.get_collection("songs")

# # Step 1: Retrieve existing data
# existing_data = collection.get(include=["documents", "metadatas"])

# # Step 2: Create a dictionary for quick lookup from CSV
# language_dict = dict(zip(df["Track ID"], df["language"]))

# # Step 3: Update metadata with language
# updated_metadatas = []
# k=0
# for song_id, metadata in zip(existing_data["ids"], existing_data["metadatas"]):
#     print("processing ",k, " entry")
#     k=k+1
#     track_id = metadata.get("Track ID")  # Ensure this key exists in ChromaDB metadata
#     if track_id in language_dict:
#         metadata["language"] = language_dict[track_id]  # Add language
#     else:
#         metadata["language"] = "Unknown"  # Default if no match found
#     updated_metadatas.append(metadata)

# # Step 4: Reinsert updated documents into ChromaDB
# collection.upsert(
#     ids=existing_data["ids"],
#     documents=existing_data["documents"],
#     metadatas=updated_metadatas
# )

# print("✅ Language metadata updated successfully!")


In [None]:
''' Assigning Weather and Time Labels to the songs '''


import pandas as pd

# Load your dataset with audio features
df = pd.read_csv("final_final.csv")  # Your dataset with song attributes

# Function to classify weather based on song attributes
def classify_weather(row):
    energy = row['energy']
    danceability = row['danceability']
    acousticness = row['acousticness']
    tempo = row['tempo']
    mode = row['mode']  # 1 = major, 0 = minor

    if energy > 0.7 and danceability > 0.6:
        return "Sunny"
    elif acousticness > 0.6 and energy < 0.4:
        return "Rainy"
    elif energy > 0.5 and acousticness < 0.5:
        return "Cloudy"
    elif energy > 0.8 and tempo > 120:
        return "Stormy"
    elif tempo < 80 and acousticness > 0.7:
        return "Snowy"
    elif energy < 0.4 and danceability < 0.5:
        return "Foggy"
    else:
        return "Clear"

# Function to classify time of day based on song attributes
def classify_time_of_day(row):
    energy = row['energy']
    danceability = row['danceability']
    acousticness = row['acousticness']
    tempo = row['tempo']
    mode = row['mode']  # 1 = major, 0 = minor

    if acousticness > 0.7 and energy < 0.4:
        return "Morning"
    elif energy > 0.7 and danceability > 0.6 and mode == 1:
        return "Afternoon"
    elif energy > 0.5 and danceability > 0.4:
        return "Evening"
    else:
        return "Night"

# Apply classification to each song
df["weather"] = df.apply(classify_weather, axis=1)
df["time_of_day"] = df.apply(classify_time_of_day, axis=1)

# Save updated dataset
df.to_csv("songs_with_weather_time.csv", index=False)


In [32]:
import pandas as pd

# Read your CSV file
df = pd.read_csv('songs_with_weather_time.csv')

# Get unique values and counts for weather
print("Weather categories:")
print(df['weather'].value_counts())

print("\nTime of day categories:")
print(df['time_of_day'].value_counts())

Weather categories:
weather
Clear     8695
Cloudy    5193
Rainy     3865
Sunny     3324
Foggy     2722
Stormy    1247
Snowy       11
Name: count, dtype: int64

Time of day categories:
time_of_day
Night        13913
Evening       6401
Morning       2896
Afternoon     1847
Name: count, dtype: int64


In [None]:
'''Update the chromadb with weather and time information'''

import chromadb
import pandas as pd

# Initialize the ChromaDB client
client = chromadb.PersistentClient(path="./song_db4")  # Ensure correct DB path
collection = client.get_collection("songs")

# Load CSV file (Replace 'your_file.csv' with the actual filename)
df = pd.read_csv("songs_with_weather_time.csv")

# Ensure we have relevant columns
if "Track ID" not in df.columns or "weather" not in df.columns or "time_of_day" not in df.columns:
    raise ValueError("CSV is missing required columns: 'Track ID', 'weather', 'time_of_day'")

# Iterate over each row and update ChromaDB metadata
for _, row in df.iterrows():
    track_id = row["Track ID"]
    
    # Fetch existing metadata for this track
    existing_entry = collection.get(ids=[track_id], include=["metadatas"])

    if existing_entry and existing_entry["metadatas"]:
        # Copy existing metadata and add weather & time_of_day
        updated_metadata = existing_entry["metadatas"][0]
        updated_metadata["weather"] = row["weather"]
        updated_metadata["time_of_day"] = row["time_of_day"]

        # Update the ChromaDB entry
        collection.update(
            ids=[track_id],
            metadatas=[updated_metadata]
        )

print("✅ Weather & Time updated successfully in ChromaDB!")

# Verify updates
print(collection.get(limit=5, include=["metadatas"]))


✅ Weather & Time updated successfully in ChromaDB!
{'ids': ['6x2WwLbdurnqOH8hBYtGd6', '24LASg8afs2Zj2tVU1kmfw', '2nLh92mONDTsqpPp4FyKvd', '4cBnZ0kg5ZHnTdINX7wpR8', '27x642yY4A4XEwbTb4NApx'], 'embeddings': None, 'documents': None, 'uris': None, 'data': None, 'metadatas': [{'artist': 'P.B. Sreenivas|S. P. Balasubrahmanyam', 'emotion': 'Moderately Sad', 'song_title': 'Nooru Kannu Saladu', 'time_of_day': 'Night', 'weather': 'Stormy'}, {'artist': 'S. Janaki|S. P. Balasubrahmanyam', 'emotion': 'Moderately Sad', 'song_title': 'Naguva Nayana', 'time_of_day': 'Night', 'weather': 'Clear'}, {'artist': 'S. P. Balasubrahmanyam', 'emotion': 'Moderately Sad', 'song_title': 'Geleyare Nanna Geleyare', 'time_of_day': 'Evening', 'weather': 'Clear'}, {'artist': 'K J Yesudas', 'emotion': 'Moderately Sad', 'song_title': 'Jo Jo Laali Naa Haaduve', 'time_of_day': 'Night', 'weather': 'Cloudy'}, {'artist': 'P.B. Sreenivas|P. Susheela', 'emotion': 'Moderately Sad', 'song_title': 'Nee Bandu Ninthaaga', 'time_of_d

In [2]:
from sentence_transformers import SentenceTransformer

# Load a transformer model for embedding text
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Convert diary entry into an embedding
def get_diary_embedding(diary_text):
    return embedding_model.encode(diary_text).tolist()




In [None]:
'''Finetuning the final code:'''


# import chromadb
# import torch
# from transformers import BertTokenizer, BertForSequenceClassification

# # Connect to ChromaDB
# client = chromadb.PersistentClient(path="./song_db4")
# collection = client.get_collection("songs")

# # Example diary entry
# diary_entry = "Oh my god. Today sucked so bad. There was NO internet. What was I supposed to do ??? And then don't get me started on project UGHHHH. IT'S NOT WORKING!!! The D Drive got filled completely, I'm literally out of space and nothing's working!!!! And this teacher is not listening to our concerns. Why is nothing go my way??? I want some Kannada and English songs that match my mood."


# model = BertForSequenceClassification.from_pretrained("./bert-emotion-classifier")
# tokenizer = BertTokenizer.from_pretrained("./bert-emotion-classifier")

# emotionsList = ["Very Sad", "Moderately Sad", "Little Sad", "Okayish", "Giddy", 
#                 "Pleasant", "Party!!", "Yikes", "Angry", "Spooky"]

# emotion_labels = {idx: emotion for idx, emotion in enumerate(emotionsList)}

# def predict_emotion(text):
#     inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128)
#     outputs = model(**inputs)
#     predicted_class = torch.argmax(outputs.logits).item()
    
#     return emotion_labels.get(predicted_class, "Unknown")


# # Convert diary entry into an embedding
# diary_embedding = get_diary_embedding(diary_entry)

# # Get current weather and time of day
# current_time_of_day = "Rainy"
# current_weather = "Evening"
# emotion = predict_emotion(diary_entry)




# # Define Emotional Journey Path
# emotion_transition_paths = {
#     "Very Sad": ["Moderately Sad", "Little Sad", "Okayish"],
#     "Moderately Sad": ["Little Sad", "Okayish", "Pleasantly"],
#     "Little Sad": ["Okayish", "Pleasantly", "Giddy"],
#     "Okayish": ["Pleasantly", "Giddy", "Party !!"],
#     "Giddy": ["Pleasantly", "Party !!", "Giddy"],
#     "Angry": ["Yikes, what a day", "Okayish", "Pleasantly"],
#     "Spooky": ["Okayish", "Giddy", "Party !!"],
# }


# journey_emotions = emotion_transition_paths.get(emotion, [emotion])

# recommended_songs = []


# # Fixing the where clause using $and
# for emotion in journey_emotions:
#     query_results = collection.query(
#         query_embeddings=[diary_embedding],  # Search by diary meaning
#         n_results=10,
#         where={
#             "$and": [
#                 {"emotion": {"$eq": emotion}},  # Matching detected emotion
#                 {"weather": {"$eq": current_weather}},  # Matching current weather
#                 {"time_of_day": {"$eq": current_time_of_day}}  # Matching current time of day
#             ]
#         }
#     )

#     if not query_results["ids"][0]:  
#         print("🔄 No songs matched with weather and time. Retrying with emotion only...")
#         query_results = collection.query(
#             query_embeddings=[diary_embedding],  
#             n_results=10,
#             where={
#                 "emotion": emotion
#             }
#         )
    
#     if query_results["ids"][0]:  
#         track_ids = query_results["ids"][0]  
#         metadatas = query_results["metadatas"][0]  

#         for track_id, meta in zip(track_ids, metadatas):
#             recommended_songs.append({
#                 "song_title": meta["song_title"],
#                 "artist": meta["artist"],
#                 "emotion": meta["emotion"],
#                 "track_id": track_id,
#                 "weather": meta.get("weather", "N/A"),
#                 "time_of_day": meta.get("time_of_day", "N/A")
#             })



# # Print the final emotional journey playlist
# print("\n🚀 Emotional Journey Playlist:\n")
# for idx, song in enumerate(recommended_songs, 1):
#     print(f"{idx}. {song['song_title']} - {song['artist']} (Track ID: {song['track_id']}, Emotion: {song['emotion']}, Weather: {song['weather']}, Time: {song['time_of_day']})")


# # # Extracting first elements of nested lists
# # documents = query_results["documents"][0]  # Song descriptions
# # metadatas = query_results["metadatas"][0]  # Song metadata
# # track_ids = query_results["ids"][0]  # Song IDs

# # print("\n🎵 Recommended Songs:\n")
# # for idx, (track_id, meta) in enumerate(zip(track_ids, metadatas), 1):
# #     print(f"{idx}. {meta['song_title']} - {meta['artist']} (Track ID: {track_id}, Weather: {meta['weather']}, Time: {meta['time_of_day']})")




🔄 No songs matched with weather and time. Retrying with emotion only...

🚀 Emotional Journey Playlist:

1. Whatcha Gonna Do - Koopsta Knicca (Track ID: 71aPbRvnDlTsGGpDkN700h, Emotion: Yikes, Weather: Sunny, Time: Afternoon)
2. Soil the Stillborn - Infant Annihilator (Track ID: 4vWB3h09KFwuS62Ex7NllR, Emotion: Yikes, Weather: Cloudy, Time: Night)


In [None]:
'''final code without having languages as a separate input'''


import chromadb
import torch
from transformers import BertTokenizer, BertForSequenceClassification

# Connect to ChromaDB
client = chromadb.PersistentClient(path="./song_db4")
collection = client.get_collection("songs")

# Example diary entry
# diary_entry = "Oh my god. Today sucked so bad. There was NO internet. What was I supposed to do ??? And then don't get me started on capstone UGHHHH. IT'S NOT WORKING!!! The D Drive got filled and nothing's working!!!! And this teacher is being weird and I know she's going to scold us tomorrow ughhh. Why is nothing going my way??? I want some Kannada and English songs that match my mood."

diary_entry = """

Today was a day filled with both challenges and rewards. I spent most of 
my morning struggling to understand some complex concepts in my advanced 
programming class, but eventually managed to solve the problem that had 
been bugging me for days. The sense of accomplishment I felt when it 
finally clicked was indescribable!

In the afternoon, I attended a workshop on User Experience Design (UX). 
Although it was challenging at first, I quickly found myself captivated by 
the potential impact UX can have on our daily lives. The speaker 
demonstrated several examples of poorly designed interfaces and how simple 
changes could make them more intuitive and user-friendly. It sparked an 
idea for a project I want to work on: improving the user experience of a 
popular social media platform.

Later, I caught up with some friends over dinner. We discussed our various 
projects and shared our struggles and triumphs from the past week. Hearing 
about their successes inspired me to keep pushing forward with my own 
goals. It's amazing how much we can learn from each other!

Tomorrow, I plan to start sketching out some ideas for my UX project. Wish 
me luck, dear Diary. 
"""


# Load emotion model
model = BertForSequenceClassification.from_pretrained("./bert-emotion-classifier")
tokenizer = BertTokenizer.from_pretrained("./bert-emotion-classifier")
languages = ['Bhojpuri','Assamese','English']
emotionsList = ["Very Sad", "Moderately Sad", "Little Sad", "Okayish", "Giddy", 
                "Pleasantly", "Party!!", "Yikes", "Angry", "Spooky"]
emotion_labels = {idx: emotion for idx, emotion in enumerate(emotionsList)}

def predict_emotion(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128)
    outputs = model(**inputs)
    predicted_class = torch.argmax(outputs.logits).item()
    return emotion_labels.get(predicted_class, "Unknown")

# Convert diary entry into an embedding
diary_embedding = get_diary_embedding(diary_entry)

# Get current weather and time of day
current_time_of_day = "Evening"
current_weather = "Rainy"
emotion = predict_emotion(diary_entry)
print("Emotion detected :",emotion)
# Define Emotional Journey Path
emotion_transition_paths = {
    "Very Sad": ["Very Sad", "Moderately Sad", "Little Sad","Okayish"],
    "Moderately Sad": ["Moderately Sad", "Little Sad", "Okayish", "Pleasantly"],
    "Little Sad": ["Little Sad","Okayish", "Pleasantly", "Giddy"],
    "Okayish": ["Okayish","Pleasantly", "Giddy", "Party!!"],
    "Giddy": ["Pleasantly", "Party !!", "Giddy"],
    "Angry": ["Yikes", "Okayish", "Pleasantly"],
    "Spooky": ["Spooky","Okayish", "Giddy", "Party!!"],
    "Yikes":["Yikes","Little Sad","Okayish","Pleasantly"]
}

journey_emotions = emotion_transition_paths.get(emotion, [emotion])
recommended_songs = []




# Number of songs per emotion stage
songs_per_emotion = 10
min_songs_needed = 30
max_songs_allowed = 100

# Reset recommended songs list
recommended_songs = []

# **Step 1: Fetch 10 songs per emotion stage**
for emotion in journey_emotions:
    if len(recommended_songs) >= max_songs_allowed:
        break  # Stop if we already have enough songs

    query_results = collection.query(
        query_embeddings=[diary_embedding],  
        n_results=songs_per_emotion,  # Fetch 10 songs per emotion stage
        where={
            "$and": [
                {"emotion": {"$eq": emotion}},  
                {"weather": {"$eq": current_weather}},  
                {"time_of_day": {"$eq": current_time_of_day}}
            ]
        }
    )

    if query_results["ids"][0]:  
        for track_id, meta in zip(query_results["ids"][0], query_results["metadatas"][0]):
            recommended_songs.append({
                "song_title": meta["song_title"],
                "artist": meta["artist"],
                "emotion": meta["emotion"],
                "track_id": track_id,
                "weather": meta.get("weather", "N/A"),
                "time_of_day": meta.get("time_of_day", "N/A")
            })

# **Step 2: Relax weather constraint if needed**
if len(recommended_songs) < min_songs_needed:
    for emotion in journey_emotions:
        if len(recommended_songs) >= min_songs_needed:
            break

        query_results = collection.query(
            query_embeddings=[diary_embedding],  
            n_results=songs_per_emotion,
            where={
                "$and": [
                    {"emotion": {"$eq": emotion}},  
                    {"time_of_day": {"$eq": current_time_of_day}}
                ]
            }
        )

        if query_results["ids"][0]:  
            for track_id, meta in zip(query_results["ids"][0], query_results["metadatas"][0]):
                recommended_songs.append({
                    "song_title": meta["song_title"],
                    "artist": meta["artist"],
                    "emotion": meta["emotion"],
                    "track_id": track_id,
                    "weather": meta.get("weather", "N/A"),
                    "time_of_day": meta.get("time_of_day", "N/A")
                })
                if len(recommended_songs) >= max_songs_allowed:
                    break 

# **Step 3: Relax both weather and time constraints if needed**
if len(recommended_songs) < min_songs_needed:
    for emotion in journey_emotions:
        if len(recommended_songs) >= min_songs_needed:
            break

        query_results = collection.query(
            query_embeddings=[diary_embedding],  
            n_results=songs_per_emotion,
            where={
                "emotion": emotion
            }
        )

        if query_results["ids"][0]:  
            for track_id, meta in zip(query_results["ids"][0], query_results["metadatas"][0]):
                recommended_songs.append({
                    "song_title": meta["song_title"],
                    "artist": meta["artist"],
                    "emotion": meta["emotion"],
                    "track_id": track_id,
                    "weather": meta.get("weather", "N/A"),
                    "time_of_day": meta.get("time_of_day", "N/A")
                })
                if len(recommended_songs) >= max_songs_allowed:
                    break 

# Print the final emotional journey playlist
print("\n🚀 Emotional Journey Playlist:\n")
for idx, song in enumerate(recommended_songs[:min_songs_needed], 1):
    print(f"{idx}. {song['song_title']} - {song['artist']} (Track ID: {song['track_id']}, Emotion: {song['emotion']}, Weather: {song['weather']}, Time: {song['time_of_day']})")



Emotion detected : Okayish

🚀 Emotional Journey Playlist:

1. Make It - Patrick Di Stefano (Track ID: 1hFdyeOuu5PPBCNmnBFrsD, Emotion: Okayish, Weather: Cloudy, Time: Evening)
2. Today - Tom Scott (Track ID: 0u1URCwVrREjrQpQ6D1YQD, Emotion: Okayish, Weather: Cloudy, Time: Evening)
3. Hold On - Wilson Phillips (Track ID: 4VZDv8sASBS8UruUBGTFdk, Emotion: Okayish, Weather: Cloudy, Time: Evening)
4. Pedestrian at Best - Courtney Barnett (Track ID: 7w7qzJd1j5FQVexn9TuxJw, Emotion: Okayish, Weather: Cloudy, Time: Evening)
5. Living - 2 Chainz (Track ID: 5oUSs8Lm0Tuj4Yns2olYz8, Emotion: Okayish, Weather: Cloudy, Time: Evening)
6. Bless My Soul - Nightmares On Wax (Track ID: 4kk7HzP1XZ4mdcvGRDxlp7, Emotion: Okayish, Weather: Cloudy, Time: Evening)
7. Let Me Tell U - Jimmy Edgar (Track ID: 0Azy5AlshvVPsx2dA3jED3, Emotion: Okayish, Weather: Cloudy, Time: Evening)
8. Unfinished - Mandisa (Track ID: 597GRV7fJ030diN3BTWpKS, Emotion: Okayish, Weather: Cloudy, Time: Evening)
9. On Our Own (feat. Nata

In [None]:
''' all unique emotions in chromaDB '''

query_results = collection.get(include=["metadatas"])  # Get all metadata

# Extract all emotions from the metadata
all_emotions = {meta["emotion"] for meta in query_results["metadatas"] if "emotion" in meta}

# Print unique emotions
print("Unique emotions in ChromaDB:", all_emotions)


Unique emotions in ChromaDB: {'Pleasant (with a tinge of Sadness)', 'Giddy/Pleasant (due to the melancholic beauty of the song)', 'Okayish (with a hint of determination and resilience)', 'Yikes (due to the intense feelings of longing and sadness expressed in the song)', 'Giddy/Pleasant (as it has elements of strength and resilience in the face of heartbreak)', 'Okayish', 'Pleasant (with a tinge of wistful longing)', 'Giddy (due to its catchy tune and lyrics expressing hopeful longing)', 'Okayish (with a sense of acceptance and calmness)', 'Intense Longing/Yearning', 'Yikes', 'Angry/Sad', 'Angry/Spooky', 'Okayish (with a touch of nostalgia)', 'Okayish (reflective and introspective)', 'Pleasant (with underlying feelings of longing and love)', 'Giddy', 'Pleasant (with a touch of spiritual longing)', 'Pleasant (with a hint of melancholy)', 'Okayish (with a tinge of sadness)', 'Giddy/Party!! (due to the energetic and upbeat nature of the song)', 'Pleasant (with undertones of deep longing)',

In [None]:
'''language added as a separate atribute in chromadb'''


import chromadb
import pandas as pd

# Initialize the ChromaDB client
client = chromadb.PersistentClient(path="./song_db4")  # Ensure correct DB path
collection = client.get_collection("songs")

# Load CSV file (Replace 'your_file.csv' with the actual filename)
df = pd.read_csv("songs_with_weather_time.csv")

# Ensure we have relevant columns
required_columns = {"Track ID", "language"}
if not required_columns.issubset(df.columns):
    raise ValueError("CSV is missing required columns: 'Track ID', 'language'")

# Iterate over each row and update ChromaDB metadata
for _, row in df.iterrows():
    track_id = row["Track ID"]
    
    # Fetch existing metadata for this track
    existing_entry = collection.get(ids=[track_id], include=["metadatas"])

    if existing_entry and existing_entry["metadatas"]:
        # Copy existing metadata and add weather, time_of_day, and language
        updated_metadata = existing_entry["metadatas"][0]
        updated_metadata["language"] = row["language"]

        # Update the ChromaDB entry
        collection.update(
            ids=[track_id],
            metadatas=[updated_metadata]
        )

print("Language updated successfully in ChromaDB!")

# Verify updates
print(collection.get(limit=5, include=["metadatas"]))


Language updated successfully in ChromaDB!
{'ids': ['6x2WwLbdurnqOH8hBYtGd6', '24LASg8afs2Zj2tVU1kmfw', '2nLh92mONDTsqpPp4FyKvd', '4cBnZ0kg5ZHnTdINX7wpR8', '27x642yY4A4XEwbTb4NApx'], 'embeddings': None, 'documents': None, 'uris': None, 'data': None, 'metadatas': [{'artist': 'P.B. Sreenivas|S. P. Balasubrahmanyam', 'emotion': 'Moderately Sad', 'language': 'Kannada', 'song_title': 'Nooru Kannu Saladu', 'time_of_day': 'Night', 'weather': 'Stormy'}, {'artist': 'S. Janaki|S. P. Balasubrahmanyam', 'emotion': 'Moderately Sad', 'language': 'Kannada', 'song_title': 'Naguva Nayana', 'time_of_day': 'Night', 'weather': 'Clear'}, {'artist': 'S. P. Balasubrahmanyam', 'emotion': 'Moderately Sad', 'language': 'Kannada', 'song_title': 'Geleyare Nanna Geleyare', 'time_of_day': 'Evening', 'weather': 'Clear'}, {'artist': 'K J Yesudas', 'emotion': 'Moderately Sad', 'language': 'Kannada', 'song_title': 'Jo Jo Laali Naa Haaduve', 'time_of_day': 'Night', 'weather': 'Cloudy'}, {'artist': 'P.B. Sreenivas|P. Sus

In [2]:
from sentence_transformers import SentenceTransformer

# Load a transformer model for embedding text
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Convert diary entry into an embedding
def get_diary_embedding(diary_text):
    return embedding_model.encode(diary_text).tolist()




In [4]:
'''Final Code !'''


import chromadb
import torch
from transformers import BertTokenizer, BertForSequenceClassification

# Connect to ChromaDB
client = chromadb.PersistentClient(path="./song_db4")
collection = client.get_collection("songs")

# Example diary entry
diary_entry = """
Today I met my ex-fiance after 3 years. I honestly thought I was okay with us breaking up, but it seems I still haven't moved on. When I saw him on the altar with his new soon to be wife, I felt something in me break. It was probably my heart. I can't believe I'm still hung up over him, I hadn't even thought about him for so many years. 
Will I ever be able to love anyone else ever again ? Or will I always be hung up over him ? 
"""


# Load emotion model
model = BertForSequenceClassification.from_pretrained("./bert-emotion-classifier")
tokenizer = BertTokenizer.from_pretrained("./bert-emotion-classifier")

# Define available languages
emotionsList = ["Very Sad", "Moderately Sad", "Little Sad", "Okayish", "Giddy", 
                "Pleasantly", "Party!!", "Yikes", "Angry", "Spooky"]
emotion_labels = {idx: emotion for idx, emotion in enumerate(emotionsList)}

def predict_emotion(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128)
    outputs = model(**inputs)
    predicted_class = torch.argmax(outputs.logits).item()
    return emotion_labels.get(predicted_class, "Unknown")

# Convert diary entry into an embedding
diary_embedding = get_diary_embedding(diary_entry)

# Get current weather and time of day
current_time_of_day = "Evening"
current_weather = "Rainy"
languages = ['Bhojpuri', 'Assamese', 'English']

emotion = predict_emotion(diary_entry)
print("Emotion detected:", emotion)
print("Preferred Languages:", languages)

# Define Emotional Journey Path
emotion_transition_paths = {
    "Very Sad": ["Very Sad", "Moderately Sad", "Little Sad", "Okayish"],
    "Moderately Sad": ["Moderately Sad", "Little Sad", "Okayish", "Pleasantly"],
    "Little Sad": ["Little Sad", "Okayish", "Pleasantly", "Giddy"],
    "Okayish": ["Okayish", "Pleasantly", "Giddy", "Party!!"],
    "Giddy": ["Pleasantly", "Party!!", "Giddy"],
    "Angry": ["Yikes", "Okayish", "Pleasantly"],
    "Spooky": ["Spooky", "Okayish", "Giddy", "Party!!"],
    "Yikes": ["Yikes", "Little Sad", "Okayish", "Pleasantly"]
}

journey_emotions = emotion_transition_paths.get(emotion, [emotion])
recommended_songs = []

# Number of songs per emotion stage
songs_per_emotion = 10
songs_per_language = 5  # To balance across languages
min_songs_needed = 40
max_songs_allowed = 100

# Reset recommended songs list
recommended_songs = []

# **Step 1: Fetch songs per emotion stage, considering multiple languages**
for emotion in journey_emotions:
    if len(recommended_songs) >= max_songs_allowed:
        break  # Stop if we already have enough songs

    for language in languages:
        query_results = collection.query(
            query_embeddings=[diary_embedding],  
            n_results=songs_per_language,  # Fetch songs per language
            where={
                "$and": [
                    {"emotion": {"$eq": emotion}},  
                    {"weather": {"$eq": current_weather}},  
                    {"time_of_day": {"$eq": current_time_of_day}},
                    {"language": {"$eq": language}}
                ]
            }
        )

        if query_results["ids"][0]:  
            for track_id, meta in zip(query_results["ids"][0], query_results["metadatas"][0]):
                recommended_songs.append({
                    "song_title": meta["song_title"],
                    "artist": meta["artist"],
                    "emotion": meta["emotion"],
                    "track_id": track_id,
                    "language": meta["language"],
                    "weather": meta.get("weather", "N/A"),
                    "time_of_day": meta.get("time_of_day", "N/A")
                })

# **Step 2: Relax weather constraint if needed**
if len(recommended_songs) < min_songs_needed:
    for emotion in journey_emotions:
        for language in languages:
            if len(recommended_songs) >= min_songs_needed:
                break

            query_results = collection.query(
                query_embeddings=[diary_embedding],  
                n_results=songs_per_language,
                where={
                    "$and": [
                        {"emotion": {"$eq": emotion}},  
                        {"time_of_day": {"$eq": current_time_of_day}},
                        {"language": {"$eq": language}}
                    ]
                }
            )

            if query_results["ids"][0]:  
                for track_id, meta in zip(query_results["ids"][0], query_results["metadatas"][0]):
                    recommended_songs.append({
                        "song_title": meta["song_title"],
                        "artist": meta["artist"],
                        "emotion": meta["emotion"],
                        "track_id": track_id,
                        "language": meta["language"],
                        "weather": meta.get("weather", "N/A"),
                        "time_of_day": meta.get("time_of_day", "N/A")
                    })
                    if len(recommended_songs) >= max_songs_allowed:
                        break 

# **Step 3: Relax both weather and time constraints if needed**
if len(recommended_songs) < min_songs_needed:
    for emotion in journey_emotions:
        for language in languages:
            if len(recommended_songs) >= min_songs_needed:
                break

            query_results = collection.query(
                query_embeddings=[diary_embedding],  
                n_results=songs_per_language,
                where={
                    "$and": [
                        {"emotion": {"$eq": emotion}},
                        {"language": {"$eq": language}}

                    ]
                    
                    
                }
            )

            if query_results["ids"][0]:  
                for track_id, meta in zip(query_results["ids"][0], query_results["metadatas"][0]):
                    recommended_songs.append({
                        "song_title": meta["song_title"],
                        "artist": meta["artist"],
                        "emotion": meta["emotion"],
                        "track_id": track_id,
                        "language": meta["language"],
                        "weather": meta.get("weather", "N/A"),
                        "time_of_day": meta.get("time_of_day", "N/A")
                    })
                    if len(recommended_songs) >= max_songs_allowed:
                        break 

# Print the final emotional journey playlist
print("\n🚀 Emotional Journey Playlist:\n")
for idx, song in enumerate(recommended_songs[:min_songs_needed], 1):
    
    print(f"{idx}. {song['song_title']} - {song['artist']} ({song['language']}, Track ID: {song['track_id']}, Emotion: {song['emotion']}, Weather: {song['weather']}, Time: {song['time_of_day']})")
    print("------------------------------------------------")
    


Emotion detected: Very Sad
Preferred Languages: ['Bhojpuri', 'Assamese', 'English']

🚀 Emotional Journey Playlist:

1. Allah Ke Hajir Jaan Ke - Suresh Wadkar|Alka Yagnik (Bhojpuri, Track ID: 64so3eLK20bBQHjulKe3qG, Emotion: Very Sad, Weather: Clear, Time: Evening)
------------------------------------------------
2. Phoot Gaile Kismatiya - Mohdrafi - Mohammed Rafi (Bhojpuri, Track ID: 3RJmpReg4MwsWDvrZ6clz9, Emotion: Very Sad, Weather: Stormy, Time: Evening)
------------------------------------------------
3. Jahiya Gavna Ke Aai Doli - Bharat Sharma Vyas (Bhojpuri, Track ID: 5rqiaibqJq59CVO36e1O9z, Emotion: Very Sad, Weather: Cloudy, Time: Evening)
------------------------------------------------
4. Jigija Gijao - Bhupen Hazarika|Usha Mangeshkar (Assamese, Track ID: 00vP1XuCWn1JuImDuYZuaC, Emotion: Very Sad, Weather: Sunny, Time: Evening)
------------------------------------------------
5. Bar Dusta Bhaiti Amar - Nirmali Das (Assamese, Track ID: 0YcUEOcRW2nnLdFuYDlSBz, Emotion: Very Sad