<a href="https://colab.research.google.com/github/anishkarao1/arco/blob/main/ARCO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# import dependencies
from IPython.display import display, Javascript, Image
from google.colab.output import eval_js # compiling JavaScript code used in Python
from google.colab import files
from base64 import b64decode, b64encode
import cv2 # used to display image
import numpy as np
import PIL # Python Imaging Library to process images
import io # Input/Output for streaming
import html
import time

In [None]:
# turn the base64 data URL sent back by the browser into an OpenCV image
def js_to_image(js_reply):
  """
  Decode a webcam frame received from JavaScript.

  Params:
          js_reply: data-URL string whose second comma-separated field is the
                    base64-encoded image
  Returns:
          img: whatever cv2.imdecode produces for those bytes (BGR channel order)
  """
  # the base64 payload is the field right after the first comma
  encoded_payload = js_reply.split(',')[1]
  raw_bytes = b64decode(encoded_payload)
  # expose the raw bytes as a flat uint8 buffer, the input imdecode expects
  pixel_buffer = np.frombuffer(raw_bytes, dtype=np.uint8)
  # IMREAD_COLOR (== 1) requests a 3-channel colour image
  return cv2.imdecode(pixel_buffer, flags=cv2.IMREAD_COLOR)

# encode an RGBA overlay array as a PNG data URL the browser can draw over the video
def bbox_to_bytes(bbox_array):
  """
  Params:
          bbox_array: Numpy array (pixels) containing rectangle to overlay on video stream.
                      It is interpreted as RGBA data.
  Returns:
        bytes: 'data:image/png;base64,...' string holding the array as a PNG
  """
  # `import PIL` on its own does not load the PIL.Image submodule, so import it
  # explicitly instead of relying on some other library having loaded it first.
  from PIL import Image as PILImage

  # convert array into PIL image
  overlay = PILImage.fromarray(bbox_array, 'RGBA')
  iobuf = io.BytesIO()
  # format bbox into png for return
  overlay.save(iobuf, format='png')
  # base64 output is pure ASCII, so decoding it as UTF-8 cannot fail
  encoded_png = b64encode(iobuf.getvalue()).decode('utf-8')
  return 'data:image/png;base64,{}'.format(encoded_png)

In [None]:
# locate OpenCV's bundled frontal-face Haar cascade, then build the detector from it
_FRONTAL_FACE_CASCADE_XML = cv2.samples.findFile(
    cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
)
face_cascade = cv2.CascadeClassifier(_FRONTAL_FACE_CASCADE_XML)

In [None]:
def take_photo(filename='photo.jpg', quality=0.8):
  """
  Capture one webcam photo in the browser, box the detected faces, and save it.

  The injected JavaScript shows a live preview with a "Capture" button, waits
  for the click, and returns the frame as a JPEG data URL.

  Params:
          filename: path the annotated image is written to with cv2.imwrite
          quality: JPEG quality passed to canvas.toDataURL in the browser
  Returns:
          filename: the same path that was passed in
  """
  js = Javascript('''
    async function takePhoto(quality) {
      const div = document.createElement('div');
      const capture = document.createElement('button');
      capture.textContent = 'Capture';
      div.appendChild(capture);

      const video = document.createElement('video');
      video.style.display = 'block';
      const stream = await navigator.mediaDevices.getUserMedia({video: true});

      document.body.appendChild(div);
      div.appendChild(video);
      video.srcObject = stream;
      await video.play();

      // Resize the output to fit the video element.
      google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

      // Wait for Capture to be clicked.
      await new Promise((resolve) => capture.onclick = resolve);

      const canvas = document.createElement('canvas');
      canvas.width = video.videoWidth;
      canvas.height = video.videoHeight;
      canvas.getContext('2d').drawImage(video, 0, 0);
      stream.getVideoTracks()[0].stop();
      div.remove();
      return canvas.toDataURL('image/jpeg', quality);
    }
    ''')
  display(js)

  # get photo data
  data = eval_js('takePhoto({})'.format(quality))
  # get OpenCV format image
  img = js_to_image(data)
  # js_to_image decodes with OpenCV, which yields BGR, so convert from BGR
  # (RGB2GRAY would apply the red and blue luminance weights the wrong way round)
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  # get face bounding box coordinates using Haar Cascade
  faces = face_cascade.detectMultiScale(gray)
  # draw face bounding box on image; (255,0,0) is blue in BGR order
  for (x,y,w,h) in faces:
      img = cv2.rectangle(img,(x,y),(x+w,y+h),(255,0,0),2)
  # save image
  cv2.imwrite(filename, img)

  return filename

In [None]:
!pip install deepface # deep learning 9-layered neural network program for facial recognition, and facial emotion characterization

In [None]:
from deepface import DeepFace

In [None]:
#VIDEO STREAM
# Seconds the preview stays open once the video is playing; must stay above the
# 5 second wait inside stream_frame or the window closes before the capture.
capture_duration = 7

# JavaScript to properly create our live video stream using our webcam as input
def video_stream():
  """
  Inject the browser-side JavaScript for the webcam preview into the cell output.

  The script defines stream_frame(label, imgData), which builds the preview on
  first use, waits 5 seconds, and resolves with an object whose 'img' entry is a
  JPEG data URL of the current frame ('' is returned instead when the user
  clicked to stop). The module-level capture_duration is read when this function
  runs and baked into the script as the auto-close delay.

  Returns:
          None; the script is only displayed.
  """

  js = Javascript('''
    var video;
    var div = null;
    var stream;
    var captureCanvas;
    var imgElement;
    var labelElement;

    var pendingResolve = null;
    var shutdown = false;

    function removeDom() {
       // Can run more than once (auto-close timer and click-to-stop), so each
       // handle is checked before it is released.
       if (stream) {
         stream.getVideoTracks()[0].stop();
       }
       if (video) {
         video.remove();
       }
       if (div) {
         div.remove();
       }
       video = null;
       div = null;
       stream = null;
       imgElement = null;
       captureCanvas = null;
       labelElement = null;
    }

var capturedFrame = false;

    function onAnimationFrame() {
      if (!shutdown && !capturedFrame) {
        window.requestAnimationFrame(onAnimationFrame);
      }
      if (pendingResolve) {
        var result = "";
        // After a teardown there is nothing left to draw, so answer with ""
        // instead of throwing and leaving the caller waiting forever.
        if (!shutdown && captureCanvas && video) {
          captureCanvas.getContext('2d').drawImage(video, 0, 0, 640, 480);
          result = captureCanvas.toDataURL('image/jpeg', 0.8);
        }
        var lp = pendingResolve;
        pendingResolve = null;
        lp(result);
        capturedFrame = true;
      }
    }

    async function createDom() {
      if (div !== null) {
        return stream;
      }

      div = document.createElement('div');
      div.style.border = '2px solid black';
      div.style.padding = '3px';
      div.style.width = '100%';
      div.style.maxWidth = '600px';
      document.body.appendChild(div);

      const modelOut = document.createElement('div');
      modelOut.innerHTML = "<span>Status:</span>";
      labelElement = document.createElement('span');
      labelElement.innerText = 'No data';
      labelElement.style.fontWeight = 'bold';
      modelOut.appendChild(labelElement);
      div.appendChild(modelOut);

      video = document.createElement('video');
      video.style.display = 'block';
      video.width = div.clientWidth - 6;
      video.setAttribute('playsinline', '');

      video.onclick = () => { shutdown = true; };

      stream = await navigator.mediaDevices.getUserMedia(
          {video: { facingMode: "environment"}});
      div.appendChild(video);

      imgElement = document.createElement('img');
      imgElement.style.position = 'absolute';
      imgElement.style.zIndex = 1;
      imgElement.onclick = () => { shutdown = true; };
      div.appendChild(imgElement);

      const instruction = document.createElement('div');
      instruction.innerHTML =
          '<span style="color: red; font-weight: bold;">' +
          'When finished, click here or on the video to stop this demo</span>';
      div.appendChild(instruction);
      instruction.onclick = () => { shutdown = true; };

      video.srcObject = stream;
      await video.play();

      //accessing the Python variable inside JS
      var capture_duration_js = ''' + str(capture_duration) + ''' ;
      //closing the window after a specified amount of time
      //started only once the video is playing, so time spent on the camera
      //permission prompt does not eat into the capture window
      setTimeout(function(){
        removeDom(); //close the window
      }, capture_duration_js * 1000); //converting seconds to milliseconds

      captureCanvas = document.createElement('canvas');
      captureCanvas.width = 640; //video.videoWidth;
      captureCanvas.height = 480; //video.videoHeight;
      window.requestAnimationFrame(onAnimationFrame);

      return stream;
    }

    async function delay(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
    }

    async function stream_frame(label, imgData) {
      if (shutdown) {
        removeDom();
        shutdown = false;
        return '';
      }

      var preCreate = Date.now();
      stream = await createDom();

      var preShow = Date.now();
      if (label != "") {
        labelElement.innerHTML = label;
      }

      if (imgData != "") {
        var videoRect = video.getClientRects()[0];
        imgElement.style.top = videoRect.top + "px";
        imgElement.style.left = videoRect.left + "px";
        imgElement.style.width = videoRect.width + "px";
        imgElement.style.height = videoRect.height + "px";
        imgElement.src = imgData;
      }

      var preCapture = Date.now();
      await delay(5000);

      // A stop click during the wait ends the animation loop, so nothing would
      // ever resolve the promise below. Tear down and report "no frame" instead.
      if (shutdown) {
        removeDom();
        shutdown = false;
        return '';
      }

      // The frame itself is grabbed by onAnimationFrame on the next repaint.
      var result = await new Promise(function(resolve, reject) {
        pendingResolve = resolve;
      });
      shutdown = false;

      return {'create': preShow - preCreate,
              'show': preCapture - preShow,
              'capture': Date.now() - preCapture,
              'img': result};
    }
    ''') #passing the variable to JS

  display(js)

def video_frame(label, bbox):
  """
  Call the browser-side stream_frame(label, bbox) and return its reply.

  Params:
          label: text passed as the first JavaScript argument
          bbox: overlay image data URL passed as the second argument ('' for none)
  Returns:
          data: whatever eval_js hands back for the stream_frame call
  """
  import json  # local import keeps this cell runnable on its own

  # JSON string literals are valid JavaScript string literals, so quotes,
  # backslashes and newlines in the arguments can no longer break the call.
  js_call = 'stream_frame({}, {})'.format(json.dumps(str(label)), json.dumps(str(bbox)))
  data = eval_js(js_call)
  return data


In [None]:
!pip install llama-index-llms-groq
!pip install llama-index
from llama_index.llms.groq import Groq
# Read the Groq key from Colab's secret store (secret name: GROQ_API_KEY) rather
# than embedding it in the notebook.
# NOTE(review): a key was previously committed on this line; it should be revoked.
from google.colab import userdata
llm = Groq(model="llama3-70b-8192", api_key=userdata.get('GROQ_API_KEY'))

In [None]:
!pip install -q -U google-generativeai

# Import the Python SDK
import google.generativeai as genai
# Used to securely store your API key
from google.colab import userdata

# Read the Gemini key from Colab's secret store; the secret is looked up under
# the name GOOGLE_API_KEY.
GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')
# Hand the key to the google.generativeai SDK.
genai.configure(api_key=GOOGLE_API_KEY)

In [None]:
!pip install google-api-python-client

In [None]:
from googleapiclient.discovery import build

# Function to search for a song and return its YouTube link
def get_youtube_song_link(api_key, query):
    """
    Search for a song on YouTube and return the link to the first result.

    Args:
        api_key (str): YouTube Data API key.
        query (str): The search query (song name).

    Returns:
        tuple: ``(text, video_id)``. On a hit, ``text`` is
        ``"Title: <title>\\nLink: <watch url>"`` and ``video_id`` is the id of
        the top result. When nothing is found, ``text`` is
        ``"No results found for your query."`` and ``video_id`` is ``None``,
        so callers can always unpack or index the result the same way.
    """
    # Initialize the YouTube API client
    youtube = build("youtube", "v3", developerKey=api_key)

    # Perform the search
    search_response = youtube.search().list(
        q=query,
        part="id,snippet",
        maxResults=1,  # Get only the top result
        type="video"   # Search for videos only
    ).execute()

    # A missing or empty "items" list both mean there is no result
    items = search_response.get("items") or []
    if not items:
        return "No results found for your query.", None

    # Extract the video ID and title
    top_result = items[0]
    video_id = top_result["id"]["videoId"]
    video_title = top_result["snippet"]["title"]
    return f"Title: {video_title}\nLink: https://www.youtube.com/watch?v={video_id}", video_id

# YouTube Data API key, read from Colab's secret store (secret name:
# YOUTUBE_API_KEY) rather than embedded in the notebook.
# NOTE(review): a key was previously committed on this line; it should be revoked.
from google.colab import userdata
API_KEY = userdata.get('YOUTUBE_API_KEY')

In [None]:
pip install urlextract

In [None]:
#using the groq api
def groq_api(key1, key2):
  """
  Ask the Groq-hosted LLM for five songs matching the detected emotions.

  Params:
          key1: the emotions to describe in the prompt (interpolated as-is)
          key2: the main emotion (interpolated as-is)
  Returns:
          the text of the model's completion
  """
  prompt = (
      "Sort through the top 50 GLOBAL SONGS. "
      f"Based on the emotions depicted: {key1}, "
      f"keeping the main emotion in mind: {key2}, "
      "output the top 5 related songs"
  )
  completion = llm.complete(prompt)
  return completion.text


#using the gemini api
def gemini_api(response):
  """
  Ask Gemini to reduce a playlist text to its first song.

  Params:
          response: text containing the song list (interpolated into the prompt)
  Returns:
          the model's answer with surrounding whitespace removed, so it can be
          used directly as a search query
  """
  model = genai.GenerativeModel('gemini-2.0-flash-exp')
  prompt = (
      f"Display ONLY the first song name and singer from {response} "
      "Don't write anything else. It should be one sentence only"
  )
  reply = model.generate_content(prompt)
  return reply.text.strip()

#using the youtube api
def youtube_api(song_query):
  """
  Look up a song on YouTube and return a watch URL for the top hit.

  Params:
          song_query: search text handed to get_youtube_song_link
  Returns:
          'https://www.youtube.com/watch?v=<id>' for the top result, or
          "No Link Found" when the search produced no video
  """
  result = get_youtube_song_link(API_KEY, song_query)

  # A hit comes back as (text, video_id). Anything else (for example a bare
  # "no results" message) carries no id and is treated as a miss.
  video_id = result[1] if isinstance(result, tuple) and len(result) > 1 else None
  if not video_id:
    return "No Link Found"

  # Build the link from the id instead of scanning the text for URLs: the text
  # starts with the video title, which may itself contain something URL-like.
  return f"https://www.youtube.com/watch?v={video_id}"

In [None]:
# code for webcam to start streaming video after it is captured
def start_video_button():
  """
  Open the webcam preview, grab one frame, and classify the emotion in it.

  Only a single frame is analysed per call and no overlay is sent back to the
  browser.

  Returns:
          (key1, key2): the 'emotion' and 'dominant_emotion' entries of the
          first DeepFace result, or (None, None) when the browser returned no
          frame (for example because the user clicked to stop).
  """
  video_stream()

  # label for video; the empty string means "no overlay image"
  js_reply = video_frame('Capturing...', '')

  # stream_frame answers '' on shutdown and may carry an empty 'img'; either way
  # there is nothing to analyse, so report "no mood" instead of failing later.
  if not js_reply or not js_reply.get("img"):
    return (None, None)

  # convert JavaScript response to OpenCV Image
  img = js_to_image(js_reply["img"])

  # Only the emotion scores are used, so skip the other analyses.
  # NOTE(review): DeepFace may raise when it finds no face - confirm and decide
  # whether that should surface to the UI.
  predictions = DeepFace.analyze(img, actions=['emotion'])

  first_face = predictions[0]
  return (first_face['emotion'], first_face['dominant_emotion'])

In [None]:
!pip install gradio


In [None]:
import gradio as gr
from gradio import Markdown

# Callback behind the "Your mood right now" button
def display_mood():
    """Run the webcam capture and pass on the pair start_video_button returns."""
    emotion_scores, dominant_emotion = start_video_button()
    return emotion_scores, dominant_emotion

def display_main_mood():
  """Run display_mood and return only the second value of its pair."""
  _, dominant_emotion = display_mood()
  return dominant_emotion

# Callback behind the "Generate playlist" button
def generate_playlist(key1, key2):
    """Forward both emotion values to groq_api and return its reply unchanged."""
    return groq_api(key1, key2)

def return_top_song(groq):
    """Forward the playlist text to gemini_api and return its reply unchanged."""
    return gemini_api(groq)

def generate_link(gemini):
    """
    Turn a song description into HTML for the link output.

    Args:
        gemini: search text passed to youtube_api.

    Returns:
        str: an ``<a>`` element pointing at the URL youtube_api returned, or the
        escaped message text when the reply is not an http(s) URL (for example
        "No Link Found").
    """
    link = str(youtube_api(gemini))
    # Escape before embedding so a quote or angle bracket cannot break the markup.
    safe_link = html.escape(link, quote=True)
    # A non-URL reply is shown as plain text instead of a dead hyperlink.
    if not link.startswith(("http://", "https://")):
        return safe_link
    return f"<a href='{safe_link}' target='_blank'>{safe_link}</a>"

# Title and tagline
with gr.Blocks() as app:
    gr.Markdown("# **ARCO**\n### _Striking the right chord_")

    # "Your mood right now" button
    mood_button = gr.Button("Your mood right now")
    mood_output = gr.Textbox(label="Your Mood", interactive=False)

    # Store outputs of display_mood in State variables
    key1_state = gr.State()
    key2_state = gr.State()
    mood_button.click(display_mood, outputs=[key1_state, key2_state]).then(lambda key1, key2: key2, inputs=[key1_state, key2_state], outputs=mood_output)  # Display key2 (main mood)

    # "Generate song playlist" button
    playlist_button = gr.Button("Generate playlist")
    playlist_output = gr.Textbox(label="Generated Playlist", interactive=False)

    # "Generate song link" button
    link_button = gr.Button("Youtube Link")
    link_output = gr.HTML(label="Get Link")
    # Holds the extracted top song between the two link steps, so the song name
    # is not routed through the HTML output.
    top_song_state = gr.State()

    # One handler only: registering generate_playlist twice on the same button
    # would call the LLM twice per click and let the two runs race on the output.
    playlist_button.click(generate_playlist, inputs=[key1_state, key2_state], outputs=playlist_output
                         ).then(return_top_song, inputs=playlist_output, outputs=top_song_state
                         ).then(generate_link, inputs=top_song_state, outputs=link_output)

    # The link button rebuilds the link from whatever playlist is currently shown.
    link_button.click(return_top_song, inputs=playlist_output, outputs=top_song_state
                     ).then(generate_link, inputs=top_song_state, outputs=link_output)


# Launch the app
app.launch(debug=True)