In [None]:
!pip install deepgram-sdk==3.*
!pip install dataclasses typing-extensions verboselogs
!pip install python-dotenv
!pip install num2words
!pip install vaderSentiment



In [None]:
# Copyright 2023-2024 Deepgram SDK contributors. All Rights Reserved.
# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
# SPDX-License-Identifier: MIT


import os
import httpx
import json
import re
import logging, verboselogs
import pandas as pd
import nltk
from dotenv import load_dotenv
from datetime import datetime
from num2words import num2words
from nltk.tokenize import sent_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
from nltk.tokenize import word_tokenize
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from deepgram import (
    DeepgramClient,
    DeepgramClientOptions,
    PrerecordedOptions,
    FileSource,
    )


# The list of common words such as "a", "an", etc.
nltk.download('stopwords')
# Download a collection of popular resources from the NLTK library
nltk.download('popular', quiet=True)
# Sentence tokenization (splitting a text into individual sentences)
nltk.download('punkt')
# English vocabulary database
nltk.download('wordnet')
# Used for training language models or evaluating NLP algorithms
nltk.download('brown')
# Used for performing sentiment analysis on text
nltk.download('vader_lexicon')


# Module-level NLP helpers; `lemmatizer` and `stop_words` are used by clean().
lemmatizer = WordNetLemmatizer()
# NOTE: SentimentIntensityAnalyzer is imported twice above (from nltk, then from
# vaderSentiment); the later import wins, so this is the vaderSentiment class.
analyzer = SentimentIntensityAnalyzer()
stemmer = SnowballStemmer("english")
stop_words = stopwords.words("english")


# Load environment variables from a .env file (python-dotenv).
# NOTE(review): the recorded output `False` suggests no .env values were loaded
# in that run -- confirm a .env file is present where expected.
load_dotenv()



[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Package brown is already up-to-date!
[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


False

In [None]:
def extract_transcript(AUDIO_FILE):
    """Transcribe an audio file with Deepgram and save the response as JSON.

    The full Deepgram response is printed and written to ``<AUDIO_FILE>.json``,
    i.e. right next to the audio file.

    The API key is read from the ``DEEPGRAM_API_KEY`` environment variable
    (which ``load_dotenv()`` can populate from a ``.env`` file) instead of
    being embedded in the notebook. Any key that was previously committed in
    this cell should be treated as leaked and rotated.

    Args:
        AUDIO_FILE: Path of the audio file to transcribe.

    Returns:
        The path of the JSON file that was written, or ``None`` when anything
        failed (the exception message is printed rather than raised).
    """
    try:
        # STEP 1 Create a Deepgram client using the API key in the environment variables
        api_key = os.environ.get("DEEPGRAM_API_KEY")
        if not api_key:
            raise RuntimeError(
                "DEEPGRAM_API_KEY is not set; export it or add it to a .env file."
            )

        config: DeepgramClientOptions = DeepgramClientOptions(
            verbose=logging.SPAM,
        )
        deepgram: DeepgramClient = DeepgramClient(api_key, config)

        # STEP 2 Call the transcribe_file method on the prerecorded class
        with open(AUDIO_FILE, "rb") as file:
            buffer_data = file.read()

        payload: FileSource = {
            "buffer": buffer_data,
        }

        options: PrerecordedOptions = PrerecordedOptions(
            model="nova",
            smart_format=True,
            utterances=True,
            punctuate=True,
            diarize=False,
            numerals=False,
        )

        # Generous read timeout: long recordings can take a while to transcribe.
        response = deepgram.listen.prerecorded.v("1").transcribe_file(
            payload, options, timeout=httpx.Timeout(300.0, connect=10.0)
        )

        # Serialise once and reuse for both the console dump and the file.
        response_json = response.to_json(indent=4)
        print(response_json)

        # Append ".json" to the audio path itself. Joining the directory with
        # the full path again would duplicate the directory part for relative
        # paths such as "data/a.m4a".
        json_filename = f"{AUDIO_FILE}.json"

        # Write the JSON data to the file in the same directory as the audio file
        with open(json_filename, "w", encoding="utf-8") as json_file:
            json_file.write(response_json)

        print(f"Transcript JSON file '{json_filename}' created successfully.")
        return json_filename

    except Exception as e:
        # Best-effort contract: report the problem and signal failure with None.
        print(f"Exception: {e}")
        return None

In [None]:
# Extracting the transcript from the json file
# and saving it into a list
def json_to_list(transcription_file):
    """Load a Deepgram transcript JSON file and split its text on sentence-ending periods.

    Args:
        transcription_file: Path to a JSON file containing the transcript at
            ``results.channels[0].alternatives[0].transcript``.

    Returns:
        A list of strings, one per piece of the transcript. The period that
        ended each piece is removed; leading whitespace and a trailing empty
        string (after a final period) are kept as-is.
    """
    with open(transcription_file, "r", encoding="utf-8") as file:
        data = json.load(file)

    transcript = data['results']['channels'][0]['alternatives'][0]['transcript']

    # Split only on periods that are followed by whitespace or the end of the
    # text, so decimal numbers such as "3.5" are not cut in half.
    return re.split(r"\.(?=\s|$)", transcript)


In [None]:
def clean(list):
    """Normalise raw transcript pieces into cleaned sentences.

    Each input string is split into sentences with ``sent_tokenize``. Every
    sentence is lowercased, stripped of punctuation and tokenized. Digit-only
    tokens are spelled out with ``num2words``, stop words are dropped, and the
    remaining tokens are lemmatized and re-joined with single spaces.

    Args:
        list: Iterable of raw text strings. The parameter name shadows the
            built-in ``list`` and is kept only for backward compatibility.

    Returns:
        A list with one cleaned string per detected sentence. A sentence made
        up solely of stop words yields an empty string.
    """
    cleaned = []
    for raw_text in list:
        for sentence in sent_tokenize(raw_text):
            # Lowercase, then drop everything that is neither a word character nor whitespace
            normalized = re.sub(r"[^\w\s]", "", sentence.lower())

            kept_tokens = []
            for word in word_tokenize(normalized):
                # isdecimal() instead of isnumeric(): isnumeric() is also true for
                # characters such as fractions, superscripts or CJK numerals, which
                # num2words cannot parse and would raise on.
                if word.isdecimal():
                    kept_tokens.append(lemmatizer.lemmatize(num2words(word)))
                elif word not in stop_words:
                    # Remove stopwords and lemmatize the remaining words
                    kept_tokens.append(lemmatizer.lemmatize(word))

            # Join tokens back into a string
            cleaned.append(" ".join(kept_tokens))
    return cleaned

In [None]:
# Use Vader library to get the polarity of the sentence
# Sentences with polarity more than zero are positive, whilst sentences with polarity less than zero are negative
# Sentences with zero polarity are neutral

def getPolarity(text):
    """Classify a piece of text as positive, negative or neutral using VADER.

    Args:
        text: The text to score.

    Returns:
        A ``(sentiment, polarity)`` tuple. ``sentiment`` is ``'positive'``,
        ``'negative'`` or ``'neutral'`` depending on the sign of the compound
        score; ``polarity`` is the full score dictionary returned by
        ``polarity_scores``.
    """
    # Reuse the module-level analyzer instead of constructing a new
    # SentimentIntensityAnalyzer (and re-loading its lexicon) on every call.
    polarity = analyzer.polarity_scores(text)
    compound = polarity['compound']
    if compound > 0:
        sentiment = 'positive'
    elif compound < 0:
        sentiment = 'negative'
    else:
        sentiment = 'neutral'
    return sentiment, polarity

In [None]:
# Podcast 1 source video: https://www.youtube.com/watch?v=65ZCIFa6vpg

# Path of the audio file for this video
audio_file1 = "/content/Discover_How_Generative_AI_can_Revolutionize_Ecommerce_65ZCIFa6vpg.m4a"

# Transcribe the audio; yields the path of the saved JSON file (None if transcription failed)
json_transcript1 = extract_transcript(audio_file1)

# Read the transcript text from the JSON file and split it into a list of pieces
pod1 = json_to_list(json_transcript1)

# Normalise the pieces (lowercase, punctuation and stop words removed, lemmatized)
cleaned1 = clean(pod1)

# Show the cleaned sentences as the cell output
cleaned1

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
                        "word": "issues",
                        "start": 104.16,
                        "end": 104.64,
                        "confidence": 0.9995117,
                        "punctuated_word": "issues"
                    },
                    {
                        "word": "with",
                        "start": 104.64,
                        "end": 104.96001,
                        "confidence": 0.9995117,
                        "punctuated_word": "with"
                    },
                    {
                        "word": "accuracy",
                        "start": 104.96001,
                        "end": 105.46001,
                        "confidence": 0.99902344,
                        "punctuated_word": "accuracy"
                    }
                ],
                "id": "32bb90b5-05d1-4d35-b984-790d5878f43a"
            },
            {
                "start": 105.920006

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
                    },
                    {
                        "word": "had",
                        "start": 103.840004,
                        "end": 104.16,
                        "confidence": 0.9995117,
                        "punctuated_word": "had"
                    },
                    {
                        "word": "issues",
                        "start": 104.16,
                        "end": 104.64,
                        "confidence": 0.9995117,
                        "punctuated_word": "issues"
                    },
                    {
                        "word": "with",
                        "start": 104.64,
                        "end": 104.96001,
                        "confidence": 0.9995117,
                        "punctuated_word": "with"
                    },
                    {
                        "word": "accuracy",
                        "start": 104.96001,
 

['youve probably already experienced amazing world generative ai application chatgpt dali',
 'technology ability potential seep every aspect life',
 'already fear ai gon na take job move u dystopian future like terminator',
 'said brings new opportunity ecommerce',
 'video im going talk high level generative ai different ai used previous ecommerce technology opportunity ecommerce generative ai',
 'let waste time',
 'let get',
 'welcome world generative ai',
 'generative ai',
 'generative ai fascinating branch artificial intel focus generating new original content including text image video',
 'although technology actually around recent development thats meant trained enormous datasets literally billion page text',
 'generative ai effectively take enormous data set turn model learning pattern relationship help generate new output',
 'generative ai found application art entertainment design data synthesis',
 'instance help artist content author exploring new possibility creating original

In [None]:
# Podcast 2 source video: https://www.youtube.com/watch?v=3dbqio7pYZU

# Path of the audio file for this video
audio_file2 = "/content/Artificial_Intelligence_AI_Impact_on_E_commerce_Industr_3dbqio7pYZU.m4a"

# Transcribe the audio; yields the path of the saved JSON file (None if transcription failed)
json_transcript2 = extract_transcript(audio_file2)

# Read the transcript text from the JSON file and split it into a list of pieces
pod2 = json_to_list(json_transcript2)

# Normalise the pieces (lowercase, punctuation and stop words removed, lemmatized)
cleaned2 = clean(pod2)

# Show the cleaned sentences as the cell output
cleaned2

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
                        "confidence": 1.0,
                        "punctuated_word": "on"
                    },
                    {
                        "word": "other",
                        "start": 83.815,
                        "end": 84.135,
                        "confidence": 0.9995117,
                        "punctuated_word": "other"
                    },
                    {
                        "word": "messaging",
                        "start": 84.135,
                        "end": 84.615,
                        "confidence": 1.0,
                        "punctuated_word": "messaging"
                    },
                    {
                        "word": "platforms",
                        "start": 84.615,
                        "end": 85.115,
                        "confidence": 0.9995117,
                        "punctuated_word": "platforms"
                    },
             

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
                        "start": 83.415,
                        "end": 83.655,
                        "confidence": 0.9995117,
                        "punctuated_word": "even"
                    },
                    {
                        "word": "on",
                        "start": 83.655,
                        "end": 83.815,
                        "confidence": 1.0,
                        "punctuated_word": "on"
                    },
                    {
                        "word": "other",
                        "start": 83.815,
                        "end": 84.135,
                        "confidence": 0.9995117,
                        "punctuated_word": "other"
                    },
                    {
                        "word": "messaging",
                        "start": 84.135,
                        "end": 84.615,
                        "confidence": 1.0,
                       

['artificial intelligence impact ecommerce industry',
 'artificial intelligence',
 'ai artificial intelligence ability computer robot controlled computer task usually done human require human intelligence',
 'artificial intelligence impact ecommerce industry',
 'artificial intelligence impact ecommerce experience',
 'improved chatbots voice recognition augmented reality visual searching product recommendation fraud protection predictive inventory management',
 'discussion focus artificial intelligence work e commerce',
 'add value e commerce',
 'example artificial intelligence used e commerce',
 'improved chatbots',
 'chatbots e commerce artificially intelligent system online retailer deploy engage customer throughout customer journey',
 'e commerce store use chatbots answer question product directly website even messaging platform like whatsapp instagram facebook messenger',
 'reason e commerce business need chatbot',
 'provide two way communication established customer',
 'teach thin

In [None]:
# Podcast 3 source video: https://www.youtube.com/watch?v=GQjuEygcS3A

# Path of the audio file for this video
audio_file3 = "/content/The_Future_of_Ecommerce_9_Trends_That_Will_Exist_In_203_GQjuEygcS3A.m4a"

# Transcribe the audio; yields the path of the saved JSON file (None if transcription failed)
json_transcript3 = extract_transcript(audio_file3)

# Read the transcript text from the JSON file and split it into a list of pieces
pod3 = json_to_list(json_transcript3)

# Normalise the pieces (lowercase, punctuation and stop words removed, lemmatized)
cleaned3 = clean(pod3)

# Show the cleaned sentences as the cell output
cleaned3

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
                        "confidence": 0.9868164,
                        "punctuated_word": "in"
                    },
                    {
                        "word": "general",
                        "start": 211.895,
                        "end": 212.395,
                        "confidence": 0.9807129,
                        "punctuated_word": "general."
                    },
                    {
                        "word": "number",
                        "start": 212.65001,
                        "end": 212.97,
                        "confidence": 0.9189453,
                        "punctuated_word": "Number"
                    },
                    {
                        "word": "4",
                        "start": 212.97,
                        "end": 213.45001,
                        "confidence": 0.92944336,
                        "punctuated_word": "4,"
                    },
        

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
                ],
                "id": "c4ee31a3-5271-41ba-93e6-5d029aba9c5f"
            },
            {
                "start": 211.735,
                "end": 216.19,
                "confidence": 0.97143555,
                "channel": 0,
                "transcript": "in general. Number 4, autonomous delivery drones could become mainstream.",
                "words": [
                    {
                        "word": "in",
                        "start": 211.735,
                        "end": 211.895,
                        "confidence": 0.9868164,
                        "punctuated_word": "in"
                    },
                    {
                        "word": "general",
                        "start": 211.895,
                        "end": 212.395,
                        "confidence": 0.9807129,
                        "punctuated_word": "general."
                    },
                    

['e commerce going undergo high number change technological social factor',
 'ill describe future e commerce nine trend exist year two thousand and thirty',
 'number one augmented reality gain mainstream acceptance',
 'augmented reality simulates person shopping experience allowing customer see product might look home',
 'ar customer click product instantly see overlay wherever point mobile device',
 'customer view product every conceivable angle interactive three hundred and sixty degree experience give better idea value product offer',
 'smartphones smart glass handheld wearable device provide bulk experience',
 'shopper ar example tool already allows customer view product natural environment',
 'buying',
 'help ensure product good fit purpose intended',
 'help customer make informed decision quick come choosing brand buy',
 'here people dont realize',
 'auglement reality likely widely used virtual reality',
 'according grand view research global virtual reality market worth sixty-se

In [None]:
# Podcast 4 source video: https://www.youtube.com/watch?v=byQ8GudNBu8

# Path of the audio file for this video
audio_file4 = "/content/How_Artificial_Intelligence_is_transforming_the_E_comme_byQ8GudNBu8.m4a"

# Transcribe the audio; yields the path of the saved JSON file (None if transcription failed)
json_transcript4 = extract_transcript(audio_file4)

# Read the transcript text from the JSON file and split it into a list of pieces
pod4 = json_to_list(json_transcript4)

# Normalise the pieces (lowercase, punctuation and stop words removed, lemmatized)
cleaned4 = clean(pod4)

# Show the cleaned sentences as the cell output
cleaned4

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
                        "punctuated_word": "those"
                    },
                    {
                        "word": "different",
                        "start": 67.06,
                        "end": 67.46,
                        "confidence": 0.9995117,
                        "punctuated_word": "different"
                    },
                    {
                        "word": "dimensions",
                        "start": 67.46,
                        "end": 67.96,
                        "confidence": 1.0,
                        "punctuated_word": "dimensions"
                    }
                ],
                "id": "5001f2d0-56a2-4dfa-a371-f856b87217eb"
            },
            {
                "start": 68.58,
                "end": 70.2,
                "confidence": 0.9879883,
                "channel": 0,
                "transcript": "to drive better business outcomes.",
             

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
                    },
                    {
                        "word": "all",
                        "start": 66.58,
                        "end": 66.82,
                        "confidence": 0.93408203,
                        "punctuated_word": "all"
                    },
                    {
                        "word": "those",
                        "start": 66.82,
                        "end": 67.06,
                        "confidence": 0.9916992,
                        "punctuated_word": "those"
                    },
                    {
                        "word": "different",
                        "start": 67.06,
                        "end": 67.46,
                        "confidence": 0.9995117,
                        "punctuated_word": "different"
                    },
                    {
                        "word": "dimensions",
                        "start": 67.46,
       

['there wide range way use ai make fast decision automate optimize drive better business outcome',
 'know solution example going really leverage traffic way customer engage look outcome map together next customer come engage right get optimal experience',
 'say impact broad sector company selling lot',
 'there tremendous number decision need made youre gon na truly maximize optimize business',
 'everything serve customer individual basis optimize price optimize inventory position streamline fulfillment customer service operation',
 'there wide range way use ai make fast decision automate optimize across different dimension drive better business outcome',
 'human being know there sort finite number thing quickly',
 'right',
 'youre talking one hundred one thousand visit day youre talking many thousand transaction day youre talking serve one customer uniquely maximize serve',
 'there lot opportunity leverage ai compliment people involved business drive better business outcome frankly bet

In [None]:
# Podcast 5 source video: https://www.youtube.com/watch?v=zqH9ofLJFTw

# Path of the audio file for this video
audio_file5 = "/content/How_Machine_Learning_Is_Impacting_E_Commerce_w_AmeenKaz_zqH9ofLJFTw (1).m4a"

# Transcribe the audio; yields the path of the saved JSON file (None if transcription failed)
json_transcript5 = extract_transcript(audio_file5)

# Read the transcript text from the JSON file and split it into a list of pieces
pod5 = json_to_list(json_transcript5)

# Normalise the pieces (lowercase, punctuation and stop words removed, lemmatized)
cleaned5 = clean(pod5)

# Show the cleaned sentences as the cell output
cleaned5

In [None]:
# Combine the cleaned sentences of all five podcasts into one flat list,
# keeping the podcasts in order.
result = [*cleaned1, *cleaned2, *cleaned3, *cleaned4, *cleaned5]

print(result)




In [None]:
# One record per cleaned sentence: the text, its sentiment label from
# getPolarity (the score dictionary is not stored) and a fixed category.
data = [
    {
        'sentence': text,
        'sentiment': getPolarity(text)[0],
        'category': 'E-Commerce',    # add category here
    }
    for text in result
]

df = pd.DataFrame(data)

In [None]:
# Save the labelled sentences to a CSV file, without the DataFrame index column.
# Make sure to change the category in the file name
# (presumably to match the 'category' value used when building `df` -- confirm).
df.to_csv('E-Commerce data (podcast-scraped).csv', index=False)
