In [None]:
import json
import re

import pandas as pd
import requests
from bs4 import BeautifulSoup
from requests.exceptions import URLRequired

In [None]:
def get_channel_list(query, timeout=30):
    """Return the channel URLs listed on Rumble's channel search for ``query``.

    Args:
        query: Free-text search term. It is passed through ``params`` so that
            spaces and reserved characters such as ``&`` or ``#`` are
            URL-encoded instead of being spliced raw into the URL.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        A list of absolute channel URLs in the order the anchors appear in
        the response. Only the single search response is read; no further
        result pages are followed.
    """
    response = requests.get(
        'https://rumble.com/search/channel',
        params={'q': query},
        timeout=timeout,
    )
    soup = BeautifulSoup(response.text, 'html.parser')
    channels = soup.find_all('a', attrs={"class": "channel-item--a"})

    # An anchor without an href would raise TypeError on concatenation, so skip it
    return [
        'https://rumble.com' + channel.get('href')
        for channel in channels
        if channel.get('href')
    ]

In [None]:
def get_video_list_paginator(query, timeout=30):
    """Return the video URLs found on every results page of a Rumble search.

    The first results page is requested once and reused both for its
    paginator links and for its videos. Further page URLs come from the
    paginator anchors on that page plus whatever ``paginator_helper`` appends
    after following the "»" link.

    Args:
        query: Free-text search term; URL-encoded by ``requests`` via ``params``.
        timeout: Seconds to wait on each HTTP request issued by this function.

    Returns:
        A list of absolute video URLs, first page first, then each distinct
        further page in the order it was discovered.
    """
    first_response = requests.get(
        'https://rumble.com/search/all',
        params={'q': query},
        timeout=timeout,
    )
    first_soup = BeautifulSoup(first_response.text, 'html.parser')

    page_urls = []
    arrow_page_url = None
    for page in first_soup.find_all('a', attrs={"class": "paginator--link"}):
        href = page.get('href')
        if not href:
            continue
        page_url = 'https://rumble.com' + href
        page_urls.append(page_url)
        # Remember only the first "»" link so the helper runs once even if
        # the paginator markup is repeated on the page
        if arrow_page_url is None and page.get('aria-label') == '»':
            arrow_page_url = page_url

    if arrow_page_url is not None:
        # The helper extends page_urls in place
        paginator_helper(arrow_page_url, page_urls)

    video_urls = []

    def collect_videos(soup):
        # Append every video link of one parsed page, skipping anchors without href
        for video in soup.find_all('a', attrs={"class": "video-item--a"}):
            href = video.get('href')
            if href:
                video_urls.append('https://rumble.com' + href)

    collect_videos(first_soup)
    # dict.fromkeys drops repeated page URLs while keeping discovery order, so
    # no page is downloaded (and no video reported) twice
    for url in dict.fromkeys(page_urls):
        response = requests.get(url, timeout=timeout)
        collect_videos(BeautifulSoup(response.text, 'html.parser'))

    return video_urls

In [None]:
def paginator_helper(url, page_urls, timeout=30):
    """Extend ``page_urls`` with the page URLs reachable by following "»" links.

    Starting at ``url``, each page's paginator anchors are read. Anchors from
    the one labelled with the next page number onward are appended to
    ``page_urls`` (skipping URLs already present), and the "»" anchor, if
    any, becomes the next page to visit. The walk stops when a page has no
    paginator anchors, no "»" anchor, or its "»" anchor leads to a page that
    was already visited.

    Args:
        url: Absolute URL of the page to start from. Its page number is read
            from a ``page=`` query parameter, or failing that from trailing
            digits; without either, all anchors on the page are taken.
        page_urls: List of already-known page URLs; mutated in place.
        timeout: Seconds to wait on each HTTP request.

    Returns:
        The same ``page_urls`` list that was passed in.
    """
    seen_urls = set(page_urls)
    visited = set()

    # Iterating with a visited set (instead of recursing) cannot exceed the
    # recursion limit and terminates even if a "»" link points backwards
    while url not in visited:
        visited.add(url)

        # Read the whole page number; the last character alone turns page 10 into 0
        page_match = re.search(r'[?&]page=(\d+)', url) or re.search(r'(\d+)$', url)
        current_page = int(page_match.group(1)) if page_match else None

        response = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(response.text, 'html.parser')
        pages = soup.find_all('a', attrs={"class": "paginator--link"})

        if not pages:
            break

        # Finds index of the first anchor we have not been told about yet
        start_idx = 0
        if current_page is not None:
            next_label = str(current_page + 1)
            for i, page in enumerate(pages):
                if page.get('aria-label') == next_label:
                    start_idx = i
                    break

        arrow_page_url = None
        for page in pages[start_idx:]:
            href = page.get('href')
            if not href:
                continue
            page_url = 'https://rumble.com' + href
            if page_url not in seen_urls:
                seen_urls.add(page_url)
                page_urls.append(page_url)
            if arrow_page_url is None and page.get('aria-label') == '»':
                arrow_page_url = page_url

        if arrow_page_url is None:
            break
        url = arrow_page_url

    return page_urls

In [None]:
def videos_by_channel(channel, timeout=30):
    """Return the video URLs listed on every page of a Rumble channel.

    The channel's first page is requested once and reused both for its
    paginator links and for its videos. Further page URLs come from the
    paginator anchors on that page plus whatever ``paginator_helper`` appends
    after following the "»" link.

    Args:
        channel: Channel slug appended to ``https://rumble.com/c/``.
        timeout: Seconds to wait on each HTTP request issued by this function.

    Returns:
        A list of absolute video URLs in the order the pages list them,
        first page first, then each distinct further page in discovery order.
    """
    first_response = requests.get('https://rumble.com/c/' + channel, timeout=timeout)
    first_soup = BeautifulSoup(first_response.text, 'html.parser')

    page_urls = []
    arrow_page_url = None
    for page in first_soup.find_all('a', attrs={"class": "paginator--link"}):
        href = page.get('href')
        if not href:
            continue
        page_url = 'https://rumble.com' + href
        page_urls.append(page_url)
        # Remember only the first "»" link so the helper runs once even if
        # the paginator markup is repeated on the page
        if arrow_page_url is None and page.get('aria-label') == '»':
            arrow_page_url = page_url

    if arrow_page_url is not None:
        # The helper extends page_urls in place
        paginator_helper(arrow_page_url, page_urls)

    video_urls = []

    def collect_videos(soup):
        # Append every video link of one parsed page, skipping anchors without href
        for video in soup.find_all('a', attrs={"class": "video-item--a"}):
            href = video.get('href')
            if href:
                video_urls.append('https://rumble.com' + href)

    collect_videos(first_soup)
    # dict.fromkeys drops repeated page URLs while keeping discovery order, so
    # no page is downloaded (and no video reported) twice
    for url in dict.fromkeys(page_urls):
        response = requests.get(url, timeout=timeout)
        collect_videos(BeautifulSoup(response.text, 'html.parser'))

    return video_urls

In [None]:
def get_upvotes(url, timeout=30):
    """Return the upvote count shown on a video page, as displayed text.

    Args:
        url: Absolute URL of the video page.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        The stripped text of the ``span.rumbles-up-votes`` element. It is
        returned as a string, not converted to a number.

    Raises:
        ValueError: If the page contains no ``span.rumbles-up-votes`` element.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')
    upvote_element = soup.find('span', class_='rumbles-up-votes')
    if upvote_element is None:
        # Fail with a message that names the cause instead of an
        # AttributeError on None
        raise ValueError("no 'rumbles-up-votes' element found at " + url)
    return upvote_element.get_text(strip=True)

In [None]:
def get_downvotes(url, timeout=30):
    """Return the downvote count shown on a video page, as displayed text.

    Args:
        url: Absolute URL of the video page.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        The stripped text of the ``span.rumbles-down-votes`` element. It is
        returned as a string, not converted to a number.

    Raises:
        ValueError: If the page contains no ``span.rumbles-down-votes`` element.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')
    downvote_element = soup.find('span', class_='rumbles-down-votes')
    if downvote_element is None:
        # Fail with a message that names the cause instead of an
        # AttributeError on None
        raise ValueError("no 'rumbles-down-votes' element found at " + url)
    return downvote_element.get_text(strip=True)

In [None]:
def get_video_data(url, timeout=30):
    """Return a JSON string describing one Rumble video.

    The video page is fetched to find the embed id in its oEmbed link and to
    read the view count. The ``embedJS`` endpoint is then queried with that
    id and its JSON response supplies title, publish date, channel name,
    channel URL and duration. Vote counts come from ``get_upvotes`` and
    ``get_downvotes``.

    Args:
        url: Absolute URL of the video page.
        timeout: Seconds to wait on each HTTP request issued directly by this
            function.

    Returns:
        A JSON array (as a string) of ``"Label: value"`` entries in this
        order: Title, Video URL, Published Date, Channel, Channel URL,
        Duration, Upvotes, Downvotes, Viewcount. A field absent from the
        embed response, or a view count that cannot be located, yields an
        empty value after its label.

    Raises:
        ValueError: If the page has no oEmbed link carrying an embed id, or
            the ``embedJS`` response is not a JSON object.
    """
    headers = {
        'accept': '*/*',
        'accept-language': 'en-US,en;q=0.9',
        'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Mobile Safari/537.36',
    }

    # One download of the page serves both the embed id and the view count
    page = requests.get(url, timeout=timeout)
    page_text = str(BeautifulSoup(page.text, 'html.parser'))

    # The embed id is the path segment after "embed/" in the percent-encoded
    # oEmbed link; matching it directly avoids hand-counted offsets
    id_match = re.search(
        r'oembed\.json\?url=https%3A%2F%2Frumble\.com%2Fembed%2F([^%"&\s]+)',
        page_text,
    )
    if id_match is None:
        raise ValueError('no oEmbed link with an embed id found at ' + url)
    video_id = id_match.group(1)

    params = {
        'request': 'video',
        'ver': '2',
        'v': video_id,
        'ext': '{"ad_count":null}',
        'ad_wt': '339',
    }

    response = requests.get('https://rumble.com/embedJS/u3/', params=params, headers=headers, timeout=timeout)
    # Decode the response as JSON so escapes such as \/ and \" are resolved
    # and each field is looked up by key rather than by neighbouring text
    data = json.loads(response.text)
    if not isinstance(data, dict):
        raise ValueError('embedJS response for ' + video_id + ' is not a JSON object')

    author = data.get('author')
    if not isinstance(author, dict):
        author = {}

    def as_text(value):
        # Missing fields become an empty value instead of the string "None"
        return '' if value is None else str(value)

    # The view count is the text between the counter icon's closing </svg>
    # and the next tab character
    viewcount = ''
    icon_idx = page_text.find('svg class="video-counters--icon"')
    if icon_idx != -1:
        svg_end = page_text.find('</svg>', icon_idx)
        if svg_end != -1:
            begin_viewcount = svg_end + len('</svg>')
            end_viewcount = page_text.find('\t', begin_viewcount)
            if end_viewcount != -1:
                viewcount = page_text[begin_viewcount:end_viewcount].strip()

    result = [
        'Title: ' + as_text(data.get('title')),
        'Video URL: ' + url,
        'Published Date: ' + as_text(data.get('pubDate')),
        'Channel: ' + as_text(author.get('name')),
        'Channel URL: ' + as_text(author.get('url')),
        'Duration: ' + as_text(data.get('duration')),
        'Upvotes: ' + get_upvotes(url),
        'Downvotes: ' + get_downvotes(url),
        'Viewcount: ' + viewcount,
    ]

    # convert list to json
    return json.dumps(result)

In [None]:
# Smoke tests: each call below goes through helpers that issue live HTTP
# requests to rumble.com, and the returned value is printed as-is.

# Video URLs returned for the channel slug 'JohnnyB'
channel_videos = videos_by_channel('JohnnyB')
print(channel_videos)

# JSON string of metadata for a single video page
vid_data = get_video_data('https://rumble.com/v2fmc9h-building-a-amazing-split-flap-clock-arduino.html')
print(vid_data)

# Video URLs returned for a multi-word search query
vids_urls = get_video_list_paginator('cheese cats cute')
print(vids_urls)

# Channel URLs returned for the channel search 'trump'
channels = get_channel_list('trump')
print(channels)