In [1]:
"""
Q.1) Write a python program to display IMDB’s Top rated 100 Indian movies’ data
https://www.imdb.com/list/ls056092300/ (i.e. name, rating, year of release) and make a data frame.
"""
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Function to scrape IMDb's Top 100 Indian movies
def scrape_top_indian_movies(url, timeout=10):
    """Scrape name, rating and release year for every movie card on an IMDb list page.

    Each movie is read from a ``div.lister-item-content`` card. A card that
    lacks one of the expected elements contributes ``None`` for that field
    instead of aborting the whole scrape.

    Args:
        url: Address of the IMDb list page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of ``{'Name', 'Rating', 'Year'}`` dicts whose values are strings
        (or ``None`` when the element is absent), or ``None`` when the page
        could not be fetched.
    """
    import re  # local import: this cell's import block does not bring in `re`

    def text_of(tag):
        # `find` returns None when nothing matches; pass that through as None.
        return tag.text.strip() if tag is not None else None

    try:
        # Without a timeout, requests can wait on an unresponsive server forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to fetch data from IMDb. ({exc})")
        return None
    if response.status_code != 200:
        print("Failed to fetch data from IMDb.")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    movie_data = []
    for movie in soup.find_all('div', class_='lister-item-content'):
        year = text_of(movie.find('span', class_='lister-item-year'))
        if year is not None:
            # Year labels can carry a disambiguator, e.g. "(I) (2012)"; pull out
            # the 4-digit year rather than only trimming the outer parentheses.
            year_match = re.search(r'\d{4}', year)
            year = year_match.group(0) if year_match else year.strip('()')
        movie_data.append({
            'Name': text_of(movie.find('a')),
            'Rating': text_of(movie.find('span', class_='ipl-rating-star__rating')),
            'Year': year,
        })
    return movie_data

# Address of the IMDb "Top 100 Indian movies" list
url = "https://www.imdb.com/list/ls056092300/"

# Download the list and extract one record per movie
top_indian_movies_data = scrape_top_indian_movies(url)

# Show the records as a table, or report that nothing came back
if not top_indian_movies_data:
    print("No data scraped.")
else:
    df = pd.DataFrame(top_indian_movies_data)
    print(df)


                                 Name Rating  Year
0                     Ship of Theseus      8  2012
1                              Iruvar    8.4  1997
2                     Kaagaz Ke Phool    7.8  1959
3   Lagaan: Once Upon a Time in India    8.1  2001
4                     Pather Panchali    8.2  1955
..                                ...    ...   ...
95                        Apur Sansar    8.4  1959
96                        Kanchivaram    8.2  2008
97                    Monsoon Wedding    7.3  2001
98                              Black    8.1  2005
99                            Deewaar      8  1975

[100 rows x 3 columns]


In [2]:
"""
4) Write a python program to scrape details of all the posts from https://www.patreon.com/coreyms .
Scrape the
heading, date, content and the likes for the video from the link for the youtube video from the post.

"""
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

# Function to scrape details of posts from Corey Schafer's Patreon page
def scrape_patreon_posts(url, timeout=10):
    """Scrape heading, date, excerpt and YouTube like count for each post card on a page.

    Posts are read from ``div.post-card`` elements. For a post that links to a
    video (``a.oembed``), the linked page is downloaded as well and the like
    count is parsed from its like button. Any element that is missing yields
    ``None`` for that field instead of raising.

    Args:
        url: Address of the Patreon page to download.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of ``{'Heading', 'Date', 'Content', 'YouTube Likes'}`` dicts, or
        ``None`` when the main page could not be fetched.
    """
    def text_of(tag):
        # `find` returns None when nothing matches; pass that through as None.
        return tag.text.strip() if tag is not None else None

    def fetch_likes(link):
        # Best effort: a video page that cannot be fetched or parsed gives None
        # rather than aborting the scrape of the remaining posts.
        try:
            video_response = requests.get(link, timeout=timeout)
        except requests.RequestException:
            return None
        if video_response.status_code != 200:
            return None
        video_soup = BeautifulSoup(video_response.content, 'html.parser')
        likes_text = text_of(
            video_soup.find('button', class_='like-button-renderer-like-button-unclicked')
        )
        if likes_text is None:
            return None
        likes_match = re.search(r'(\d+\.?\d*)\s+likes', likes_text)
        return likes_match.group(1) if likes_match else None

    try:
        # Without a timeout, requests can wait on an unresponsive server forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to fetch data from Patreon. ({exc})")
        return None
    if response.status_code != 200:
        print("Failed to fetch data from Patreon.")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    post_data = []
    for post in soup.find_all('div', class_='post-card'):
        time_tag = post.find('time', class_='post-card__published-at')
        link_tag = post.find('a', class_='oembed')  # looked up once, reused below
        youtube_link = link_tag.get('href') if link_tag is not None else None
        post_data.append({
            'Heading': text_of(post.find('h3', class_='post-card__title')),
            'Date': time_tag.get('datetime') if time_tag is not None else None,
            'Content': text_of(post.find('div', class_='post-card__excerpt')),
            'YouTube Likes': fetch_likes(youtube_link) if youtube_link else None,
        })
    return post_data

# Address of the Patreon page to scrape
url = "https://www.patreon.com/coreyms"

# Download the page and extract one record per post
patreon_posts_data = scrape_patreon_posts(url)

# Show the records as a table, or report that nothing came back
if not patreon_posts_data:
    print("No data scraped.")
else:
    df = pd.DataFrame(patreon_posts_data)
    print(df)


No data scraped.


In [3]:
"""
5) Write a python program to scrape house details from mentioned URL. It should include house title, location,
area, EMI and price from https://www.nobroker.in/ .Enter three localities which are Indira Nagar, Jayanagar,
Rajaji Nagar.
"""
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_house_details(locality, timeout=10):
    """Scrape title, location, area, EMI and price of the houses listed for a locality.

    Listings are read from ``div.card`` elements of the NoBroker sale-search
    page for ``locality``. A card that lacks one of the expected elements
    contributes ``None`` for that field instead of aborting the scrape.

    Args:
        locality: URL slug of the locality, e.g. ``'indira-nagar'``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of ``{'Title', 'Location', 'Area', 'EMI', 'Price'}`` dicts, or
        ``None`` when the page could not be fetched.
    """
    def text_of(tag):
        # `find` / `find_next` return None when nothing matches; pass that through.
        return tag.text.strip() if tag is not None else None

    # NOTE(review): only the path segment varies with `locality`; the
    # `searchParam` value is the same fixed token for every call — confirm it
    # does not pin the search to a single place.
    url = f"https://www.nobroker.in/property/sale/bangalore/{locality}?searchParam=W3sibGF0IjoxMy4wMDYxNDUyLCJsb24iOjc3Ljg3MTk5OTksInBsYWNlSWQiOiJDaElKdnV2ZVhYS1hzLU5wQlprYjNkX1RRIn1d&radius=2.0"
    try:
        # Without a timeout, requests can wait on an unresponsive server forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to fetch data for {locality} ({exc})")
        return None
    if response.status_code != 200:
        print(f"Failed to fetch data for {locality}")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    house_data = []
    for card in soup.find_all('div', class_='card'):
        # The EMI amount is taken from the <div> that follows the "EMI" label.
        # `string=` is the current name of the filter formerly spelled `text=`.
        emi_label = card.find('div', class_='font-semi-bold heading-6', string='EMI')
        emi_value = emi_label.find_next('div') if emi_label is not None else None
        house_data.append({
            'Title': text_of(card.find('h2', class_='heading-6 font-semi-bold nb__1AShY')),
            'Location': text_of(card.find('div', class_='nb__2CMjv')),
            'Area': text_of(card.find('div', class_='nb__3oNyC')),
            'EMI': text_of(emi_value),
            'Price': text_of(card.find('div', class_='font-semi-bold heading-6')),
        })
    return house_data

# URL slugs of the localities to search
localities = ['indira-nagar', 'jayanagar', 'rajaji-nagar']

# Collect the listings of every locality into one flat list
all_house_data = []
for locality in localities:
    locality_data = scrape_house_details(locality)
    if not locality_data:
        # Nothing usable for this locality (fetch failed or no cards found)
        continue
    all_house_data.extend(locality_data)

# Build a table from the collected listings and show it
df = pd.DataFrame(all_house_data)
print(df)


Empty DataFrame
Columns: []
Index: []


In [None]:

"""

6) Write a python program to scrape first 10 product details which include product name , price , Image URL from
https://www.bewakoof.com/bestseller?sort=popular .
"""
import requests
from bs4 import BeautifulSoup

def scrape_product_details(url, timeout=10):
    """Scrape name, price and image URL of the first 10 product cards on a page.

    Products are read from ``div.productCardListing`` elements. A card that
    lacks one of the expected elements (or an ``<img>`` without a ``src``
    attribute) contributes ``None`` for that field instead of raising.

    Args:
        url: Address of the product listing page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of at most 10 ``{'Name', 'Price', 'Image URL'}`` dicts, or
        ``None`` when the page could not be fetched.
    """
    def text_of(tag):
        # `find` returns None when nothing matches; pass that through as None.
        return tag.text.strip() if tag is not None else None

    try:
        # Without a timeout, requests can wait on an unresponsive server forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to fetch data ({exc})")
        return None
    if response.status_code != 200:
        print("Failed to fetch data")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    product_details = []
    # Only the first 10 products are wanted.
    for product in soup.find_all('div', class_='productCardListing')[:10]:
        image = product.find('img')
        product_details.append({
            'Name': text_of(product.find('h3', class_='name')),
            'Price': text_of(product.find('span', class_='price')),
            'Image URL': image.get('src') if image is not None else None,
        })
    return product_details

# Listing page to scrape
url = "https://www.bewakoof.com/bestseller?sort=popular"
product_details = scrape_product_details(url)

# Print one numbered, blank-line-terminated paragraph per product;
# nothing is printed when the scrape returned no products.
for idx, product in enumerate(product_details or [], start=1):
    print(
        f"Product {idx}:",
        f"Name: {product['Name']}",
        f"Price: {product['Price']}",
        f"Image URL: {product['Image URL']}",
        "",
        sep="\n",
    )


In [13]:
'''
7) Please visit https://www.cnbc.com/world/?region=world and scrape:
a) headings
b) date
c) News link
'''
import requests
from bs4 import BeautifulSoup

def scrape_most_downloaded_articles(url, timeout=10):
    """Scrape heading, date and link of each "Latest News" entry on a CNBC page.

    This cell's task is the CNBC world page (headings, date, news link); the
    previous body and URL targeted a journal listing and collected authors.

    NOTE(review): the function name is kept from the original cell so existing
    calls keep working, although it no longer describes the page being
    scraped — consider renaming.

    Args:
        url: Address of the CNBC page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of ``{'Heading', 'Date', 'Link'}`` dicts (a field is ``None``
        when its element is missing), or ``None`` when the page could not be
        fetched.
    """
    from urllib.parse import urljoin  # local import: not in this cell's import block

    try:
        # Without a timeout, requests can wait on an unresponsive server forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to fetch data ({exc})")
        return None
    if response.status_code != 200:
        print("Failed to fetch data")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    article_data = []
    # NOTE(review): the `LatestNews-headline` class and the <li>/<time> layout
    # are assumed from CNBC's "Latest News" list markup — confirm against the
    # live page before relying on the output.
    for headline in soup.find_all('a', class_='LatestNews-headline'):
        # Look for the timestamp inside the same list item as the headline so
        # a headline without a timestamp cannot borrow a neighbour's.
        item = headline.find_parent('li') or headline.parent
        time_tag = item.find('time') if item is not None else None
        href = headline.get('href')
        article_data.append({
            'Heading': headline.text.strip(),
            'Date': time_tag.text.strip() if time_tag is not None else None,
            # Resolve against the page URL in case the href is relative.
            'Link': urljoin(url, href) if href else None,
        })
    return article_data

url = "https://www.cnbc.com/world/?region=world"
article_data = scrape_most_downloaded_articles(url)
if article_data:
    for idx, article in enumerate(article_data, 1):
        print(f"News {idx}:")
        print(f"Heading: {article['Heading']}")
        print(f"Date: {article['Date']}")
        print(f"Link: {article['Link']}")
        print()


Failed to fetch data


In [14]:


"""
8) Please visit https://www.keaipublishing.com/en/journals/artificial-intelligence-in-agriculture/most-downloaded-articles/ and scrape:
 a) Paper title
 b) date
 c) Author
 """
import requests
from bs4 import BeautifulSoup

def scrape_most_downloaded_articles(url, timeout=10):
    """Scrape title, date and authors of each article listed on a page.

    Each article is read from its own ``div.cmp-list-item`` container, so the
    title, date and authors of one row always belong to the same article.
    (Collecting three page-wide lists and zipping them by position shifts every
    following row as soon as one article lacks a date or an author, or the page
    contains an unrelated ``<time>`` element.)

    NOTE(review): the recorded run of this cell failed at the fetch step, so
    these selectors have not been validated against the live page.

    Args:
        url: Address of the article listing page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of ``{'Title', 'Date', 'Author'}`` dicts (``Date`` / ``Author``
        are ``None`` when missing), or ``None`` when the page could not be
        fetched.
    """
    def text_of(tag):
        # `find` returns None when nothing matches; pass that through as None.
        return tag.text.strip() if tag is not None else None

    try:
        # Without a timeout, requests can wait on an unresponsive server forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to fetch data ({exc})")
        return None
    if response.status_code != 200:
        print("Failed to fetch data")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    article_data = []
    for item in soup.find_all('div', class_='cmp-list-item'):
        title_text = text_of(item.find('h2', class_='cmp-list-item--title'))
        if title_text is None:
            # A container without a title is not an article row.
            continue
        article_data.append({
            'Title': title_text,
            'Date': text_of(item.find('time')),
            'Author': text_of(item.find('div', class_='cmp-list-item--authors')),
        })
    return article_data

# NOTE(review): hyphen restored in "most-downloaded-articles". The previous
# slug "most-downloadedarticles" looks like a line-wrap artefact, and the
# recorded run of this cell failed to fetch — confirm this URL resolves.
url = "https://www.keaipublishing.com/en/journals/artificial-intelligence-in-agriculture/most-downloaded-articles/"
article_data = scrape_most_downloaded_articles(url)

# Print one numbered, blank-line-terminated paragraph per article;
# nothing is printed when the scrape returned no articles.
if article_data:
    for idx, article in enumerate(article_data, 1):
        print(f"Article {idx}:")
        print(f"Title: {article['Title']}")
        print(f"Date: {article['Date']}")
        print(f"Author: {article['Author']}")
        print()


Failed to fetch data
