# Q1: In this question you have to scrape data using the filters available on the webpage. You have to use the location and salary filters.
# You have to scrape data for the “Data Scientist” designation for the first 10 job results.
# You have to scrape the job title, job location, company name, and experience required.
# The location filter to be used is “Delhi/NCR”. The salary filter to be used is “3-6” lakhs.
# The task will be done as shown in the steps below:
# 1. First get the web page https://www.naukri.com/
# 2. Enter “Data Scientist” in the “Skill, Designations, and Companies” field.
# 3. Then click the search button.
# 4. Then apply the location filter and salary filter by checking the respective boxes.
# 5. Then scrape the data for the first 10 job results you get.
# 6. Finally create a dataframe of the scraped data.

# In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Function to scrape data from naukri.com
def scrape_naukri_data():
    """Scrape up to 10 "Data Scientist" job listings from naukri.com.

    Fetches the home page, submits the quick-search form with the keyword
    filled in, re-submits it with the location and salary filters added, and
    parses the job cards out of the final response.

    Returns:
        A list of dicts with the keys ``Title``, ``Location``, ``Company``
        and ``Experience`` (a value is ``None`` when a card lacks that
        element), or ``None`` when a request does not return HTTP 200 or the
        search form/keyword field cannot be found in the page.

    Raises:
        requests.RequestException: on network failure or timeout.

    NOTE(review): this only sees the HTML the server sends. If the site
    builds the search form or the result list with JavaScript, the form
    lookup fails or no job cards are found -- confirm against the live site.
    """
    url = 'https://www.naukri.com/'
    timeout = 30  # seconds; requests waits indefinitely when none is given

    def text_of(node):
        """Return the stripped text of *node*, or None if it was not found."""
        return node.text.strip() if node is not None else None

    # Requesting the page
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print("Failed to connect to the website!")
        return None
    print("Connection successful!")

    soup = BeautifulSoup(response.text, 'html.parser')

    # Locate the search form; soup.find() returns None when it is absent.
    search_form = soup.find('form', attrs={'id': 'quicksearch'})
    if search_form is None:
        print("Search form not found on the page!")
        return None

    search_field = search_form.find('input', attrs={'id': 'qsb-keyword-sugg'})
    keyword_name = search_field.get('name') if search_field is not None else None
    if not keyword_name:
        # An <input> without a name is not part of a form submission.
        print("Search field not found on the page!")
        return None

    # requests needs a plain mapping as the POST body, so collect the form's
    # named <input> fields instead of passing the parsed tag itself.
    payload = {
        field['name']: field.get('value', '')
        for field in search_form.find_all('input')
        if field.get('name')
    }
    payload[keyword_name] = 'Data Scientist'

    # Submitting the form
    response = requests.post(url, data=payload, timeout=timeout)
    if response.status_code != 200:
        print("Failed to submit the search form!")
        return None
    print("Search successful!")

    # Applying filters
    # NOTE(review): 'locations' and 'ctcFilter' are the parameter names the
    # original code assumed -- confirm them against the live site.
    filters = {
        'locations': 'Delhi/NCR',
        'ctcFilter': '3-6',
    }
    payload.update(filters)

    # Submitting the filtered form
    response = requests.post(url, data=payload, timeout=timeout)
    if response.status_code != 200:
        print("Failed to apply filters!")
        return None
    print("Filters applied successfully!")

    # Parsing the filtered page
    filtered_soup = BeautifulSoup(response.text, 'html.parser')

    # Scraping job details; a missing element yields None instead of raising.
    jobs = []
    for job_card in filtered_soup.find_all('article', class_='jobTuple')[:10]:
        jobs.append({
            'Title': text_of(job_card.find('a', class_='title')),
            'Location': text_of(job_card.find('li', class_='location')),
            'Company': text_of(job_card.find('a', class_='subTitle')),
            'Experience': text_of(job_card.find('li', class_='experience')),
        })

    return jobs

# Main function to execute the scraping and create dataframe
def main():
    """Run the Naukri scraper and print its results as a DataFrame.

    Nothing is printed when the scraper returns ``None`` or an empty list.
    """
    scraped_jobs = scrape_naukri_data()
    if not scraped_jobs:
        return
    jobs_frame = pd.DataFrame(scraped_jobs)
    print(jobs_frame)


if __name__ == "__main__":
    main()


# Q2: Write a Python program to scrape data for the “Data Scientist” job position in the “Bangalore” location. You have to scrape the job title, job location, company name, and experience required. You have to scrape the data for the first 10 jobs.
# This task will be done in the following steps:
# 1. First get the webpage https://www.shine.com/
# 2. Enter “Data Scientist” in the “Job title, Skills” field and enter “Bangalore” in the “enter the location” field.
# 3. Then click the search button.
# 4. Then scrape the data for the first 10 job results you get.
# 5. Finally create a dataframe of the scraped data.

# In [None]:
def scrape_shine_data():
    """Scrape up to 10 "Data Scientist" jobs in Bangalore from shine.com.

    Posts the keyword (``q``) and location (``l``) to the site's home page and
    parses the job cards out of the response HTML.

    Returns:
        A list of dicts with the keys ``Job Title``, ``Job Location``,
        ``Company Name`` and ``Experience Required`` (a value is ``None``
        when a card lacks that element), or ``None`` when the response
        status is not 200.

    Raises:
        requests.RequestException: on network failure or timeout.

    NOTE(review): the endpoint and the ``q``/``l`` field names are taken
    from the original code -- confirm them against the live site.
    """
    url = 'https://www.shine.com/'
    response = requests.post(
        url,
        data={'q': 'Data Scientist', 'l': 'Bangalore'},
        timeout=30,  # seconds; requests waits indefinitely when none is given
    )
    if response.status_code != 200:
        return None

    def text_of(node):
        """Return the stripped text of *node*, or None if it was not found."""
        return node.text.strip() if node is not None else None

    soup = BeautifulSoup(response.text, 'html.parser')
    jobs = []
    for job_card in soup.find_all('li', class_='sjsresult')[:10]:
        # find() returns None for a missing element; record None rather than
        # aborting the whole scrape with an AttributeError.
        jobs.append({
            'Job Title': text_of(job_card.find('h2')),
            'Job Location': text_of(job_card.find('span', class_='loc')),
            'Company Name': text_of(job_card.find('span', class_='snp')),
            'Experience Required': text_of(job_card.find('li', class_='exp')),
        })
    return jobs

def main():
    """Run the Shine scraper and print its results as a DataFrame.

    Nothing is printed when the scraper returns ``None`` or an empty list.
    """
    shine_jobs = scrape_shine_data()
    has_rows = bool(shine_jobs)
    if has_rows:
        print(pd.DataFrame(shine_jobs))


if __name__ == "__main__":
    main()


# Q3: Scrape the data of 100 reviews from flipkart.com for the iPhone 11. You have to go to the link: https://www.flipkart.com/apple-iphone-11-black-64-gb/product-reviews/itm4e5041ba101fd?pid=MOBFWQ6BXGJCEYNY&lid=LSTMOBFWQ6BXGJCEYNYZXSHRJ&marketplace=FLIPKART
# As shown on the above page, you have to scrape the tick-marked attributes. These are:
# 1. Rating
# 2. Review summary
# 3. Full review
# 4. You have to scrape this data for the first 100 reviews.

# In [None]:
import requests
from bs4 import BeautifulSoup

def scrape_flipkart_reviews(max_reviews=100):
    """Scrape iPhone 11 reviews from Flipkart, following result pages.

    A single review page holds far fewer than 100 reviews, so successive
    pages are requested (``page=1, 2, ...``) until ``max_reviews`` reviews
    are collected or a page yields no review cards.

    Args:
        max_reviews: Maximum number of reviews to return (default 100).

    Returns:
        A list of dicts with the keys ``Rating``, ``Review Summary`` and
        ``Full Review`` (a value is ``None`` when a card lacks that
        element), or ``None`` when the first page does not return HTTP 200.
        A failure on a later page ends the scrape and returns what was
        collected so far.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    url = 'https://www.flipkart.com/apple-iphone-11-black-64-gb/product-reviews/itm4e5041ba101fd?pid=MOBFWQ6BXGJCEYNY&lid=LSTMOBFWQ6BXGJCEYNYZXSHRJ&marketplace=FLIPKART'
    # Same browser-like User-Agent the sneakers search in this file sends.
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"}

    def text_of(node):
        """Return the stripped text of *node*, or None if it was not found."""
        return node.text.strip() if node is not None else None

    reviews = []
    # Every non-empty page adds at least one review, so max_reviews pages is
    # a safe upper bound that also guarantees termination.
    for page in range(1, max_reviews + 1):
        response = requests.get(
            url,
            params={'page': page},  # appended to the URL's existing query
            headers=headers,
            timeout=30,
        )
        if response.status_code != 200:
            if page == 1:
                return None
            break

        soup = BeautifulSoup(response.text, 'html.parser')
        review_cards = soup.find_all('div', class_='col _2wzgFH K0kLPL')
        if not review_cards:
            break  # ran past the last page (or the markup changed)

        for review_card in review_cards:
            reviews.append({
                'Rating': text_of(review_card.find('div', class_='hGSR34')),
                'Review Summary': text_of(review_card.find('p', class_='_2-N8zT')),
                'Full Review': text_of(review_card.find('div', class_='qwjRop')),
            })
            if len(reviews) >= max_reviews:
                return reviews

    return reviews

def main():
    """Run the Flipkart review scraper and print every review.

    The scraper returns ``None`` when the request fails; that case (and an
    empty result) is reported instead of being iterated, which would raise
    ``TypeError``.
    """
    reviews = scrape_flipkart_reviews()
    if not reviews:
        print("No reviews were scraped.")
        return

    for number, review in enumerate(reviews, start=1):
        print(f"Review {number}:")
        print(f"Rating: {review['Rating']}")
        print(f"Review Summary: {review['Review Summary']}")
        print(f"Full Review: {review['Full Review']}")
        print("\n")


if __name__ == "__main__":
    main()


# Q4: Scrape data for the first 100 sneakers you find when you visit flipkart.com and search for “sneakers” in the search field.
# You have to scrape 3 attributes of each sneaker:
# 1. Brand
# 2. Product Description
# 3. Price
# As shown in the image below, you have to scrape the above attributes.

# In [None]:
# Collect brand, description and price for the first 100 sneakers returned by
# a Flipkart search. One result page does not hold 100 products, so result
# pages are requested in turn until the target is met.
url = "https://www.flipkart.com/search?q=sneakers"
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"}

max_sneakers = 100
max_pages = 10  # hard stop so a layout change cannot cause endless paging

sneakers_list = []  # rows of [brand, description, price]
for page in range(1, max_pages + 1):
    response = requests.get(
        url,
        headers=headers,
        params={"page": page},  # appended to the URL's existing query
        timeout=30,  # seconds; requests waits indefinitely when none is given
    )
    if response.status_code != 200:
        break
    soup = BeautifulSoup(response.content, 'html.parser')

    products = soup.find_all('div', {'class': '_1AtVbE'})

    count_before = len(sneakers_list)
    for product in products:
        if len(sneakers_list) >= max_sneakers:
            break
        brand_tag = product.find('div', {'class': '_2WkVRV'})
        description_tag = product.find('a', {'class': 'IRpwTa'})
        price_tag = product.find('div', {'class': '_30jeq3'})
        # Containers that are not product cards lack these elements; skip them.
        if brand_tag is None or description_tag is None or price_tag is None:
            continue
        brand = brand_tag.text
        description = description_tag.text
        price = price_tag.text
        sneakers_list.append([brand, description, price])

    # Stop once the target is reached or a page contributed nothing new.
    if len(sneakers_list) >= max_sneakers or len(sneakers_list) == count_before:
        break

# Q7: Write a Python program to display the list of respected former Prime Ministers of India (i.e. Name, Born-Dead, Term of office, Remarks) from https://www.jagranjosh.com/general-knowledge/list-of-all-prime-ministers-of-india-1473165149-1
# Scrape the mentioned data and make the DataFrame.

# In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# URL of the webpage to scrape
url = "https://www.jagranjosh.com/general-knowledge/list-of-all-prime-ministers-of-india-1473165149-1"

# Send a GET request to the URL. The timeout keeps a stalled server from
# hanging the script, and raise_for_status() stops on an HTTP error instead
# of parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Parse the HTML content of the page
soup = BeautifulSoup(response.content, 'html.parser')

# Find the table containing the data (the first <table> on the page)
table = soup.find('table')
if table is None:
    # soup.find() returns None when nothing matches; exit with a clear
    # message rather than an AttributeError on the next line.
    raise SystemExit("No <table> element found on the page.")

# Collect the cell texts of every data row, skipping the header row.
# Rows that do not have exactly 4 cells are dropped.
# NOTE(review): confirm the live table really has exactly four columns;
# otherwise every row is dropped and the DataFrame is empty.
data = []
for row in table.find_all('tr')[1:]:
    cols = [col.text.strip() for col in row.find_all('td')]
    if len(cols) == 4:
        data.append(cols)

# Create a DataFrame with the scraped data
df = pd.DataFrame(data, columns=['Name', 'Born-Dead', 'Term of Office', 'Remarks'])

# Display the DataFrame
print(df)

# Optionally, save the DataFrame to a CSV file
df.to_csv('indian_prime_ministers.csv', index=False)


# Q8: Write a Python program to display the list of the 50 most expensive cars in the world (i.e. car name and price) from https://www.motor1.com/
# This task will be done in the following steps:
# 1. First get the webpage https://www.motor1.com/
# 2. Then type “50 most expensive cars” in the search bar.
# 3. Then click on “50 most expensive cars in the world”.
# 4. Then scrape the mentioned data and make the dataframe.

# In [None]:
import requests
from bs4 import BeautifulSoup

from urllib.parse import urljoin

# Every failure below ends the script with `raise SystemExit(message)`, which
# writes the message to stderr and exits with a non-zero status. The bare
# `exit()` helper is meant for interactive sessions and reports success.

# Step 1: Get the webpage
url = 'https://www.motor1.com/'
response = requests.get(url, timeout=30)
if response.status_code != 200:
    raise SystemExit("Failed to fetch the webpage.")

# Step 2: Type in the search bar '50 most expensive cars'
search_query = "50 most expensive cars"
search_url = f"https://www.motor1.com/?s={search_query.replace(' ', '+')}"
search_response = requests.get(search_url, timeout=30)
if search_response.status_code != 200:
    raise SystemExit("Failed to execute the search query.")

# Step 3: Click on '50 most expensive cars in the world'
soup = BeautifulSoup(search_response.text, 'html.parser')
# `string=` is the current name of the legacy `text=` filter; it matches an
# <a> whose text is exactly this title.
expensive_cars_link = soup.find('a', string='50 Most Expensive Cars In The World')
if expensive_cars_link is None or not expensive_cars_link.get('href'):
    raise SystemExit("Failed to find the link for the 50 most expensive cars.")

# The href may be site-relative; resolve it against the site root so that
# requests receives an absolute URL (absolute hrefs pass through unchanged).
expensive_cars_url = urljoin(url, expensive_cars_link['href'])

# Step 4: Scrape the mentioned data and make the dataframe
expensive_cars_response = requests.get(expensive_cars_url, timeout=30)
if expensive_cars_response.status_code != 200:
    raise SystemExit("Failed to fetch data for the 50 most expensive cars.")

expensive_cars_soup = BeautifulSoup(expensive_cars_response.text, 'html.parser')

# Extract data as [car_name, car_price] rows
car_data = []
car_table = expensive_cars_soup.find('table', class_='table-wrapper')
if car_table:
    rows = car_table.find_all('tr')[1:]  # Skip the header row
    for row in rows:
        cells = row.find_all('td')
        if len(cells) < 2:
            continue  # spacer or malformed row without name and price cells
        car_name = cells[0].text.strip()
        car_price = cells[1].text.strip()
        car_data.append([car_name, car_price])

# Display data
for car in car_data:
    print(f"Car Name: {car[0]}, Price: {car[1]}")