In [25]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

#### Basics - reading a local html file 
This is to practice bs4 methods.

In [26]:
# Practice run: load a local HTML page, find every course card and print
# the course name together with its price.
with open('home.html', 'r') as html_file:
    content = html_file.read()

# Only the text is needed from here on, so parsing happens after the file
# has been closed.
soup = BeautifulSoup(content, 'lxml')
course_cards = soup.find_all('div', class_='card')

for course in course_cards:
    course_name = course.h5.get_text()
    # The price is the last whitespace-separated token of the link text.
    course_price = course.a.get_text().split()[-1]
    print(f'{course_name} costs {course_price}')


Python for beginners costs 20$
Python Web Development costs 50$
Python Machine Learning costs 100$


### Get the necessary part of HTML code from the website

In [27]:
# Download the TimesJobs mobile search results for the keyword "python".
# A timeout keeps the kernel from hanging on a stalled connection, and
# raise_for_status() stops the cell on an HTTP error instead of letting an
# error page be parsed as if it were job data.
response = requests.get(
    'https://m.timesjobs.com/mobile/jobs-search-result.html?txtKeywords=python&cboWorkExp1=-1&txtLocation=',
    timeout=30,
)
response.raise_for_status()
html_text = response.text
soup = BeautifulSoup(html_text, 'lxml')

# Collect every <li> element on the page; elements that are not job
# postings have to be filtered out by whatever consumes this list.
jobs = soup.find_all("li")

### Formatting for the needed data

Version 1 of the code implementation


In [28]:
# Version 1: gather each field into its own list, then assemble a DataFrame.
posted_period = []
company_names = []
skills_list = []
for job in jobs:
    posting_times = job.find('span', class_='posting-time')
    company_name = job.find('span', class_='srp-comp-name')
    skills = [s.get_text(strip=True) for s in job.find_all('a', class_='srphglt')]

    # Append to the three lists together, and only when every field is
    # present. Appending each list independently lets them drift out of
    # step as soon as one <li> lacks a field, which mis-pairs rows (or
    # makes the DataFrame constructor fail on unequal lengths).
    if not (posting_times and company_name and skills):
        continue

    # The first two characters of the posting-time text are parsed as an
    # integer (assumes the text starts with a number — TODO confirm
    # against the live markup).
    posted_period.append(int(posting_times.text[:2]))
    company_names.append(company_name.text)
    skills_list.append(skills)

# Join each job's skills into one comma-separated string while building the
# frame, so no column has to be rewritten afterwards.
df = pd.DataFrame({
    'Company': company_names,
    'Skills': [", ".join(job_skills) for job_skills in skills_list],
    'Posted': posted_period
})

df.head()


Unnamed: 0,Company,Skills,Posted
0,CONNECTING 2 WORK,"rest, python, django, mongodb",1
1,CONNECTING 2 WORK,"python, storage, sql, security, database, djan...",1
2,CONNECTING 2 WORK,"python, cache, rest api, storage, javascript, ...",1
3,CONNECTING 2 WORK,"python, css, github, hosting, bug fixing, boot...",1
4,Techasoft Pvt Ltd,"python, javascript, docker, django, postgresql...",2


### Version 2
(more optimized: builds one record per job in a single pass and skips any job that is missing a field)

In [29]:
# Version 2: build one dict per job and create the DataFrame from the list
# of records in a single step.
data = []
for job in jobs:
    posting_time = job.find('span', class_='posting-time')
    company_name = job.find('span', class_='srp-comp-name')
    skills = job.find_all('a', class_='srphglt')

    # Skip list items that do not carry all three pieces of information.
    if not (posting_time and company_name and skills):
        continue

    record = {
        'Company': company_name.text,
        'Skills': ', '.join(s.get_text(strip=True) for s in skills),
        # The first two characters of the posting-time text are parsed as
        # an integer.
        'Posted': int(posting_time.text[:2]),
    }
    data.append(record)

df = pd.DataFrame(data)
df

Unnamed: 0,Company,Skills,Posted
0,CONNECTING 2 WORK,"rest, python, django, mongodb",1
1,CONNECTING 2 WORK,"python, storage, sql, security, database, djan...",1
2,CONNECTING 2 WORK,"python, cache, rest api, storage, javascript, ...",1
3,CONNECTING 2 WORK,"python, css, github, hosting, bug fixing, boot...",1
4,Techasoft Pvt Ltd,"python, javascript, docker, django, postgresql...",2
5,SEVEN CONSULTANCY,"Python programming, programming language, fron...",2
6,Excel Ptp,"jquery, sql, git, oops, json, mysql, object or...",2
7,botree technologies,"security compliance, python, html5, svn, javas...",2
8,AxisTechnolabs,"python, css, user interaction, bootstrap, open...",2
9,highrise solutions llp,"python 3.x, django, linux, mysql, microservice...",2


In [30]:
# Amazon India search results for "mobile phone under 20000". The literal is
# split across lines for readability; adjacent string literals are
# concatenated into a single URL.
url = (
    "https://www.amazon.in/s"
    "?k=mobile+phone+under+20000"
    "&crid=3MZE3X15NFGK6"
    "&sprefix=mobile%2Caps%2C345"
    "&ref=nb_sb_ss_mvt-t11-ranker_ci_tech-br-left_1_6"
)

In [41]:
# Browser-like request headers sent with the Amazon request.
# "Accept-Encoding" is deliberately not set here: requests supplies its own
# value listing only the encodings it can actually decode. Advertising "br"
# by hand can yield an undecoded Brotli body when no Brotli package is
# installed.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:142.0) Gecko/20100101 Firefox/142.0",
    "Accept-Language": "en-US,en;q=0.9",
    "Referer": "https://www.amazon.in/",
    "DNT": "1",
    "Connection": "keep-alive",
}

In [42]:
# Fetch the search page with the browser-like headers. The timeout prevents
# the kernel from blocking indefinitely if the server never responds.
webpage = requests.get(url, headers=header, timeout=30)

In [43]:
# Show the response object; its printed form includes the HTTP status code.
print(repr(webpage))

<Response [200]>


In [44]:
# Parse the raw response bytes. The parser is named explicitly ('lxml', the
# same one used for the other pages in this notebook) because the generic
# 'html' feature lets BeautifulSoup pick whichever parser happens to be
# installed, which can change the resulting tree between environments.
soup = BeautifulSoup(webpage.content, 'lxml')

In [51]:
# Pull product titles, whole-number prices and ratings out of the parsed
# page. Each lookup filters on the tag name plus its class attribute string.
title = soup.find_all(
    'h2',
    attrs={'class': "a-size-medium a-spacing-none a-color-base a-text-normal"},
)
prize = soup.find_all('span', attrs={'class': "a-price-whole"})
rating = soup.find_all('span', attrs={'class': "a-size-small a-color-base"})


In [52]:
# Report how many elements each lookup matched. If the counts differ, the
# three lists are not positionally aligned with one another.
pad = " " * 6
rows = (("Title", len(title)), ("Prize", len(prize)), ("Rating", len(rating)))
print("\n" + "".join(f"{label} = {count}{pad}\n" for label, count in rows) + pad)


Title = 23      
Prize = 28      
Rating = 13      
      


In [56]:
# Print the text of every matched product title, one per line.
for heading in title:
    print(heading.get_text())

Samsung Galaxy M36 5G (Velvet Black, 8 GB RAM, 128 GB Storage)| Circle to Search| Google Gemini| Corning Gorilla Glass Victus+| 7.7mm Slim| AI Enhanced 50 MP OIS Triple Camera| Unmatched Nightography
iQOO Z10R 5G (Aquamarine, 8GB RAM, 128GB Storage) | 32MP 4K Selfie Camera | Quad-Curved AMOLED Display | Dimensity 7400 Processor with 750K+ AnTuTu
OnePlus Nord CE4 Lite 5G (Super Silver, 8GB RAM, 128GB Storage) | Lifetime Display Warranty | 5500 mAh Battery, 80W SUPERVOOC and Reverse Charging | 50MP Camera with OIS | 120Hz AMOLED Display
Samsung Galaxy M35 5G (DayBreak Blue,8GB RAM,128GB Storage)| Corning Gorilla Glass Victus+| AnTuTu Score 595K+| Vapour Cooling Chamber| 6000mAh Battery| 120Hz Super AMOLED Display| AI| Without Charger
Samsung Galaxy M05 (Mint Green, 4GB RAM, 64 GB Storage) | 50MP Dual Camera | Bigger 6.7" HD+ Display | 5000mAh Battery | 25W Fast Charging | 2 Gen OS Upgrade & 4 Year Security Update | Without Charger
Motorola G45 5G (Viva Magenta, 8GB RAM, 128GB Storage)
Sa