In [1]:
#Importing Libraries
import pandas as pd
from bs4 import BeautifulSoup
import requests

#### 1. Write a python program to display all the header tags from [https://en.wikipedia.org/wiki/Main_Page](https://en.wikipedia.org/wiki/Main_Page).

In [2]:
#Function of fetch all the header tags from a web page
def get_headers(url,tags=['h1','h2','h3','h4','h5','h6']):
    page = requests.get(url)
    
    if page.status_code == 200:
        soup = BeautifulSoup(page.content,'html.parser')
        
        contents = {}
        for tag in tags:
            tag_contents = []
            for content in soup.find_all(tag):
                tag_contents.append(content) #content.get_text().replace('\n','') can be used to get contents only.
                
            if len(tag_contents)>0:
                contents[tag] = tag_contents
                
        return contents
    else:
        return {"Error":[page.status_code]}
    

In [3]:
url = 'https://en.wikipedia.org/wiki/Main_Page'
all_headers = get_headers(url)

#Interpreting headers
all_headers

{'h1': [<h1 class="firstHeading" id="firstHeading">Main Page</h1>],
 'h2': [<h2 class="mp-h2" id="mp-tfa-h2"><span id="From_today.27s_featured_article"></span><span class="mw-headline" id="From_today's_featured_article">From today's featured article</span></h2>,
  <h2 class="mp-h2" id="mp-dyk-h2"><span class="mw-headline" id="Did_you_know_...">Did you know ...</span></h2>,
  <h2 class="mp-h2" id="mp-itn-h2"><span class="mw-headline" id="In_the_news">In the news</span></h2>,
  <h2 class="mp-h2" id="mp-otd-h2"><span class="mw-headline" id="On_this_day">On this day</span></h2>,
  <h2 class="mp-h2" id="mp-tfp-h2"><span id="Today.27s_featured_picture"></span><span class="mw-headline" id="Today's_featured_picture">Today's featured picture</span></h2>,
  <h2 class="mp-h2" id="mp-other"><span class="mw-headline" id="Other_areas_of_Wikipedia">Other areas of Wikipedia</span></h2>,
  <h2 class="mp-h2" id="mp-sister"><span id="Wikipedia.27s_sister_projects"></span><span class="mw-headline" id="Wik

#### 2. Write a python program to display IMDB’s Top rated 100 movies’ data (i.e. Name, IMDB rating, Year of release) and save it in form of a CSV file. [https://www.imdb.com/chart/top/?sort=ir,desc&mode=simple&page=1](https://www.imdb.com/chart/top/?sort=ir,desc&mode=simple&page=1)

In [4]:
import re
#Function to display IMDB's Top rated 100 movies's data and save it in form of a CSV
def get_top_rated_imdb_movies(url,tag='',cl='',upto=100,filename='top_rated_movies.csv'):
    page = requests.get(url)
    
    if page.status_code == 200:
        soup = BeautifulSoup(page.content,'html.parser')
        data_main = soup.find_all(tag,cl)
        soup_tr = BeautifulSoup(str(data_main[0]),'html.parser')
        data_tr = (soup.find_all('tr'))[:upto]
        contents = {}
        names = []
        imdb_ratings = []
        years = []
        for data in data_tr:
            name = data.find('td',class_='titleColumn')
            names.append(name.a.string if name else None)
            
            rating = data.find('td',class_='ratingColumn imdbRating')
            imdb_ratings.append(rating.strong.string if rating else None)
            
            year = data.find('span',class_='secondaryInfo')
            years.append(year.string if year else None)
            
        contents['Name'] = names
        contents['IMDB Rating'] = imdb_ratings
        contents['Year'] = years
        
        df_imdb = pd.DataFrame(contents)
        df_imdb['Year'] = df_imdb['Year'].str.extract(r'(\d+)')
        df_imdb = df_imdb[1:]
        
        filename = f"{len(df_imdb)}_{filename}"
        df_imdb.to_csv(filename)
        print(f"Data saved successfully to: {filename}")
        
        return df_imdb
    else:
        return {"Error":[page.status_code]}

In [5]:
url = 'https://www.imdb.com/chart/top/?sort=ir,desc&mode=simple&page=1'
get_top_rated_imdb_movies(url,'tbody','lister-list',101)

Data saved successfully to: 100_top_rated_movies.csv


Unnamed: 0,Name,IMDB Rating,Year
1,The Shawshank Redemption,9.2,1994
2,The Godfather,9.1,1972
3,The Godfather: Part II,9.0,1974
4,The Dark Knight,9.0,2008
5,12 Angry Men,8.9,1957
...,...,...,...
96,Citizen Kane,8.3,1941
97,Dangal,8.3,2016
98,Zack Snyder's Justice League,8.3,2021
99,Singin' in the Rain,8.2,1952


#### 3. Write a python program to display IMDB’s Top rated 100 Indian movies’ data (i.e. Name, IMDB rating, Year of release) and save it in form of a CSV file. [https://www.imdb.com/india/top-rated-indian-movies/](https://www.imdb.com/india/top-rated-indian-movies/)

In [6]:
#By using the function defined in Q2, i.e., get_top_rated_imdb_movies(url,tag='',cl='',upto=100,filename='top_rated_movies.csv')
url = "https://www.imdb.com/india/top-rated-indian-movies/"
get_top_rated_imdb_movies(url,'tbody','lister-list',101,'top_rated_indian_movies.csv')

Data saved successfully to: 100_top_rated_indian_movies.csv


Unnamed: 0,Name,IMDB Rating,Year
1,Pather Panchali,8.5,1955
2,Gol Maal,8.5,1979
3,Nayakan,8.5,1987
4,Anbe Sivam,8.5,2003
5,Drishyam 2,8.5,2021
...,...,...,...
96,The Legend of Bhagat Singh,8.0,2002
97,Bommarillu,8.0,2006
98,Maqbool,8.0,2003
99,Bombay,8.0,1995


#### 4. Write a python program to scrap book name, author name, genre and book review of any 5 books from [www.bookpage.com](https://bookpage.com)

In [7]:
#Function for scraping book details (name,author,genre,review)
def get_book_details(url,tag='',cl='',upto=5):
    page = requests.get(url)
    if page.status_code == 200:
        soup = BeautifulSoup(page.content,'html.parser')
        books = (soup.find_all(tag,class_=cl))[:upto]
        
        names = []
        author_names = []
        genres = []
        book_reviews = []
        
        for book in books:
            #extracting book name
            name = book.h4.a.string
            names.append(name)
            
            #extracting author name
            author_name = book.find('p',class_='sans bold').string.strip('\n')
            author_names.append(author_name)
            
            #extracting book genre
            genre = book.find('p',class_='genre-links hidden-phone')
            a_genre = genre.find_all('a')
            genre_list = ''
            for a in a_genre:
                genre_list = genre_list+","+a.string
            
            genres.append(genre_list.strip(','))
            
            #extracting book reviews
            book_review = book.find('p',class_='excerpt').text.strip('\n')
            book_reviews.append(book_review.strip('\xa0'))
            
        book_details = {"Name":names,"Author":author_names,"Genre":genres,"Reviews":book_reviews}
        
        return book_details
    else:
        return {"Error":[page.status_code]}

In [8]:
url = "https://bookpage.com/reviews"
book_details = get_book_details(url,'div','flex-article-content')
df_books = pd.DataFrame(book_details)
#Interpreting book details
df_books

Unnamed: 0,Name,Author,Genre,Reviews
0,The One Thing You'd Save,"Linda Sue Park, Robert Sae-Heng","Children's,Middle Grade","In The One Thing You’d Save, a teacher named M..."
1,The Daughters of Kobani,Gayle Tzemach Lemmon,"Nonfiction,History,Middle Eastern History",The story of how young Kurdish women brought d...
2,The Zoologist's Guide to the Galaxy,Arik Kershenbaum,"Nonfiction,Science & Nature",Zoologist Arik Kershenbaum draws on a range of...
3,Rivals,Tommy Greenwald,"Children's,Middle Grade","In Rivals, Tommy Greenwald’s second novel set ..."
4,"Brother, Sister, Mother, Explorer",Jamie Figueroa,"Fiction,Literary Fiction","Ghosts hover over Jamie Figueroa’s debut, a ly..."


### 5. Write a python program to scrape cricket rankings from [www.icc-cricket.com](https://www.icc-cricket.com). You have to scrape:
#### i. Top 10 ODI teams in men’s cricket along with the records for matches, points and rating. 
#### ii. Top 10 ODI Batsmen in men along with the records of their team and rating. 
#### iii. Top 10 ODI bowlers along with the records of their team and rating.

#### i. Top 10 ODI teams in men’s cricket along with the records for matches, points and rating. [https://www.icc-cricket.com/rankings/mens/team-rankings/odi](https://www.icc-cricket.com/rankings/mens/team-rankings/odi)

In [9]:
#Function to scrape top 10 ODI teams in men's cricket
def get_top_odi_teams(url,tag='',cl='',upto=10):
    page = requests.get(url)
    if page.status_code == 200:
        soup = BeautifulSoup(page.content,'html.parser')
        team_list = soup.find(tag,class_=cl).table.tbody
        
        team_list_rows = team_list.find_all('tr')[0:upto]
        
        team_positions = []
        team_names = []
        team_matches = []
        team_points = []
        team_ratings = []
        
        for team in team_list_rows:
            team_details = team.find_all('td')
            
            team_position = team_details[0].string
            team_positions.append(team_position)
            
            team_name = team.find('span',class_='u-hide-phablet').string
            team_names.append(team_name)
            
            team_match = team_details[2].string
            team_matches.append(team_match)
            
            team_point = team_details[3].string
            team_points.append(team_point)
            
            team_rating = team_details[4].text.strip('\n')
            team_rating = team_rating.strip(' ')
            team_rating = team_rating.strip('\n')
            team_ratings.append(team_rating)
            
        top_teams = {"Pos":team_positions,"Name":team_names,"Matches":team_matches,"Points":team_points,"Ratings":team_ratings}
        
        return top_teams
    else:
        return {"Error":[page.status_code]}

In [10]:
url = "https://www.icc-cricket.com/rankings/mens/team-rankings/odi"
top_teams = get_top_odi_teams(url,'div','rankings-block__container full rankings-table')
df_top_teams = pd.DataFrame(top_teams)
#Interpreting Top 10 ODI men's cricket team
df_top_teams

Unnamed: 0,Pos,Name,Matches,Points,Ratings
0,1,England,44,5405,123
1,2,India,52,6102,117
2,3,New Zealand,33,3857,117
3,4,Australia,39,4344,111
4,5,South Africa,31,3345,108
5,6,Pakistan,35,3490,100
6,7,Bangladesh,38,3432,90
7,8,Sri Lanka,42,3372,80
8,9,West Indies,49,3802,78
9,10,Afghanistan,31,1844,59


#### ii. Top 10 ODI Batsmen in men along with the records of their team and rating. [https://www.icc-cricket.com/rankings/mens/player-rankings/odi](https://www.icc-cricket.com/rankings/mens/player-rankings/odi)

In [11]:
#Function to scrape top 10 ODI Batsmen
def get_top_odi_batsmen(url,tag='',att='',upto=10):
    page = requests.get(url)
    if page.status_code == 200:
        player_positions = []
        player_names = []
        player_teams = []
        player_ratings = []
        
        soup = BeautifulSoup(page.content,'html.parser')
        top_player = soup.find('div',class_='rankings-block__top-player')
        
        player_pos = top_player.find('div',class_='rankings-block__banner--pos').text.strip('\n')
        player_pos = player_pos.strip(' ')
        player_pos = player_pos.strip('\n')
        player_positions.append(player_pos)
        
        player_name = top_player.find('div',class_='rankings-block__banner--name').text.strip('\n')
        player_names.append(player_name)
        
        player_team = top_player.find('div',class_='rankings-block__banner--nationality').text.strip('\n')
        player_team = player_team.strip(' ')
        player_team = player_team.strip('\n')
        player_teams.append(player_team)
        
        top_player_rating = soup.find('div',class_='rankings-block__banner--rating').text.strip('\n')
        player_ratings.append(top_player_rating)
        
        other_players = soup.find('table',class_='table').tbody
        other_player_rows = other_players.find_all('tr')[0:upto-1]
        for rows in other_player_rows:
            cols = rows.find_all('td')
            
            position = cols[0].text.strip('\n')
            position = position.strip(' ')
            position = position.strip('\n')
            position = position.strip('\xa0')
            player_positions.append(position)
            
            name = cols[1].text.strip('\n')
            player_names.append(name)
            
            team = cols[2].text.strip('\n')
            team = team.strip(' ')
            team = team.strip('\n')
            player_teams.append(team)
            
            rating = cols[3].text.strip('\n')
            player_ratings.append(rating)
            
        top_odi_batsmen = {"Pos":player_positions,"Name":player_names,"Team":player_teams,"Rating":player_ratings}
        
        return top_odi_batsmen
    else:
        return {"Error":[page.status_code]}

In [12]:
url = 'https://www.icc-cricket.com/rankings/mens/player-rankings/odi'
top_odi_batsmen = get_top_odi_batsmen(url)
df_tob = pd.DataFrame(top_odi_batsmen)
#Interpreting Top 10 ODI Batsmen in men's cricket
df_tob

Unnamed: 0,Pos,Name,Team,Rating
0,1.0,Virat Kohli,IND,870
1,2.0,Rohit Sharma,IND,842
2,3.0,Babar Azam,PAK,837
3,4.0,Ross Taylor,NZ,818
4,5.0,Aaron Finch,AUS,791
5,6.0,Francois du Plessis,SA,790
6,7.0,David Warner,AUS,773
7,,Shai Hope,WI,773
8,9.0,Kane Williamson,NZ,765
9,10.0,Quinton de Kock,SA,755


#### iii. Top 10 ODI bowlers along with the records of their team and rating. [https://www.icc-cricket.com/rankings/mens/player-rankings/odi](https://www.icc-cricket.com/rankings/mens/player-rankings/odi)

In [13]:
#Function to scrape top 10 ODI Batsmen
def get_top_odi_bowlers(url,tag='',att='',upto=10):
    page = requests.get(url)
    if page.status_code == 200:
        player_positions = []
        player_names = []
        player_teams = []
        player_ratings = []
        
        soup = BeautifulSoup(page.content,'html.parser')
        top_player = soup.find_all('div',class_='rankings-block__top-player')[1]
        
        player_pos = top_player.find('div',class_='rankings-block__banner--pos').text.strip('\n')
        player_pos = player_pos.strip(' ')
        player_pos = player_pos.strip('\n')
        player_positions.append(player_pos)
        
        player_name = top_player.find('div',class_='rankings-block__banner--name').text.strip('\n')
        player_names.append(player_name)
        
        player_team = top_player.find('div',class_='rankings-block__banner--nationality').text.strip('\n')
        player_team = player_team.strip(' ')
        player_team = player_team.strip('\n')
        player_teams.append(player_team)
        
        top_player_rating = soup.find('div',class_='rankings-block__banner--rating').text.strip('\n')
        player_ratings.append(top_player_rating)
        
        other_players = soup.find_all('table',class_='table')[1].tbody
        other_player_rows = other_players.find_all('tr')[0:upto-1]
        for rows in other_player_rows:
            cols = rows.find_all('td')
            
            position = cols[0].text.strip('\n')
            position = position.strip(' ')
            position = position.strip('\n')
            position = position.strip('\xa0')
            player_positions.append(position)
            
            name = cols[1].text.strip('\n')
            player_names.append(name)
            
            team = cols[2].text.strip('\n')
            team = team.strip(' ')
            team = team.strip('\n')
            player_teams.append(team)
            
            rating = cols[3].text.strip('\n')
            player_ratings.append(rating)
            
        top_odi_batsmen = {"Pos":player_positions,"Name":player_names,"Team":player_teams,"Rating":player_ratings}
        
        return top_odi_batsmen
    else:
        return {"Error":[page.status_code]}

In [14]:
url = 'https://www.icc-cricket.com/rankings/mens/player-rankings/odi'
top_odi_bowlers = get_top_odi_bowlers(url)
df_tobl = pd.DataFrame(top_odi_bowlers)
#Interpreting Top 10 ODI bowlers
df_tobl

Unnamed: 0,Pos,Name,Team,Rating
0,1,Trent Boult,NZ,870
1,2,Mujeeb Ur Rahman,AFG,708
2,3,Jasprit Bumrah,IND,700
3,4,Mehedi Hasan,BAN,694
4,5,Chris Woakes,ENG,675
5,6,Kagiso Rabada,SA,665
6,7,Josh Hazlewood,AUS,660
7,8,Mustafizur Rahman,BAN,658
8,9,Mohammad Amir,PAK,647
9,10,Pat Cummins,AUS,646


### 6. Write a python program to scrape cricket rankings from [www.icc-cricket.com](https://www.icc-cricket.com). You have to scrape:
#### i) Top 10 ODI teams in women’s cricket along with the records for matches, points and rating. 
#### ii) Top 10 women’s ODI players along with the records of their team and rating. 
#### iii) Top 10 women’s ODI all-rounder along with the records of their team and rating

#### i) Top 10 ODI teams in women’s cricket along with the records for matches, points and rating. [https://www.icc-cricket.com/rankings/womens/team-rankings/odi](https://www.icc-cricket.com/rankings/womens/team-rankings/odi)

In [15]:
#Scraping Top 10 ODI teams in women's cricket using already defined function: get_top_odi_teams(url,tag='',cl='',upto=10)
url = 'https://www.icc-cricket.com/rankings/womens/team-rankings/odi'
top_odi_teams_women = get_top_odi_teams(url,tag='div',cl='rankings-block__container full rankings-table')
df_tow = pd.DataFrame(top_odi_teams_women)
#Interpreting Top 10 ODI teams in women's cricket
df_tow

Unnamed: 0,Pos,Name,Matches,Points,Ratings
0,1,Australia,15,2436,162
1,2,South Africa,24,2828,118
2,3,England,17,1993,117
3,4,India,20,2226,111
4,5,New Zealand,18,1696,94
5,6,West Indies,12,1025,85
6,7,Pakistan,15,1101,73
7,8,Bangladesh,5,306,61
8,9,Sri Lanka,11,519,47
9,10,Ireland,2,25,13


#### ii) Top 10 women’s ODI players along with the records of their team and rating. [https://www.icc-cricket.com/rankings/womens/player-rankings/odi](https://www.icc-cricket.com/rankings/womens/player-rankings/odi)

In [16]:
#Scraping top 10 batswoman ODI players using already defined function: get_top_odi_batsmen(url,tag='',att='',upto=10)
url = 'https://www.icc-cricket.com/rankings/womens/player-rankings/odi'
top_odi_batswomen = get_top_odi_batsmen(url)
df_topw = pd.DataFrame(top_odi_batswomen)
df_topw['Pos'] = df_topw['Pos'].str.extract('(\d+)')
#Interpreting top 10 batsmen in women's cricket
print("Top 10 Batswoman in ODI")
df_topw

Top 10 Batswoman in ODI


Unnamed: 0,Pos,Name,Team,Rating
0,1,Lizelle Lee,SA,773
1,2,Tammy Beaumont,ENG,765
2,3,Meg Lanning,AUS,749
3,4,Stafanie Taylor,WI,746
4,5,Alyssa Healy,AUS,741
5,6,Amy Satterthwaite,NZ,740
6,7,Smriti Mandhana,IND,719
7,8,Laura Wolvaardt,SA,699
8,9,Mithali Raj,IND,693
9,10,Ellyse Perry,AUS,691


In [17]:
#Scraping top 10 bowlers ODI players in women's cricket using already defined function: get_top_odi_bowlers(url,tag='',att='',upto=10)
url = 'https://www.icc-cricket.com/rankings/womens/player-rankings/odi'
top_odi_bowlers_women = get_top_odi_bowlers(url)
df_topwb = pd.DataFrame(top_odi_bowlers_women)
df_topwb['Pos'] = df_topwb['Pos'].str.extract('(\d+)')
#Interpreting top 10 bowlers in women's cricket
print("Top 10 Bowlers in ODI")
df_topwb

Top 10 Bowlers in ODI


Unnamed: 0,Pos,Name,Team,Rating
0,1,Jess Jonassen,AUS,773
1,2,Megan Schutt,AUS,735
2,3,Shabnim Ismail,SA,733
3,4,Marizanne Kapp,SA,725
4,5,Jhulan Goswami,IND,687
5,6,Ellyse Perry,AUS,666
6,7,Katherine Brunt,ENG,655
7,8,Poonam Yadav,IND,654
8,9,Ayabonga Khaka,SA,651
9,10,Shikha Pandey,IND,623


#### iii) Top 10 women’s ODI all-rounder along with the records of their team and rating. [https://www.icc-cricket.com/rankings/womens/player-rankings/odi](https://www.icc-cricket.com/rankings/womens/player-rankings/odi)

In [18]:
#Function to scrape top 10 ODI All-Rounders in women's cricket
def get_top_odi_all_rounders(url,tag='',att='',upto=10):
    page = requests.get(url)
    if page.status_code == 200:
        player_positions = []
        player_names = []
        player_teams = []
        player_ratings = []
        
        soup = BeautifulSoup(page.content,'html.parser')
        top_player = soup.find_all('div',class_='rankings-block__top-player')[2]
        
        player_pos = top_player.find('div',class_='rankings-block__banner--pos').text.strip('\n')
        player_pos = player_pos.strip(' ')
        player_pos = player_pos.strip('\n')
        player_positions.append(player_pos)
        
        player_name = top_player.find('div',class_='rankings-block__banner--name').text.strip('\n')
        player_names.append(player_name)
        
        player_team = top_player.find('div',class_='rankings-block__banner--nationality').text.strip('\n')
        player_team = player_team.strip(' ')
        player_team = player_team.strip('\n')
        player_teams.append(player_team)
        
        top_player_rating = soup.find('div',class_='rankings-block__banner--rating').text.strip('\n')
        player_ratings.append(top_player_rating)
        
        other_players = soup.find_all('table',class_='table')[2].tbody
        other_player_rows = other_players.find_all('tr')[0:upto-1]
        for rows in other_player_rows:
            cols = rows.find_all('td')
            
            position = cols[0].text.strip('\n')
            position = position.strip(' ')
            position = position.strip('\n')
            position = position.strip('\xa0')
            player_positions.append(position)
            
            name = cols[1].text.strip('\n')
            player_names.append(name)
            
            team = cols[2].text.strip('\n')
            team = team.strip(' ')
            team = team.strip('\n')
            player_teams.append(team)
            
            rating = cols[3].text.strip('\n')
            player_ratings.append(rating)
            
        top_odi_all_rounders = {"Pos":player_positions,"Name":player_names,"Team":player_teams,"Rating":player_ratings}
        
        return top_odi_all_rounders
    else:
        return {"Error":[page.status_code]}

In [19]:
url = 'https://www.icc-cricket.com/rankings/womens/player-rankings/odi'
top_odi_all_rounders_w = get_top_odi_all_rounders(url)
df_toaw = pd.DataFrame(top_odi_all_rounders_w)
#Interpreting Top 10 ODI All Rounders in Women's Cricket
df_toaw

Unnamed: 0,Pos,Name,Team,Rating
0,1,Ellyse Perry,AUS,773
1,2,Stafanie Taylor,WI,410
2,3,Marizanne Kapp,SA,390
3,4,Deepti Sharma,IND,357
4,5,Natalie Sciver,ENG,349
5,6,Jess Jonassen,AUS,301
6,7,Sophie Devine,NZ,274
7,8,Dane van Niekerk,SA,252
8,9,Katherine Brunt,ENG,236
9,10,Ashleigh Gardner,AUS,223


#### 7. Write a python program to scrape details of all the mobile phones under Rs. 20,000 listed on [Amazon.in](https://www.amazon.in/). The scraped data should include Product Name, Price, Image URL and Average Rating.

In [20]:
#Function to scrape details of all the mobile phones
def get_mobile_phone_details(url,tag,cl):
    page_m = requests.get(url)
    t_pages = 1
    if page_m.status_code == 200:
        soup_p = BeautifulSoup(page_m.content,'html.parser')
        total_content = soup_p.find('ul',class_='a-pagination')
        total_pages = total_content.find_all('li',class_='a-disabled')[2]
        t_pages = total_pages.text.strip('\n')
        
        names = []
        prices = []
        img_srcs = []
        ratings = []
        for pnumber in range(1,int(t_pages)+1):
            current_url = url.strip('page=1')
            current_url = f"{current_url}page={pnumber}"
            page = requests.get(current_url)
            if page.status_code == 503:
                while page.status_code != 200:
                    print(f"Error: {page.status_code}, Re-loading page: {pnumber}...[Total_Pages={int(t_pages)}]")
                    page = requests.get(current_url)
                    
            elif page.status_code != 200:
                return {"Error_":[page.status_code]}
            
            soup = BeautifulSoup(page.content,'html.parser')
            
            
            product_container = soup.find(tag,class_=cl)
            product_columns = product_container.find_all('div',class_='sg-col-inner')
            product_columns = product_columns[0:len(product_columns)-1]
        
        
            for cols in product_columns:
                name = cols.find('div',class_='a-section a-spacing-none a-spacing-top-small')
                name = name.text.strip('\n') if name else None
                names.append(name)

                price = cols.find('span',class_='a-offscreen')
                price = price.text.strip('\n') if price else None
                prices.append(price)

                img_src = cols.find('div',class_='a-section aok-relative s-image-square-aspect')
                img_src = img_src.img['src'] if img_src else None
                img_srcs.append(img_src)

                rating = cols.find('span',class_='a-icon-alt')
                rating = rating.text.strip('\n') if rating else None
                ratings.append(rating)

        product_details = {"Product Name":names,"Price":prices,"Image URL":img_srcs,"Average Rating":ratings}
        
        return product_details
    else:
        return {"Error":[page_m.status_code]}

In [34]:
url = "https://www.amazon.in/s?i=electronics&bbn=1805560031&rh=n%3A976419031%2Cn%3A976420031%2Cn%3A1389401031%2Cn%3A1389432031%2Cn%3A1805560031%2Cp_85%3A10440599031%2Cp_36%3A-2000000&page=1"
product_details = get_mobile_phone_details(url,tag='div',cl='s-main-slot s-result-list s-search-results sg-row')

Error: 503, Re-loading page: 1...[Total_Pages=61]
Error: 503, Re-loading page: 1...[Total_Pages=61]
Error: 503, Re-loading page: 1...[Total_Pages=61]
Error: 503, Re-loading page: 1...[Total_Pages=61]
Error: 503, Re-loading page: 1...[Total_Pages=61]
Error: 503, Re-loading page: 1...[Total_Pages=61]
Error: 503, Re-loading page: 1...[Total_Pages=61]
Error: 503, Re-loading page: 1...[Total_Pages=61]
Error: 503, Re-loading page: 2...[Total_Pages=61]
Error: 503, Re-loading page: 2...[Total_Pages=61]
Error: 503, Re-loading page: 2...[Total_Pages=61]
Error: 503, Re-loading page: 2...[Total_Pages=61]
Error: 503, Re-loading page: 2...[Total_Pages=61]
Error: 503, Re-loading page: 2...[Total_Pages=61]
Error: 503, Re-loading page: 2...[Total_Pages=61]
Error: 503, Re-loading page: 2...[Total_Pages=61]
Error: 503, Re-loading page: 2...[Total_Pages=61]
Error: 503, Re-loading page: 2...[Total_Pages=61]
Error: 503, Re-loading page: 2...[Total_Pages=61]
Error: 503, Re-loading page: 2...[Total_Pages=61]


Error: 503, Re-loading page: 29...[Total_Pages=61]
Error: 503, Re-loading page: 29...[Total_Pages=61]
Error: 503, Re-loading page: 29...[Total_Pages=61]
Error: 503, Re-loading page: 29...[Total_Pages=61]
Error: 503, Re-loading page: 29...[Total_Pages=61]
Error: 503, Re-loading page: 29...[Total_Pages=61]
Error: 503, Re-loading page: 29...[Total_Pages=61]
Error: 503, Re-loading page: 30...[Total_Pages=61]
Error: 503, Re-loading page: 30...[Total_Pages=61]
Error: 503, Re-loading page: 34...[Total_Pages=61]
Error: 503, Re-loading page: 34...[Total_Pages=61]
Error: 503, Re-loading page: 35...[Total_Pages=61]
Error: 503, Re-loading page: 35...[Total_Pages=61]
Error: 503, Re-loading page: 36...[Total_Pages=61]
Error: 503, Re-loading page: 36...[Total_Pages=61]
Error: 503, Re-loading page: 36...[Total_Pages=61]
Error: 503, Re-loading page: 37...[Total_Pages=61]
Error: 503, Re-loading page: 37...[Total_Pages=61]
Error: 503, Re-loading page: 37...[Total_Pages=61]
Error: 503, Re-loading page: 38

In [35]:
df_pd = pd.DataFrame(product_details)
#Interpreting Product Details
df_pd

Unnamed: 0,Product Name,Price,Image URL,Average Rating
0,"Samsung Galaxy M12 (Blue,4GB RAM, 64GB Storage...","₹10,999",https://m.media-amazon.com/images/I/71yYaNztZ0...,5.0 out of 5 stars
1,"Redmi 9A (Midnight Black, 2GB RAM, 32GB Storag...","₹6,999",https://m.media-amazon.com/images/I/71hDPUM7VP...,4.2 out of 5 stars
2,"Redmi Note 10 (Shadow Black, 6GB RAM, 128GB St...","₹13,999",https://m.media-amazon.com/images/I/71IqJQM2st...,
3,"Redmi 9A (Nature Green, 2GB Ram, 32GB Storage)...","₹6,999",https://m.media-amazon.com/images/I/71hDPUM7VP...,4.2 out of 5 stars
4,"Redmi 9 (Sky Blue, 4GB RAM, 64GB Storage)","₹8,799",https://m.media-amazon.com/images/I/71A9Vo1Bat...,4.1 out of 5 stars
...,...,...,...,...
1450,COVERNEW Flip Cover for HTC Desire 820 Dual si...,₹278,https://m.media-amazon.com/images/I/51b6luyRlK...,
1451,COVERNEW Flip Cover for HTC Desire 816 Dual si...,₹278,https://m.media-amazon.com/images/I/51b6luyRlK...,
1452,COVERNEW Flip Cover for HTC Desire 820 Dual si...,₹248,https://m.media-amazon.com/images/I/41IaQBA3Yw...,
1453,Kanex USB 3 Gigabit Adapter,"₹2,205",https://m.media-amazon.com/images/I/61Iscy667D...,3.4 out of 5 stars


#### 8. Write a python program to extract information about the local weather from the National Weather Service website of USA, [https://www.weather.gov/](https://www.weather.gov/) for the city, San Francisco. You need to extract data about 7 day extended forecast display for the city. The data should include period, short description, temperature and description. 

In [36]:
#Function to scrape data of 7 day extended forecast
def get_weather_forecast(url):
    page = requests.get(url)
    if page.status_code == 200:
        soup = BeautifulSoup(page.content,'html.parser')
        info_container = soup.find('ul',id='seven-day-forecast-list')
        forecast_containers = info_container.find_all('li',class_='forecast-tombstone')
        
        periods = []
        short_descriptions =[]
        temperatures = []
        descriptions = []
        for forecast in forecast_containers:
            period = forecast.find('p',class_='period-name').text.strip('\n')
            periods.append(period)
            
            short_description = forecast.find('p',class_='short-desc').text.strip('\n')
            short_descriptions.append(short_description)
            
            temperature = forecast.find('p',class_='temp').text.strip('\n')
            temperatures.append(temperature)
            
            description = forecast.find('img',class_='forecast-icon')['alt'].strip('\n')
            descriptions.append(description)
            
        forecast_details = {"Period":periods,"Short Description":short_descriptions,"Temperature":temperatures,"Description":descriptions}
        
        return forecast_details
    else:
        return {"Error":[page.status_code]}

In [37]:
#Weather Forecast for city San Francisco
url = 'https://forecast.weather.gov/MapClick.php?lat=37.7771&lon=-122.4196#.YFbv4q8zbIU'
forecast= get_weather_forecast(url)

df_forecast = pd.DataFrame(forecast)
#Interpreting 7 days extended forecast data
df_forecast

Unnamed: 0,Period,Short Description,Temperature,Description
0,Overnight,Clear,Low: 47 °F,"Overnight: Clear, with a steady temperature ar..."
1,Sunday,Sunny,High: 63 °F,"Sunday: Sunny, with a high near 63. Light nort..."
2,SundayNight,Mostly Clear,Low: 46 °F,"Sunday Night: Mostly clear, with a low around ..."
3,Monday,Sunny,High: 63 °F,"Monday: Sunny, with a high near 63. Northwest ..."
4,MondayNight,Mostly Clear,Low: 47 °F,"Monday Night: Mostly clear, with a low around ..."
5,Tuesday,Sunny,High: 67 °F,"Tuesday: Sunny, with a high near 67."
6,TuesdayNight,Clear,Low: 50 °F,"Tuesday Night: Clear, with a low around 50."
7,Wednesday,Sunny,High: 67 °F,"Wednesday: Sunny, with a high near 67."
8,WednesdayNight,Partly Cloudy,Low: 48 °F,"Wednesday Night: Partly cloudy, with a low aro..."
