In [12]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [68]:
def _value_or_nan(extract):
    """Return ``extract()``, or ``np.nan`` when the targeted markup is missing.

    Only the lookup failures a missing tag/attribute produces are absorbed
    (``None.text`` -> AttributeError, ``[][0]`` -> IndexError, a missing
    attribute -> KeyError/TypeError); anything else propagates.
    """
    try:
        return extract()
    except (AttributeError, IndexError, KeyError, TypeError):
        return np.nan


def df_creater(destination):
    """Parse one saved listing page into a DataFrame of holiday packages.

    Opens ``destination + '.html'`` (UTF-8), parses it with BeautifulSoup's
    ``lxml`` parser and emits one row per ``<section>`` whose ``style``
    attribute is exactly ``display: flex; max-width: 100%;``.

    Parameters
    ----------
    destination : str
        File stem of the saved page. It is also written verbatim to the
        ``location`` column so rows can be identified after the per-destination
        frames are concatenated.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ``location``, ``package_name``, ``stay_duration``,
        ``cities_covered_with_duration``, ``other_cities_covered``,
        ``facilities``, ``sights_included``, ``discount_type``,
        ``price_per_person``, ``total_price``, ``image_url``.
        A field whose markup is absent from a card is ``np.nan``.

    Raises
    ------
    OSError
        If the HTML file cannot be opened (e.g. ``FileNotFoundError``).
    """
    columns = ['location', 'package_name', 'stay_duration',
               'cities_covered_with_duration', 'other_cities_covered',
               'facilities', 'sights_included', 'discount_type',
               'price_per_person', 'total_price', 'image_url']

    # read html file
    with open(destination + '.html', 'r', encoding='utf-8') as f:
        html = f.read()
    soup = BeautifulSoup(html, 'lxml')
    dests = soup.find_all('section', {'style': 'display: flex; max-width: 100%;'})

    records = []
    for dest in dests:
        records.append({
            # a column with the destination, to identify rows after combining all df's
            'location': destination,

            'package_name': _value_or_nan(lambda: dest.find('p').text),

            'stay_duration': _value_or_nan(
                lambda: dest.find('span', {'class': 'selected'}).text),

            # Only the class-less <span>s inside the first itinerary list are kept.
            # A card without an itinerary now yields NaN; previously the fallback
            # re-appended the previous card's list (or raised NameError on the
            # very first card).
            'cities_covered_with_duration': _value_or_nan(
                lambda: [span.get_text()
                         for span in dest.find_all('div', {'class': 'itineraryList'})[0].find_all('span')
                         if not span.get('class')]),

            'other_cities_covered': _value_or_nan(
                lambda: dest.find('span', {'class': 'itineraTooltipContainer'}).get_text()),

            'facilities': _value_or_nan(
                lambda: [li.text
                         for li in dest.find_all('ul', {'class': 'tripListWrapper'})[0].find_all('li')]),

            'sights_included': _value_or_nan(
                lambda: [li.text
                         for li in dest.find_all('ul', {'class': 'visitListWrapper'})[0].find_all('li')]),

            # First bold <span> on the card.
            # NOTE(review): presumably a flight discount or an EMI amount - confirm.
            'discount_type': _value_or_nan(
                lambda: dest.find_all('span', {'class': 'bold'})[0].text),

            # attrs must be a dict; the original passed the set {'class', 'priceStyle'}.
            'price_per_person': _value_or_nan(
                lambda: dest.find('span', {'class': 'priceStyle'}).text),

            # Fourth <span> inside the right-hand section.
            # NOTE(review): positional index 3 depends on the page layout - confirm.
            'total_price': _value_or_nan(
                lambda: dest.find('div', {'class': 'rightSec'}).find_all('span')[3].text),

            'image_url': _value_or_nan(
                lambda: dest.find_all('img', {'class': 'active'})[0]['src']),
        })

    # Passing `columns` fixes the column order and keeps all eleven columns
    # even when the page contains no matching sections.
    return pd.DataFrame(records, columns=columns)
    




In [69]:
# Destination names with the URL-encoded space ('%20') swapped for '_'.
# NOTE(review): the visible cells below pass the '%20' spellings to df_creater
# directly, so this normalised list appears to be informational only - confirm.
destinations = [
    name.replace('%20', '_')
    for name in (
        'Maharashtra', 'Goa', 'Rajasthan', 'Andaman', 'Kerala', 'Kashmir',
        'Himachal%20Pradesh', 'South%20India', 'North%20East', 'Ladakh',
        'Gujarat', 'Uttarakhand',
    )
]

destinations

['Maharashtra',
 'Goa',
 'Rajasthan',
 'Andaman',
 'Kerala',
 'Kashmir',
 'Himachal_Pradesh',
 'South_India',
 'North_East',
 'Ladakh',
 'Gujarat',
 'Uttarakhand']

In [70]:
# One frame per saved listing page. The arguments are the on-disk file stems
# (df_creater opens destination + '.html'), so multi-word regions keep '%20'.
(
    df_mh, df_goa, df_raj, df_andaman, df_kerala, df_kashmir,
    df_hp, df_si, df_ne, df_ladakh, df_guj, df_uk,
) = map(df_creater, (
    'Maharashtra', 'Goa', 'Rajasthan', 'Andaman', 'Kerala', 'Kashmir',
    'Himachal%20Pradesh', 'South%20India', 'North%20East', 'Ladakh',
    'Gujarat', 'Uttarakhand',
))

In [71]:
# Stack the per-destination frames row-wise and renumber the index from 0.
df = pd.concat(
    (
        df_mh, df_goa, df_raj, df_andaman,
        df_kerala, df_kashmir, df_hp, df_si,
        df_ne, df_ladakh, df_guj, df_uk,
    ),
    axis=0,
    ignore_index=True,
)

In [72]:
# Show the combined frame as this cell's output.
df

Unnamed: 0,location,package_name,stay_duration,cities_covered_with_duration,other_cities_covered,facilities,sights_included,discount_type,price_per_person,total_price,image_url
0,Maharashtra,Short Shirdi Holiday by Bus,1N/2D,[1N Shirdi],,"[3 Star Hotel, Airport Pickup & Drop, 3 Activi...","[Bus Ticket, Visit to Shirdi Sai Baba Temple]","₹3,408/month","₹10,537","₹21,074",
1,Maharashtra,Peaceful Mahabaleshwar Holiday - From Pune,2N/3D,[2N Mahabaleshwar],,"[Round Trip Flights, 3 Star Hotel, Airport Tra...","[Visit to Venna Lake, Mapro Garden, Pratapgarh...","₹2,000","₹27,269","₹54,538",
2,Maharashtra,4N Mumbai & Lonavala Trip with Siddhivinayak D...,4N/5D,"[2N Mumbai, 2N Lonavala And Khandala]",,"[Round Trip Flights, Intercity Car Transfers, ...","[Visit to Siddhivinayak Temple, Haji Ali, Juhu...","₹2,199","₹29,984","₹59,968",
3,Maharashtra,Sacred Trip to Shirdi,1N/2D,[1N Shirdi],,"[Round Trip Flights, 3 Star Hotel, Airport Tra...","[Visit to Khandoba Mandir, Shirdi Sai Baba Tem...","₹1,447","₹19,729","₹39,458",
4,Maharashtra,Mesmerising Mumbai Trip,3N/4D,[3N Mumbai],,"[Round Trip Flights, 4 Star Hotel, Airport Tra...","[Visit to Gateway of India, Marine Drive, Maha...","₹2,362","₹32,212","₹64,424",
...,...,...,...,...,...,...,...,...,...,...,...
1857,Uttarakhand,"Peaceful Corbett, Ranikhet & Nainital Getaway",5N/6D,"[1N Corbett, 2N Ranikhet, 2N Nainital]",,"[Intercity Car Transfers, 3 Star Hotels, Airpo...","[Visit to Jhula Devi Temple, Chaubatia Gardens...",,"₹23,715","₹47,430",https://hldak.mmtcdn.com/prod-s3-hld-hpcmsadmi...
1858,Uttarakhand,Serene Nainital and Ranikhet Getaway,4N/5D,"[3N Nainital, 1N Ranikhet]",,"[Intercity Car Transfers, 3 Star Hotels, Airpo...","[Visit to Naina devi Temple, Mall road, Naini ...",,"₹20,517","₹41,034",
1859,Uttarakhand,Exotic Mussoorie with Nainital,4N/5D,"[2N Mussoorie, 2N Nainital]",,"[Intercity Car Transfers, 3 Star Hotels, Airpo...","[Visit to Mall road, Lake Mist, Kempty Falls, ...",,"₹18,645","₹37,290",
1860,Uttarakhand,"Picturesque Nainital, Ranikhet & Corbett- From...",5N/6D,"[2N Nainital, 1N Ranikhet, 2N Corbett]",,"[Intercity Car Transfers, 3 Star Hotels, Airpo...","[Visit to Naina devi Temple, Mall road, Naini ...",,"₹23,869","₹47,738",https://hldak.mmtcdn.com/prod-s3-hld-hpcmsadmi...


In [73]:
# Persist the combined frame to an Excel workbook in the working directory.
df.to_excel(excel_writer='holiday_packages.xlsx')