<a href="https://colab.research.google.com/github/aman-welzin/credzin/blob/main/scrapers/bank_scrapers_beautiful_soup.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install langchain langchain-community bs4 pypdf chromadb

[31mERROR: Could not open requirements file: [Errno 2] No such file or directory: 'requirements.txt'[0m[31m
[0m

# Axis Bank Scraper using Beautiful Soup

In [None]:
import csv
import os
from bs4 import BeautifulSoup
import requests
from urllib.parse import urljoin

# Axis Bank listing page that enumerates the credit cards to scrape
BANK_URL = "https://www.axisbank.com/retail/cards/credit-card"

# Fixed application link; it is stored as-is rather than scraped per card
APPLY_NOW_LINK = "https://web.axisbank.co.in/DigitalChannel/WebForm/?index6&utm_content=cclisting&utm_campaign=cciocl&utm_source=website&axisreferralcode=iocllisting"

# Output CSV name; it is relative, so it resolves against the current working directory
CSV_FILE = 'axis_credit_cards.csv'

# Show the resolved absolute location up front so the output file is easy to find
_csv_location = os.path.abspath(CSV_FILE)
print(f"CSV file will be saved at: {_csv_location}")

# Function to scrape reward information from the "Know More" page
def scrape_reward_information(url, timeout=30):
    """Collect the reward entries shown on a card's "Know More" page.

    Args:
        url: Address of the "Know More" page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        A list of dicts with the keys 'Title', 'Description' and
        'Terms and Conditions Link'. The list is empty when the page cannot
        be downloaded or contains no reward cards.
    """
    # Fetch the "Know More" page; a network failure is reported like a
    # non-200 answer instead of propagating to the caller
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        print(f"Failed to retrieve the page: {url}. Error: {e}")
        return []
    if response.status_code != 200:
        print(f"Failed to retrieve the page: {url}. Status code: {response.status_code}")
        return []

    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')

    # Create a list to store reward data
    rewards_data = []

    # Loop through each reward card and extract details
    for card in soup.find_all('div', class_='temp1-card swiper-slide'):
        try:
            # Extract the reward title
            title_tag = card.find('h3')
            title = title_tag.text.strip() if title_tag else "N/A"

            # Extract the reward description
            description_tag = card.find('div', class_='moreCont')
            description = description_tag.text.strip() if description_tag else "N/A"

            # Extract the terms and conditions link (if available),
            # resolved against the page URL so relative links become absolute
            terms_link_tag = card.find('a', class_='pdf-data')
            if terms_link_tag and 'href' in terms_link_tag.attrs:
                terms_link = urljoin(url, terms_link_tag['href'])
            else:
                terms_link = "No link available"

            # Add reward data to the list
            rewards_data.append({
                'Title': title,
                'Description': description,
                'Terms and Conditions Link': terms_link
            })
        except Exception as e:
            # Best effort: one malformed reward card must not discard the rest
            print(f"Error processing reward card: {e}")

    return rewards_data

# Helper shared by the joining-fee and annual-fee lookups
def _extract_fee(card, label):
    """Return the fee text associated with *label* inside *card*, or "N/A".

    Args:
        card: Parsed element of a single credit card on the listing page.
        label: Text identifying the fee, e.g. 'Joining Fee'.
    """
    def mentions_label(text):
        return text and label in text

    # First choice: a <p> whose own string contains the label
    fee_tag = card.find('p', string=mentions_label)
    if fee_tag:
        strong_tag = fee_tag.find('strong')
        return (strong_tag if strong_tag else fee_tag).get_text(strip=True)

    # Alternative approach: any text node containing the label, with the
    # amount taken from the next <strong> element that follows it
    fee_text = card.find(string=mentions_label)
    if fee_text:
        strong_tag = fee_text.find_next('strong')
        return strong_tag.get_text(strip=True) if strong_tag else fee_text.strip()

    return "N/A"


# Function to scrape credit card data from the bank URL
def scrape_bank_credit_cards(url, timeout=30):
    """Scrape every credit card listed on the bank's listing page.

    Args:
        url: Address of the listing page; also the base for resolving
            relative links and image paths.
        timeout: Seconds to wait for the listing page before giving up.

    Returns:
        A list of dicts with the keys 'Card Name', 'Features' (list of str),
        'Joining Fee', 'Annual Fee', 'Know More Link', 'Apply Now Link',
        'Image URL' and 'Rewards' (list of dicts from
        scrape_reward_information). The list is empty when the listing page
        cannot be downloaded.
    """
    # Send a GET request to fetch the page content; a network failure is
    # reported like a non-200 answer instead of propagating
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        print(f"Failed to retrieve the page: {url}. Error: {e}")
        return []
    if response.status_code != 200:
        print(f"Failed to retrieve the page: {url}. Status code: {response.status_code}")
        return []

    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')

    # Create a list to store card data
    cards_data = []

    # Loop through each card item and extract details
    for card in soup.find_all('div', class_='card-item'):
        try:
            # Extract card name
            # NOTE(review): get_text(strip=True) appears to join the heading's
            # text fragments without a separator, which would explain names
            # such as "IndianOil Axis BankCredit Card". Left unchanged because
            # the name is used as the de-duplication key for rows already
            # stored in the CSV.
            card_name_tag = card.find('h3')
            card_name = card_name_tag.get_text(strip=True).replace('\n', ' ') if card_name_tag else "N/A"

            # Extract features (list items)
            features = [li.get_text(strip=True) for li in card.find_all('li')]

            # Extract joining and annual fees
            joining_fee = _extract_fee(card, 'Joining Fee')
            annual_fee = _extract_fee(card, 'Annual Fee')

            # Extract "Know More" link; an anchor without href counts as
            # "no link" rather than discarding the whole card
            know_more_link_tag = card.find('a', class_='btn1')
            know_more_href = know_more_link_tag.get('href') if know_more_link_tag else None
            know_more_link = urljoin(url, know_more_href) if know_more_href is not None else "N/A"

            # Extract credit card image URL (relative URLs are made absolute)
            image_div = card.find('div', class_='cards-img')
            image_tag = image_div.find('img') if image_div else None
            if image_tag and 'src' in image_tag.attrs:
                image_url = urljoin(url, image_tag['src'])
            else:
                image_url = "N/A"

            # Scrape reward information from the "Know More" page
            rewards = []
            if know_more_link != "N/A":
                rewards = scrape_reward_information(know_more_link)

            # Add card data to the list
            cards_data.append({
                'Card Name': card_name,
                'Features': features,  # Keep features as a list for now
                'Joining Fee': joining_fee,
                'Annual Fee': annual_fee,
                'Know More Link': know_more_link,
                'Apply Now Link': APPLY_NOW_LINK,  # Use the hardcoded Apply Now link
                'Image URL': image_url,
                'Rewards': rewards  # Add rewards as a list of dictionaries
            })
        except Exception as e:
            # Best effort: report the failing card and continue with the rest
            print(f"Error processing card: {e}")

    return cards_data

# Column order shared by the header row and every data row
CSV_FIELDNAMES = ['Card Name', 'Features', 'Joining Fee', 'Annual Fee', 'Know More Link', 'Apply Now Link', 'Image URL', 'Rewards']

# Read existing data to avoid duplicates. A missing or zero-byte file has no
# header yet, so it is treated as "new" and receives one below.
existing_card_names = set()
csv_has_content = os.path.exists(CSV_FILE) and os.path.getsize(CSV_FILE) > 0
if csv_has_content:
    with open(CSV_FILE, 'r', newline='', encoding='utf-8') as file:
        for row in csv.DictReader(file):
            existing_card_names.add(row['Card Name'])

# Scrape data from Axis Bank before touching the output file, so a failed
# scrape does not leave a header-only CSV behind
print(f"Scraping data from: {BANK_URL}")
new_cards_data = scrape_bank_credit_cards(BANK_URL)

# Append mode creates the file when it does not exist yet
with open(CSV_FILE, 'a', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=CSV_FIELDNAMES)
    if not csv_has_content:
        writer.writeheader()  # Write the header row

    # Append only new cards
    for card in new_cards_data:
        if card['Card Name'] in existing_card_names:
            print(f"Skipped (already exists): {card['Card Name']}")
            continue

        # Convert features and rewards lists to strings enclosed in quotes.
        # The literal quotes become part of the stored value (the csv module
        # escapes them); they are kept so new rows match rows written by
        # earlier runs.
        card['Features'] = '"' + ', '.join(card['Features']) + '"'
        card['Rewards'] = '"' + str(card['Rewards']) + '"'
        writer.writerow(card)
        # Remember the name so a repeat within this run is skipped exactly
        # as it would be on the next run
        existing_card_names.add(card['Card Name'])
        print(f"Added: {card['Card Name']}")

CSV file will be saved at: /content/axis_credit_cards.csv
Scraping data from: https://www.axisbank.com/retail/cards/credit-card
Added: IndianOil Axis BankCredit Card
Added: RewardsCredit Card
Added: Axis Bank MagnusCredit Card
Added: Axis Bank PrivilegeCredit Card
Added: Flipkart Axis BankCredit Card
Added: Axis Bank MY ZoneCredit Card
Added: Axis Bank NeoCredit Card
Added: Axis Bank SelectCredit Card
Added: Axis Bank AtlasCredit Card
Added: Axis Bank AURACredit Card
Added: IndianOil Axis Bank PremiumCredit Card
Added: Axis Bank ACECredit Card
Added: Axis Bank Pride PlatinumCredit Card
Added: Axis Bank Pride SignatureCredit Card
Added: Axis Bank MY Zone EasyCredit Card
Added: Privilege EasyCredit Card
Added: Axis Bank Signature Credit Card with Lifestyle Benefits
Added: PlatinumCredit Card
Added: Titanium Smart TravelerCredit Card
Added: Axis Bank My WingsCredit Card
Added: Flipkart Axis Bank Super EliteCredit Card
Added: HORIZONCredit Card
Added: SpiceJet Axis Bank Voyage BlackCredit 

In [None]:
Axis_df

Unnamed: 0,Card Name,Features,Joining Fee,Annual Fee,Know More Link,Apply Now Link,Image URL,Rewards
0,IndianOil Axis BankCredit Card,"""Now seamlessly Pay with UPI using your credit...",₹ 500,₹ 500,https://www.axisbank.com/retail/cards/credit-c...,https://web.axisbank.co.in/DigitalChannel/WebF...,https://www.axisbank.com/images/default-source...,"""[{'Title': 'Welcome Benefit - Earn 100% value..."
1,RewardsCredit Card,"""Milestone benefit of 1,500 EDGE reward points...",₹ 1000,"₹ 1000 (waived on spends of INR 2,00,000 in an...",https://www.axisbank.com/retail/cards/credit-c...,https://web.axisbank.co.in/DigitalChannel/WebF...,https://www.axisbank.com/images/default-source...,"""[{'Title': 'Welcome Benefit', 'Description': ..."
2,Axis Bank MagnusCredit Card,"""Choose any one voucher worth INR 12,500 from ...","₹ 12,500","₹ 12,500",https://www.axisbank.com/retail/cards/credit-c...,https://web.axisbank.co.in/DigitalChannel/WebF...,https://www.axisbank.com/images/default-source...,"""[]"""
3,Axis Bank PrivilegeCredit Card,"""12,500 EDGE RPs redeemable against vouchers w...",₹ 1500,₹ 1500,https://www.axisbank.com/retail/cards/credit-c...,https://web.axisbank.co.in/DigitalChannel/WebF...,https://www.axisbank.com/images/default-source...,"""[{'Title': 'Milestone Benefit', 'Description'..."
4,Flipkart Axis BankCredit Card,"""Welcome benefits worth Rs. 600, Unlimited cas...",₹ 500,₹ 500,https://www.axisbank.com/retail/cards/credit-c...,https://web.axisbank.co.in/DigitalChannel/WebF...,https://www.axisbank.com/images/default-source...,"""[{'Title': 'Welcome Benefits', 'Description':..."
5,Axis Bank MY ZoneCredit Card,"""Complimentary Sony LIV Premium subscription f...",₹ 0,₹ 0,https://www.axisbank.com/retail/cards/credit-c...,https://web.axisbank.co.in/DigitalChannel/WebF...,https://www.axisbank.com/images/default-source...,"""[]"""
6,Axis Bank NeoCredit Card,"""Monthly 2 times – Up to Rs. 120 off* at Zomat...",₹ 0,₹ 0,https://www.axisbank.com/retail/cards/credit-c...,https://web.axisbank.co.in/DigitalChannel/WebF...,https://www.axisbank.com/images/default-source...,"""[{'Title': '40% off on Zomato', 'Description'..."
7,Axis Bank SelectCredit Card,"""10,000 EDGE REWARD Points worth Rs. 2,000, Fl...",₹ 3000,₹ 3000,https://www.axisbank.com/retail/cards/credit-c...,https://web.axisbank.co.in/DigitalChannel/WebF...,https://www.axisbank.com/images/default-source...,"""[]"""
8,Axis Bank AtlasCredit Card,"""Welcome benefits of 2,500 Miles, Accrue 5X mi...","₹ 5,000","₹ 5,000",https://www.axisbank.com/retail/cards/credit-c...,https://web.axisbank.co.in/DigitalChannel/WebF...,https://www.axisbank.com/images/default-source...,"""[]"""
9,Axis Bank AURACredit Card,"""Get welcome decathlon OMNI Gift Card worth IN...",,,https://www.axisbank.com/retail/cards/credit-c...,https://web.axisbank.co.in/DigitalChannel/WebF...,https://www.axisbank.com/images/default-source...,"""[]"""


# SBI Bank Scraper using Beautiful Soup

In [None]:
import csv
import requests
from bs4 import BeautifulSoup
import os

# Define CSV file (relative name, resolved against the current working directory)
# NOTE: this rebinds the CSV_FILE name that the Axis Bank cell also assigns;
# if both cells run in one interpreter session, the most recently executed
# assignment decides which file later code sees.
CSV_FILE = "sbi_credit_cards.csv"

# Function to read existing card names from CSV
def get_existing_card_names(csv_file):
    """Return the set of card names already stored in *csv_file*.

    The first row is treated as a header and ignored; the card name is taken
    from the first column of every remaining non-empty row. A file that does
    not exist yields an empty set.
    """
    names = set()
    if os.path.exists(csv_file):
        with open(csv_file, mode="r", encoding="utf-8") as handle:
            rows = csv.reader(handle)
            next(rows, None)  # Discard the header row, if there is one
            names.update(record[0] for record in rows if record)
    return names

# Function to extract features from the "Learn more" page
def extract_features(learn_more_url, timeout=30):
    """Scrape the feature list from a card's "Learn more" page.

    Args:
        learn_more_url: Address of the card's detail page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dict mapping each feature heading (the <h3> text of a list item in
        the 'feature-1-tab' panel) to the list of texts of the <li> elements
        nested in that item. Returns None when the page cannot be downloaded
        or the expected section/panel is not present.
    """
    # A network failure is handled like a non-200 answer, so that a single
    # unreachable page does not abort a run over many cards
    try:
        response = requests.get(learn_more_url, timeout=timeout)
    except requests.RequestException as e:
        print(f"Failed to retrieve the page: {learn_more_url}. Error: {e}")
        return None
    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    tab_content_section = soup.find('section', class_='tab-content')
    if tab_content_section is None:
        return None
    tab_content = tab_content_section.find('div', class_='tab-inner-content', id='feature-1-tab')
    if tab_content is None:
        return None

    features = {}
    for feature in tab_content.find_all('li'):
        # Only list items that carry their own heading describe a feature
        heading = feature.find('h3')
        if heading:
            feature_name = heading.text.strip()
            features[feature_name] = [detail.text.strip() for detail in feature.find_all('li')]
    return features

# Function to extract Fees (Joining Fee, Renewal Fee, Add-on Fee)
def extract_fees(learn_more_url, timeout=30):
    """Scrape the fee list from a card's "Learn more" page.

    Args:
        learn_more_url: Address of the card's detail page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dict with a "Fees" key holding the texts of the <li> elements in the
        first <ul> following the "Fees" heading of the 'feature-2-tab' panel.
        The dict is empty when the panel exists but the heading or list is
        missing. Returns None when the page cannot be downloaded or the panel
        is not present.
    """
    # A network failure is handled like a non-200 answer, so that a single
    # unreachable page does not abort a run over many cards
    try:
        response = requests.get(learn_more_url, timeout=timeout)
    except requests.RequestException as e:
        print(f"Failed to retrieve the page: {learn_more_url}. Error: {e}")
        return None
    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    fees_section = soup.find('div', class_='tab-inner-content', id='feature-2-tab')
    if fees_section is None:
        return None

    fees = {}
    # The heading must read exactly "Fees"; its list is the next <ul> after it
    fees_heading = fees_section.find('h3', string="Fees")
    if fees_heading:
        fee_items = fees_heading.find_next('ul')
        if fee_items:
            fees["Fees"] = [li.get_text(strip=True) for li in fee_items.find_all('li')]
    return fees

# Main URL of the credit cards page
main_url = "https://www.sbicard.com/en/personal/credit-cards.page#all-card-tab"


def _absolute_sbi_url(href):
    """Prefix site-relative links with the SBI Card host; keep http(s) links as they are."""
    return href if href.startswith('http') else f"https://www.sbicard.com{href}"


def _front_image_png_url(container):
    """Return the card's front image URL with its extension replaced by .png, or "N/A"."""
    picture = container.find('picture')
    source = picture.find('source') if picture else None
    srcset = source.get('srcset') if source else None
    if not srcset:
        # Return the placeholder directly; feeding "N/A" through the path
        # manipulation below would yield the bogus value "N/A.png"
        return "N/A"
    stem, _ = os.path.splitext(os.path.basename(srcset))
    return os.path.dirname(srcset) + "/" + stem + ".png"


def _back_image_url(container):
    """Return the src of the image on the card's back face, or "N/A" when absent."""
    back_face = container.find('div', class_='card-features back')
    image = back_face.find('img') if back_face else None
    return image.get('src', "N/A") if image else "N/A"


# Send a GET request to the main webpage (bounded, so a stalled connection
# cannot hang the run indefinitely)
response = requests.get(main_url, timeout=30)

# Check if the request was successful
if response.status_code == 200:
    soup = BeautifulSoup(response.content, 'html.parser')
    card_containers = soup.find_all('div', class_='grid col-2')

    # Load existing cards
    existing_cards = get_existing_card_names(CSV_FILE)

    # Open CSV file in append mode
    with open(CSV_FILE, mode="a", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)

        # Write header only if file is empty
        if os.stat(CSV_FILE).st_size == 0:
            writer.writerow(["Card Name", "Benefits", "Features", "Fees", "Learn More URL", "Apply Now URL", "Front Image URL", "Back Image URL"])

        # Loop through each card container
        for container in card_containers:
            name_tag = container.find_next('h4')
            if name_tag is None:
                print("⚠️ Skipping a card container with no card name")
                continue
            card_name = name_tag.text.strip()

            # Skip if already added
            if card_name in existing_cards:
                print(f"⚠️ Skipping {card_name} (Already in CSV)")
                continue

            learn_more_tag = container.find_next('a', class_='learn-more-link')
            if learn_more_tag is None or 'href' not in learn_more_tag.attrs:
                print(f"⚠️ Skipping {card_name} (No Learn More link)")
                continue
            learn_more_url = _absolute_sbi_url(learn_more_tag['href'])

            # Extract Apply Now URL
            apply_now_url = "N/A"
            apply_now_tag = container.find('div', class_='item-footer')
            if apply_now_tag:
                apply_link = apply_now_tag.find('a', class_='button primary')
                if apply_link and 'href' in apply_link.attrs:
                    apply_now_url = _absolute_sbi_url(apply_link['href'])

            # Extract Card Images
            new_image_url = _front_image_png_url(container)
            back_image = _back_image_url(container)

            # Extract Benefits (taken from the first <ul> inside the container)
            benefits_section = container.find('ul')
            benefits = [li.get_text(strip=True).replace('Rs.', 'Rs. ') for li in benefits_section.find_all('li')] if benefits_section else []

            # Extract Features & Fees from "Learn More" page
            # NOTE: each helper downloads learn_more_url itself, so the page
            # is requested twice per card
            features = extract_features(learn_more_url)
            fees = extract_fees(learn_more_url)

            # Print extracted details for debugging
            print(f"✅ Adding {card_name}")
            print(f"Learn More URL: {learn_more_url}")
            print(f"Apply Now URL: {apply_now_url}")
            print(f"Front Image URL: {new_image_url}")
            print(f"Back Image URL: {back_image}")

            if benefits:
                print("\n**Benefits**")
                for benefit in benefits:
                    print(f"  - {benefit}")

            if features:
                print("\n**Features**")
                for feature, details in features.items():
                    print(f"{feature}:")
                    for detail in details:
                        print(f"  - {detail}")

            if fees and "Fees" in fees:
                print("\n**Fees**")
                for detail in fees["Fees"]:
                    print(f"  - {detail}")

            print("-" * 50)

            # Write new data to CSV file
            writer.writerow([
                card_name,
                ", ".join(benefits),
                str(features) if features else "N/A",
                str(fees["Fees"]) if fees and "Fees" in fees else "N/A",
                learn_more_url,
                apply_now_url,
                new_image_url,
                back_image
            ])

            # Remember the name so a repeat within this run is skipped exactly
            # as it would be on the next run
            existing_cards.add(card_name)

    print(f"\n✅ Data successfully saved to {CSV_FILE}")

else:
    print(f"❌ Failed to retrieve the webpage. Status code: {response.status_code}")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  - Click here to know more
Balance Transfer on EMI:
  - Save money while paying your credit card dues
  - Transfer the outstanding balance of other banks’ credit cards to your City Union Bank SimplySAVE SBI Card, to avail a lower rate of interest and pay back in EMIs
  - Log onto sbicard.com with your user ID and password to avail this service
  - Click here to know more

**Fees**
  - Annual Fee (one time):Rs.499 + Taxes
  - Renewal Fee (per annum):Rs.499 + Taxes from second year onwards. Renewal Fee reversed if annual spends for last year >= Rs. 1,00,000.
  - Add-on Fee (per annum): NIL
--------------------------------------------------
✅ Adding Karnataka Bank SBI Card PRIME
Learn More URL: https://www.sbicard.com/en/personal/credit-cards/banking-partnership/karnataka-bank-sbi-card-prime.page
Apply Now URL: N/A
Front Image URL: https://www.sbicard.com/sbi-card-en/assets/media/images/personal/credit-cards/network-card-im

In [None]:
SBI_df

Unnamed: 0,Card Name,Benefits,Features,Fees,Learn More URL,Apply Now URL,Front Image URL,Back Image URL
0,SBI Card Miles Elite,"Get 5,000 Travel Credits as Welcome Gift, Earn...","{'Terms and Conditions': [], 'Welcome Gift': [...","['Annual Fee (one-time):Rs.4999 + GST', 'Renew...",https://www.sbicard.com/en/personal/credit-car...,https://www.sbicard.com/en/eapply/eapplyform.p...,https://www.sbicard.com/sbi-card-en/assets/med...,https://www.sbicard.com/sbi-card-en/assets/med...
1,SBI Card PULSE,"Annual Fee (one-time):Rs. 1,499 + Taxes, Annua...",{'Terms and Conditions': ['For detailed Terms ...,"['Annual Fee (one time):Rs.1,499 + Taxes', 'Re...",https://www.sbicard.com/en/personal/credit-car...,https://www.sbicard.com/en/eapply/eapplyform.p...,https://www.sbicard.com/sbi-card-en/assets/med...,https://www.sbicard.com/sbi-card-en/assets/med...
2,SimplyCLICK SBI Card,Get Amazon.in gift card worthRs. 500* on payme...,{'Contactless Advantage': ['Daily purchases no...,"['Annual Fee (one time):Rs.499', 'Renewal Fee ...",https://www.sbicard.com/en/personal/credit-car...,https://www.sbicard.com/en/eapply/eapplyform.p...,https://www.sbicard.com/sbi-card-en/assets/med...,https://www.sbicard.com/sbi-card-en/assets/med...
3,CASHBACK SBI Card,"5% cashback on online spends, 1% cashback on o...",{'Terms and Conditions': ['For detailed Terms ...,"['Joining Fee (one time):\xa0Rs. 999', 'Renewa...",https://www.sbicard.com/en/personal/credit-car...,https://www.sbicard.com/en/eapply/eapplyform.p...,https://www.sbicard.com/sbi-card-en/assets/med...,https://www.sbicard.com/sbi-card-en/assets/med...
4,SBI Card ELITE,"Welcome e-Gift Voucher worthRs. 5,000 on joini...",{'Terms and Conditions': ['For detailed Terms ...,"['Annual Fee (one-time) :Rs.4,999', 'Renewal F...",https://www.sbicard.com/en/personal/credit-car...,https://www.sbicard.com/en/eapply/eapplyform.p...,https://www.sbicard.com/sbi-card-en/assets/med...,https://www.sbicard.com/sbi-card-en/assets/med...
...,...,...,...,...,...,...,...,...
151,KVB SBI Card PRIME,"Welcome e-Gift Voucher worth Rs. 3,000 on joi...",{'Terms and Conditions': ['For detailed Terms ...,"['Annual Fee (one time):Rs.2,999 + Taxes', 'Re...",https://www.sbicard.com/en/personal/credit-car...,,https://www.sbicard.com/sbi-card-en/assets/med...,https://www.sbicard.com/sbi-card-en/assets/med...
152,KVB SimplySAVE SBI Card,"2,000 bonus reward points on spends of Rs. 2,...",{'Reward Point': ['Earn 1 Reward Point on ever...,"['Annual Fee (one time):Rs.499 + Taxes', 'Rene...",https://www.sbicard.com/en/personal/credit-car...,,https://www.sbicard.com/sbi-card-en/assets/med...,https://www.sbicard.com/sbi-card-en/assets/med...
153,KVB SBI Signature Card,"Welcome gift voucher worthRs. 5,000 from popul...",{'True lifestyle': ['Enjoy access to more than...,"['Annual fee, one time:Rs.4,999', 'Renewal fee...",https://www.sbicard.com/en/personal/credit-car...,,https://www.sbicard.com/sbi-card-en/assets/med...,https://www.sbicard.com/sbi-card-en/assets/med...
154,Karur Vysya Bank - SBI Platinum Credit Card,"Welcome Gift voucher worthRs. 3,000 from any ...","{'Renewal Gift': ['Get e-vouchers worth Rs. 3,...","['Annual Fee, one time:Rs.2,999', 'Renewal Fee...",https://www.sbicard.com/en/personal/credit-car...,,https://www.sbicard.com/sbi-card-en/assets/med...,https://www.sbicard.com/sbi-card-en/assets/med...
