In [32]:
pip install pdfplumber pyspark openai requests bs4 selenium webdriver-manager faiss-cpu transformers sentence-transformers

Note: you may need to restart the kernel to use updated packages.


In [33]:
import pdfplumber
import pandas as pd

def _parse_card_record(lines, rate_idx):
    """
    Build one card record from the lines surrounding an interest-rate line.

    The heuristic assumes this layout relative to ``rate_idx``: card name at -3,
    bank at -2, the rate itself at 0, an optional annual-fee line at +1, and zero
    or more "label: value" reward lines starting at +2.

    Args:
        lines: All text lines of one PDF page.
        rate_idx: Index in ``lines`` of the line containing "%".

    Returns:
        A ``(card_name, bank, annual_fee, interest_rate, rewards)`` tuple, or
        ``None`` (after printing the reason) when the line cannot be parsed.
    """
    # Without this guard, rate_idx - 3 goes negative and Python silently wraps
    # around to lines at the END of the page, yielding a bogus name and bank.
    if rate_idx < 3 or rate_idx + 1 >= len(lines):
        print("Skipping line due to parsing error: not enough surrounding lines")
        return None

    try:
        # Everything before the first "%" must be a bare number, e.g. "19.99% ..."
        interest_rate = float(lines[rate_idx].split("%")[0].strip())
    except ValueError as e:
        print(f"Skipping line due to parsing error: {e}")
        return None

    card_name = lines[rate_idx - 3].strip()
    bank = lines[rate_idx - 2].strip()

    fee_line = lines[rate_idx + 1]
    if "fee" in fee_line.lower():
        # First space-separated token of the fee line is taken as the fee amount
        annual_fee = fee_line.split(" ")[0].strip()
    else:
        annual_fee = "No annual fee"

    # Collect the consecutive "label: value" lines that follow the fee line
    reward_values = []
    for candidate in lines[rate_idx + 2:]:
        if ":" not in candidate:
            break
        # maxsplit=1 keeps values that themselves contain a colon intact
        reward_values.append(candidate.split(":", 1)[1].strip())
    rewards = ", ".join(reward_values) if reward_values else None

    return (card_name, bank, annual_fee, interest_rate, rewards)


def extract_pdf_data(file_path):
    """
    Extracts data from the credit card PDF and structures it for a pandas DataFrame.

    Every line containing "%" is treated as a candidate interest-rate line and
    parsed together with its neighbouring lines (sample parsing logic, adjust
    as needed for other PDF layouts).

    Args:
        file_path: Path of the PDF file to open with pdfplumber.

    Returns:
        A list of ``(card_name, bank, annual_fee, interest_rate, rewards)``
        tuples, one per successfully parsed card. Unparseable candidates are
        skipped with a printed message.
    """
    data = []
    with pdfplumber.open(file_path) as pdf:
        for page in pdf.pages:
            # Pages without a text layer can yield None/empty text; treat them
            # as blank instead of crashing on None.split().
            text = page.extract_text() or ""
            lines = text.split("\n")

            for i, line in enumerate(lines):
                if "%" not in line:  # Heuristic: look for interest rate
                    continue
                record = _parse_card_record(lines, i)
                if record is not None:
                    data.append(record)
    return data

# Location of the source PDF listing the credit cards
pdf_path = "/Users/aaryas127/Documents/GitHub/credit_card_reward_maximizer/data_storage/SearchCreditCard-eng.pdf"

# Run the extraction; each parsed card comes back as one tuple
parsed_data = extract_pdf_data(pdf_path)

# Column labels, in the same order as the fields of each parsed tuple
card_columns = [
    "Card Name",
    "Bank",
    "Annual Fee",
    "Purchase Interest Rate (%)",
    "Rewards",
]

# Tabulate the parsed tuples and print the resulting frame
df = pd.DataFrame(parsed_data, columns=card_columns)
print(df)


                                             Card Name  \
0                                Desjardins Flexi Visa   
1                          MBNA True Line® Mastercard®   
2           Coast Capital Collabria Classic Mastercard   
3                     Tangerine Money-Back Credit Card   
4                           Tangerine World Mastercard   
..                                                 ...   
99          CIBC Aeroplan Visa Infinite Privilege Card   
100           TD Aeroplan Visa Infinite Privilege Card   
101  American Express® Aeroplan®* Reserve Card (Cre...   
102  American Express® Aeroplan®* Reserve Card (Cha...   
103                                  The Platinum Card   

                                                Bank     Annual Fee  \
0                                         Desjardins  No annual fee   
1    MBNA is a division of The Toronto-Dominion Bank  No annual fee   
2                              Coast Capital Savings  No annual fee   
3                  

In [34]:
import requests
from bs4 import BeautifulSoup

# URL of the webpage to scrape
url = r"https://rates.ca/credit-cards/travel"

# Send a GET request to fetch the page content. The explicit timeout keeps the
# cell from hanging indefinitely if the server never responds.
response = requests.get(url, timeout=30)

# Check if the request was successful
if response.status_code == 200:
    # Parse the HTML content with BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')

    # soup.title is None when the document has no <title>; guard the attribute
    # access so a title-less page does not abort the cell.
    page_title = soup.title.string if soup.title is not None else None
    print("Page Title:", page_title)  # Print the title of the page

    # Extract all text
    print("Full Page Text:")
    print(soup.get_text())

    # If you want to extract all links
    print("\nAll Links on the Page:")
    for link in soup.find_all('a', href=True):
        print(link['href'])

else:
    # NOTE(review): `soup` is not assigned on this path, so any later cell that
    # reads it will fail (or reuse a stale value from an earlier run).
    print(f"Failed to retrieve the page. Status code: {response.status_code}")

Page Title: Canada's Best Travel Credit Cards of 2024
Full Page Text:





Canada's Best Travel Credit Cards of 2024































































Car Insurance 

Car Insurance +−





Car Insurance


Ontario Car Insurance


Alberta Car Insurance


Quebec Car Insurance


Bundle Home & Car
                                        Insurance











Ratesbot


                                            Automated carinsurance shopping
                                        








Home Insurance 

Home Insurance +−





Home Insurance


Alberta Home Insurance


Condo Insurance


Tenant Insurance


Ontario Home Insurance


BC Home Insurance


Contents Insurance

 






Business Insurance 

Business Insurance +−





Commercial Auto


Commercial Property






General Liability


Professional
                                        Liability 


General Contractor Insurance


Pet Care Insurance


Small Business Insurance


Personal Services
             

In [39]:
# Character count of the text extracted from the parsed page held in `soup`
page_text_length = len(soup.get_text())
print(page_text_length)

66713


In [35]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager

# Set options for headless browsing (optional)
options = Options()
options.add_argument("--headless")
options.add_argument("--disable-gpu")

# Automatically manage ChromeDriver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

try:
    # Load the webpage
    url = r"https://rates.ca/credit-cards/travel"
    driver.get(url)

    # Extract and measure the length of the HTML
    html = driver.page_source
    html_length = len(html)

    # Print the length of the HTML content
    print(f"Length of HTML content: {html_length} characters")
finally:
    # Always shut the browser down, even when loading the page fails, so no
    # headless Chrome process is left running behind the notebook.
    driver.quit()


Length of HTML content: 1128745 characters


In [36]:
# NOTE(review): this creates (or truncates) an empty 'file.txt' in the working
# directory; none of the visible cells read it — likely a leftover, confirm and remove.
open('file.txt', 'w').close()

# Persist the rendered HTML. The encoding is set explicitly so non-ASCII
# characters in the page are written the same way on every platform instead of
# depending on the locale's default encoding.
with open('/Users/aaryas127/Documents/GitHub/credit_card_reward_maximizer/data_storage/html.txt', 'w', encoding='utf-8') as f:
    f.write(html)

In [None]:
# import faiss
# import numpy as np
# from sentence_transformers import SentenceTransformer
# from transformers import GPT2LMHeadModel, GPT2Tokenizer

# # Read the file content
# with open("html.txt", "r") as file:
#     file_content = file.read()

# # Split the content into chunks (e.g., sentences or paragraphs)
# file_chunks = file_content.split("\n")

# # Initialize the embedding model (using sentence-transformers)
# embedder = SentenceTransformer('all-MiniLM-L6-v2')

# # Embed each chunk of the file
# embeddings = embedder.encode(file_chunks)

# # Initialize FAISS index for similarity search
# index = faiss.IndexFlatL2(embeddings.shape[1])  # L2 distance metric
# index.add(np.array(embeddings))  # Add the embeddings to the index

# # Example query
# query = "What is the main topic of the text?"

# # Embed the query
# query_embedding = embedder.encode([query])

# # Retrieve the most similar chunk using FAISS
# D, I = index.search(np.array(query_embedding), k=1)  # k=1 for top-1 closest match

# # Get the closest chunk from the file
# closest_chunk = file_chunks[I[0][0]]

# # Initialize a generative model (GPT-2 example)
# tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# model = GPT2LMHeadModel.from_pretrained("gpt2")

# # Prepare the input for the model
# input_text = f"Question: {query}\nAnswer: {closest_chunk}"
# inputs = tokenizer(input_text, return_tensors="pt")

# # Generate an answer using the model
# output = model.generate(inputs['input_ids'], max_length=50)
# generated_answer = tokenizer.decode(output[0], skip_special_tokens=True)

# print(generated_answer)


RecursionError: maximum recursion depth exceeded

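[REDACTED: an API key was stored here in plain text. Revoke and rotate that key, and load credentials from an environment variable or an untracked file instead of pasting them into the notebook.]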

In [41]:
# Read the API key from a local file so it is not embedded in the notebook
with open("/Users/aaryas127/Documents/GitHub/credit_card_reward_maximizer/api.txt", 'r') as file:
    content = file.read()

from openai import OpenAI

# OpenAI-compatible client pointed at the NVIDIA endpoint
client = OpenAI(
  base_url = "https://integrate.api.nvidia.com/v1",
  # Strip surrounding whitespace: a trailing newline left by an editor would
  # otherwise be sent as part of the key.
  api_key = content.strip()
)

# Send the scraped page text (from `soup`, built in the scraping cell) as the
# user message and stream the reply.
completion = client.chat.completions.create(
  model="nvidia/llama-3.1-nemotron-70b-instruct",
  messages=[{"role":"user","content":soup.get_text()}],
  temperature=0.2,
  top_p=0.7,
  max_tokens=1024,
  stream=True
)

for chunk in completion:
  # A streamed chunk may carry an empty `choices` list; skip it rather than
  # fail on choices[0].
  if not chunk.choices:
    continue
  if chunk.choices[0].delta.content is not None:
    print(chunk.choices[0].delta.content, end="")

Here's a summary of the article "Canada's Best Travel Credit Cards of 2024":

**Top Travel Credit Cards in Canada for 2024:**

1. **Best Overall Travel Credit Card:**
	* **CIBC Aventura Visa Infinite Card**
	* First-year value: $1,536
	* Credit score required: Excellent (≥720)
	* Top features: NEXUS Application Fee Rebate, complimentary lounge visits, and travel insurance
2. **Best Credit Card for Earning Travel Points:**
	* **American Express Cobalt Credit Card**
	* First-year value: $1,361
	* Credit score required: Fair (<660)
	* Top features: Flexible rewards points, travel insurance, and no foreign transaction fees
3. **Best Credit Card with No Foreign Transaction Fees:**
	* **Scotiabank Gold American Express Card**
	* First-year value: $1,194
	* Credit score required: Good (660+)
	* Top features: No foreign transaction fees, flexible Scotia rewards program, and travel insurance
4. **Best No-Fee Travel Credit Card:**
	* **MBNA Rewards Platinum Plus Mastercard**
	* First-year value:

In [None]:
# Cashback


In [None]:
# Travel Benefits
# Ask the model about the card's travel benefits and stream the answer.
# Relies on `client` created in the earlier API-setup cell.

completion = client.chat.completions.create(
  model="meta/llama-3.1-405b-instruct",
  messages=[{"role":"user","content":"Does the Desjardins Flexi Visa have any travel benefits?"}],
  temperature=0.2,
  top_p=0.7,
  max_tokens=1024,
  stream=True
)

for chunk in completion:
  # A streamed chunk may carry an empty `choices` list; skip it rather than
  # fail on choices[0].
  if not chunk.choices:
    continue
  if chunk.choices[0].delta.content is not None:
    print(chunk.choices[0].delta.content, end="")

In [None]:
# Signup Bonus, Points, Spending requirement
# Ask the model whether the card earns points and stream the answer.
# Relies on `client` created in the earlier API-setup cell.

completion = client.chat.completions.create(
  model="meta/llama-3.1-405b-instruct",
  messages=[{"role":"user","content":"Does the Desjardins Flexi Visa have the ability to get you any sort of points?"}],
  temperature=0.2,
  top_p=0.7,
  max_tokens=1024,
  stream=True
)

for chunk in completion:
  # A streamed chunk may carry an empty `choices` list; skip it rather than
  # fail on choices[0].
  if not chunk.choices:
    continue
  if chunk.choices[0].delta.content is not None:
    print(chunk.choices[0].delta.content, end="")

In [None]:
# Insurance

In [None]:
# Annual fee...first year free?