In [15]:
import pandas as pd
import wikipediaapi
import os
import groq
from dotenv import load_dotenv

# Create a User-Agent header with info about your application
user_agent = "FinalProject/1.0 (baruchgottesman@gmail.com)"  # Replace with your info

# Create the Wikipedia API object with the user agent
wiki_api = wikipediaapi.Wikipedia(
    language='en',
    user_agent=user_agent
)
bird_species = ["White-faced Whistling", "Black-bellied Whistling", "West Indian Whistling", "Fulvous Whistling", "Emperor Goose",
]

    # "Snow Goose", "Ross's Goose", "Graylag Goose", "Greater White-fronted Goose", "Lesser White-fronted Goose", "Taiga Bean-goose",
    # "Tundra Bean-goose", "Pink-footed Goose", "Brant", "Barnacle Goose", "Cackling Goose", "Canada Goose", "Hawaiian Goose",\
    # "Mute Swan", "Black Swan", "Trumpeter Swan", "Tundra Swan", "Whooper Swan", "Egyptian Goose", "Common Shelduck", "Muscovy Duck",
    # "Wood Duck", "Baikal Teal", "Garganey", "Blue-winged Teal", "Cinnamon Teal", "Northern Shoveler", "Gadwall", "Falcated Duck",
    # "Eurasian Wigeon", "American Wigeon", "Pacific Black Duck", "Laysan Duck", "Hawaiian Duck", "Eastern Spot-billed Duck",
    # "Mallard", "Mexican Duck", "American Black Duck", "Mottled Duck", "White-cheeked Pintail", "Northern Pintail", "Green-winged Teal",
    # "Canvasback", "Redhead", "Common Pochard", "Ring-necked Duck", "Tufted Duck", "Greater Scaup", "Lesser Scaup", "Steller's Eider",
    # "Spectacled Eider", "King Eider", "Common Eider", "Harlequin Duck", "Labrador Duck", "Surf Scoter", "Velvet Scoter",
    # "White-winged Scoter", "Stejneger's Scoter", "Common Scoter", "Black Scoter", "Long-tailed Duck", "Bufflehead",
    # "Common Goldeneye", "Barrow's Goldeneye", "Smew", "Hooded Merganser", "Common Merganser", "Red-breasted Merganser",
    # "Masked Duck", "Ruddy Duck"

In [17]:
# Data storage for species and Wikipedia text
data = []

# Function to get Wikipedia text with name variations
def get_wikipedia_text(species):
    # Split the species name into individual words for more flexible capitalization control
    words = species.split()

    # List of variations based on potential naming patterns
    variations = [
        species,                                 # Original name
        species + " bird",                       # Add "bird"
        species + " duck",                       # Add "duck"
        species.replace(" ", "_"),               # Replace space with underscore
        species.replace(" ", "-"),               # Replace space with dash
        species.lower(),                         # Lowercase the original name
        species.lower() + " bird",               # Lowercase with "bird"
        species.lower() + " duck",               # Lowercase with "duck"
        "_".join(words).lower() + "_duck",       # Lowercase words with "_duck"
        "-".join(words).lower() + "-duck",       # Lowercase words with "-duck"
        "_".join(word.capitalize() for word in words),   # Capitalize each word, join with underscore
        "-".join(word.capitalize() for word in words),   # Capitalize each word, join with dash
        "_".join(words) + "_duck",               # Original case with "_duck"
        "-".join(words) + "-duck",               # Original case with "-duck"
        "_".join(words) + "_bird",               # Original case with "_bird"
        "-".join(words) + "-bird",               # Original case with "-bird"
        " ".join(word.capitalize() for word in words) + " Duck", # Capitalized with "Duck"
        " ".join(word.capitalize() for word in words) + " Bird"  # Capitalized with "Bird"
    ]

    # Attempt each variation until a page is found
    for name in variations:
        page = wiki_api.page(name)
        if page.exists():
            return page.text  # Return the text if the page exists

    return "Wikipedia page not found"  # Return a message if no variation was successful

# Fetch Wikipedia content for each bird species
for species in bird_species:
    wiki_text = get_wikipedia_text(species)
    data.append([species, wiki_text])

# Create DataFrame
df = pd.DataFrame(data, columns=["Species Name", "Wikipedia Text"])

# Display the DataFrame to the user
df.head()

Unnamed: 0,Species Name,Wikipedia Text
0,White-faced Whistling,The white-faced whistling duck (Dendrocygna vi...
1,Black-bellied Whistling,The black-bellied whistling duck (Dendrocygna ...
2,West Indian Whistling,The West Indian whistling duck (Dendrocygna ar...
3,Fulvous Whistling,The fulvous whistling duck or fulvous tree duc...
4,Emperor Goose,"The emperor goose (Anser canagicus), also know..."


In [3]:
import requests
import pandas as pd
import os
import groq
from dotenv import load_dotenv

# Bing Search API configuration

load_dotenv("BING_API_KEY.env")
BING_API_KEY = os.getenv("BING_API_KEY")
BING_URL = "https://api.bing.microsoft.com/v7.0/search"

# Function to perform a Bing search and retrieve up to 4 unique text snippets from the first 10 results
def get_unique_bing_search_results(species):
    search_text = f"{species} bird"
    headers = {"Ocp-Apim-Subscription-Key": BING_API_KEY}
    params = {"q": search_text, "count": 10}  # Fetch up to 10 results

    # Request search results from Bing API
    response = requests.get(BING_URL, headers=headers, params=params)
    
    if response.status_code == 200:
        search_data = response.json()
        unique_snippets = []
        
        # Extract and filter unique snippets
        for result in search_data.get("webPages", {}).get("value", [])[:10]:
            snippet = result.get("snippet", "")
            if snippet and snippet not in unique_snippets:
                unique_snippets.append(snippet)
            
            # Stop if we have 4 unique snippets
            if len(unique_snippets) >= 4:
                break

        # Fill with NULL if fewer than 4 unique snippets are found
        while len(unique_snippets) < 4:
            unique_snippets.append(None)
        
        return unique_snippets
    else:
        # Return placeholders if there's an error
        return [None, None, None, None]

# Assuming you already have an existing DataFrame `df` with a "Species Name" column
# We will add the Bing results as new columns to this DataFrame

# Loop through each species in the existing DataFrame and fetch Bing search results
for i, species in df["Species Name"].items():
    bing_results = get_unique_bing_search_results(species)
    # Assign each unique Bing result to new columns in the DataFrame
    df.at[i, "Bing Result 1"] = bing_results[0]
    df.at[i, "Bing Result 2"] = bing_results[1]
    df.at[i, "Bing Result 3"] = bing_results[2]
    df.at[i, "Bing Result 4"] = bing_results[3]

# Display the updated DataFrame to verify the added columns
df.head()


Unnamed: 0,Species Name,Wikipedia Text,Bing Result 1,Bing Result 2,Bing Result 3,Bing Result 4
0,White-faced Whistling,The white-faced whistling duck (Dendrocygna vi...,Identification. Striking duck with black-and-w...,The white-faced whistling duck (Dendrocygna vi...,The white-faced whistling duck (Dendrocygna vi...,The white-faced whistling duck (Dendrocygna vi...
1,Black-bellied Whistling,The black-bellied whistling duck (Dendrocygna ...,The Black-bellied Whistling-Duck is a boistero...,"They look most like ducks, but their lack of s...",Climate Threats Facing the Black-bellied Whist...,A black-bellied whistling duck in the water. T...
2,West Indian Whistling,The West Indian whistling duck (Dendrocygna ar...,A distinctive large waterbird between the size...,The West Indian whistling duck (Dendrocygna ar...,Appearance. The West Indian whistling duck is ...,West Indian whistling ducks are medium-sized d...
3,Fulvous Whistling,The fulvous whistling duck or fulvous tree duc...,Whistling-ducks are a distinctive group of abo...,"At a Glance. A lanky bird of shallow wetlands,...",The fulvous whistling duck or fulvous tree duc...,The Fulvous Whistling-Duck is a mix of rich ca...
4,Emperor Goose,"The emperor goose (Anser canagicus), also know...","The beautiful Emperor Goose is a small, scarce...","The beautiful Emperor Goose is a small, scarce...",Explore Emperor Goose. Exotic species. Stocky ...,Climate Threats Facing the Emperor Goose. Choo...


In [19]:


# Load environment variables
load_dotenv("GROQ_API_KEY.env")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Initialize Groq client
client = groq.Groq(api_key=GROQ_API_KEY)

# Function to send combined text to Groq and retrieve fun facts
def get_fun_facts(species_text, sources):
    # Prepare the prompt for Groq
    prompt = f"""
    Given the following information about a bird species, extract 5 unique, fun, and interesting facts.
    These facts should be unusual, unexpected, or specific to this species. Please provide each fact in 
    a numbered list and specify which source it is derived from (e.g., Wikipedia, Bing).

    Text:
    {species_text}

    Format:
    1. Fact (source: Wikipedia)
    2. Fact (source: Bing)
    ... up to 5 facts
    """

    try:
        # Send the prompt to Groq and get the response
        response = client.chat.completions.create(
            messages=[
                {"role": "user", "content": prompt}
            ],
            model="mixtral-8x7b-32768",
            temperature=0.5,
        )
        # Parse the response into individual facts with sources
        facts = response.choices[0].message.content.strip().splitlines()
        return facts
    except Exception as e:
        print(f"Error retrieving fun facts from Groq: {str(e)}")
        return ["Error fetching fun facts"] * 5  # Return placeholders if an error occurs

# Assuming `df` is the DataFrame created from previous steps with columns "Species Name", "Wikipedia Text",
# "Bing Result 1", "Bing Result 2", "Bing Result 3", "Bing Result 4"

# Initialize columns for Fun Facts and their sources
for i in range(1, 6):
    df[f"Fun Fact {i}"] = None
    df[f"Source for Fun Fact {i}"] = None

# Process each species in the DataFrame
for i, row in df.iterrows():
    # Concatenate all available texts
    species_name = row["Species Name"]
    wikipedia_text = row["Wikipedia Text"] or ""
    bing_texts = [row.get(f"Bing Result {j}") or "" for j in range(1, 5)]
    combined_text = f"Species: {species_name}\n\nWikipedia:\n{wikipedia_text}\n\n" + "\n\n".join([f"Bing Result {j+1}:\n{text}" for j, text in enumerate(bing_texts)])

    # Get fun facts from Groq
    fun_facts = get_fun_facts(combined_text, sources=["Wikipedia"] + ["Bing"] * 4)

    # Parse and store each fact with its source in the DataFrame
    for fact_num, fact in enumerate(fun_facts[:5], 1):
        if fact and "(source:" in fact:
            fact_text, source_text = fact.split("(source:", 1)
            df.at[i, f"Fun Fact {fact_num}"] = fact_text.strip()
            df.at[i, f"Source for Fun Fact {fact_num}"] = "Wikipedia" if "Wikipedia" in source_text else "Bing"
        else:
            # In case of formatting issues, we put the entire fact in the "Fun Fact" column and leave source blank
            df.at[i, f"Fun Fact {fact_num}"] = fact
            df.at[i, f"Source for Fun Fact {fact_num}"] = None

# Display the DataFrame with Fun Facts to verify
df.head()


Unnamed: 0,Species Name,Wikipedia Text,Fun Fact 1,Source for Fun Fact 1,Fun Fact 2,Source for Fun Fact 2,Fun Fact 3,Source for Fun Fact 3,Fun Fact 4,Source for Fun Fact 4,Fun Fact 5,Source for Fun Fact 5
0,White-faced Whistling,The white-faced whistling duck (Dendrocygna vi...,1. The white-faced whistling duck's scientific...,Wikipedia,2. The white-faced whistling duck's call is a ...,Wikipedia,3. The white-faced whistling duck's disjunctiv...,Wikipedia,4. A study of white-faced whistling duck behav...,Bing,5. Individual and sexual differences have been...,Bing
1,Black-bellied Whistling,The black-bellied whistling duck (Dendrocygna ...,1. The black-bellied whistling duck is known f...,Wikipedia,2. These ducks have a unique preference for ne...,Wikipedia,3. The black-bellied whistling duck's diet is ...,Bing,4. The black-bellied whistling duck's call is ...,Bing,5. The species is widely distributed in the Am...,Wikipedia
2,West Indian Whistling,The West Indian whistling duck (Dendrocygna ar...,1. The West Indian whistling duck is the large...,Wikipedia,2. This species has a unique hunting behavior ...,Bing,3. West Indian whistling ducks are known for t...,Wikipedia,4. These ducks are mostly nocturnal and secret...,Bing,5. The West Indian whistling duck has suffered...,Wikipedia
3,Fulvous Whistling,The fulvous whistling duck or fulvous tree duc...,1. The fulvous whistling duck forms a superspe...,,2. The fulvous whistling duck is an early spli...,,3. The fulvous whistling duck is a long-legged...,,4. Fulvous whistling ducks are found in lowlan...,,5. This species has strong colonizing tendenci...,
4,Emperor Goose,"The emperor goose (Anser canagicus), also know...",1. The Emperor Goose's head frequently turns a...,Wikipedia,2. The Emperor Goose's vocalizations have a mo...,Wikipedia,3. The Emperor Goose is one of the most unsoci...,Bing,4. The Emperor Goose's flesh has a strong flav...,Bing,5. The Emperor Goose's population has been exp...,Bing


In [5]:
# Assuming the `df` DataFrame has been populated with the Fun Facts and sources as described

# Add a "Number" column as a unique identifier (based on the index)
df.insert(0, "Number", range(1, len(df) + 1))

# Define the desired column order
column_order = (
    ["Number", "Species Name"] +
    [f"Fun Fact {i}" for i in range(1, 6)] +
    [f"Source for Fun Fact {i}" for i in range(1, 6)] +
    ["Wikipedia Text"] +
    [f"Bing Result {i}" for i in range(1, 5)]
)

# Reorder the DataFrame based on the specified column order
df = df[column_order]

# Display the rearranged DataFrame to verify the new order
df.head()


Unnamed: 0,Number,Species Name,Fun Fact 1,Fun Fact 2,Fun Fact 3,Fun Fact 4,Fun Fact 5,Source for Fun Fact 1,Source for Fun Fact 2,Source for Fun Fact 3,Source for Fun Fact 4,Source for Fun Fact 5,Wikipedia Text,Bing Result 1,Bing Result 2,Bing Result 3,Bing Result 4
0,1,White-faced Whistling,1. The white-faced whistling duck's scientific...,2. White-faced whistling ducks are known for t...,3. These ducks are highly social and can form ...,4. The white-faced whistling duck is a noisy s...,5. The white-faced whistling duck is a widely ...,Wikipedia,Bing,Bing,Bing,Bing,The white-faced whistling duck (Dendrocygna vi...,Identification. Striking duck with black-and-w...,The white-faced whistling duck (Dendrocygna vi...,The white-faced whistling duck (Dendrocygna vi...,The white-faced whistling duck (Dendrocygna vi...
1,2,Black-bellied Whistling,1. The black-bellied whistling duck is unique ...,,2. The oldest recorded Black-bellied Whistling...,,3. The black-bellied whistling duck's call is ...,Wikipedia,,Bing,,Bing,The black-bellied whistling duck (Dendrocygna ...,The Black-bellied Whistling-Duck is a boistero...,"They look most like ducks, but their lack of s...",Climate Threats Facing the Black-bellied Whist...,A black-bellied whistling duck in the water. T...
2,3,West Indian Whistling,1. The West Indian whistling duck is the large...,,2. West Indian whistling ducks are medium-size...,,3. The West Indian whistling duck is mostly no...,Wikipedia,,Bing,,Wikipedia,The West Indian whistling duck (Dendrocygna ar...,A distinctive large waterbird between the size...,The West Indian whistling duck (Dendrocygna ar...,Appearance. The West Indian whistling duck is ...,West Indian whistling ducks are medium-sized d...
3,4,Fulvous Whistling,1. The fulvous whistling duck is part of the w...,,2. Fulvous whistling ducks are known for their...,,3. The fulvous whistling duck has a very large...,Wikipedia,,Bing,,Wikipedia,The fulvous whistling duck or fulvous tree duc...,Whistling-ducks are a distinctive group of abo...,"At a Glance. A lanky bird of shallow wetlands,...",The fulvous whistling duck or fulvous tree duc...,The Fulvous Whistling-Duck is a mix of rich ca...
4,5,Emperor Goose,"1. The Emperor Goose has a unique ""scaled appe...",2. This goose species has a striking appearanc...,"3. Unlike many goose species, the Emperor Goos...",4. The Emperor Goose is known for its intricat...,5. The Emperor Goose's diet primarily consists...,Wikipedia,Bing,Wikipedia,Bing,Wikipedia,"The emperor goose (Anser canagicus), also know...","The beautiful Emperor Goose is a small, scarce...","The beautiful Emperor Goose is a small, scarce...",Explore Emperor Goose. Exotic species. Stocky ...,Climate Threats Facing the Emperor Goose. Choo...


In [6]:
# Export the rearranged DataFrame to a CSV file
df.to_csv("Fun Facts.csv", index=False)

print("Data has been successfully exported to Fun Facts.csv")


Data has been successfully exported to Fun Facts.csv
