In [1]:
import sys
from pathlib import Path

sys.path.insert(0, "workspace/scrape")
sys.path.insert(0, "workspace/scrape/jobs/google")

import pandas as pd
import re
from datetime import date
from gspread_utils import init_gspread
from io import StringIO
import csv


In [2]:
gc = init_gspread()
sheet_name = "Google Jobs"
sh = gc.open(sheet_name)
def load_previous_data():
    """Load the most recent previous data from a Google Sheet.

    Finds the tab whose name matches YYYY-MM-DD(.n)? and returns the
    one that sorts greatest (latest date, then highest suffix).
    """


    date_re = re.compile(r"^(\d{4}-\d{2}-\d{2})(?:\.(\d+))?$")
    candidates = []
    for ws in sh.worksheets():
        m = date_re.match(ws.title)
        if m:
            sheet_date = date.fromisoformat(m.group(1))
            suffix = int(m.group(2)) if m.group(2) else 0
            candidates.append((sheet_date, suffix, ws))

    if not candidates:
        raise RuntimeError(f"No date-named sheet found in '{sheet_name}'")

    candidates.sort(key=lambda x: (x[0], x[1]), reverse=True)
    prev_date, _, prev_ws = candidates[0]
    print(f"Most recent sheet: '{prev_ws.title}'")

    return pd.DataFrame(prev_ws.get_all_records()), prev_date

# 1) Process Initial Scrape

In [8]:
df = pd.read_csv('workspace/scrape/jobs/google/data/google_jobs_raw.csv')

df['Additional locations'] = df['Location'].str.contains('more')
df['Role'] = df['Job Title'].str.split(',').str[0]
df['Team'] = df['Job Title'].str.split(',', n=1).str[1]
df['Excluded Role'] = df['Role'].str.contains('Intern|Manager|Sales|Architect|Technician|Director|Customer Engineer|Design|Validation Engineer|UX|Consultant|Scientist|Business|Financial|Negotiator|Executive|Specialist|Operations|Researcher|Strateg|Principal|Security|Associate|Test Engineer|Analy|Quality|Partner|Writer|Global|Privacy|VP|Reliability|SRE') | df["Job Title"].str.contains('PhD')
df['Clean Location'] = df['Location'].str.replace('place', '').str.replace('\n', '')
df['Included Role'] = ~df['Excluded Role'] & df['Role'].str.contains('Software|Technical Lead|Cloud Engineer|Solutions Engineer|Machine Learning Engineer') & ~df['Role'].str.contains(r"( iOS |\(iOS\))", regex=True)
df['California'] = df['Clean Location'].str.contains('CA')
df['Location possible'] = df['California'] | df['Additional locations']

  df['Included Role'] = ~df['Excluded Role'] & df['Role'].str.contains('Software|Technical Lead|Cloud Engineer|Solutions Engineer|Machine Learning Engineer') & ~df['Role'].str.contains(r"( iOS |\(iOS\))", regex=True)


In [9]:
df['Location possible'].value_counts()

Location possible
True     1446
False       6
Name: count, dtype: int64

In [10]:
df[~df['Location possible']]

Unnamed: 0,Job Title,Location,Link,Additional locations,Role,Team,Excluded Role,Clean Location,Included Role,California,Location possible
107,Community Affairs Engagement Representative,"place\nCalifornia, USA",https://www.google.com/about/careers/applicati...,False,Community Affairs Engagement Representative,,False,"California, USA",False,False,False
231,Aircraft Captain and Project Manager,"place\nCalifornia, USA",https://www.google.com/about/careers/applicati...,False,Aircraft Captain and Project Manager,,True,"California, USA",False,False,False
1352,"External Affairs Manager, US Federal Affairs","place\nCalifornia, USA",https://www.google.com/about/careers/applicati...,False,External Affairs Manager,US Federal Affairs,True,"California, USA",False,False,False
1393,"Policy Senior Analyst, Government Affairs and ...","place\nCalifornia, USA",https://www.google.com/about/careers/applicati...,False,Policy Senior Analyst,Government Affairs and Public Policy,True,"California, USA",False,False,False
1431,"Senior Red Team Security Consultant, Mandiant,...","place\nTexas, USA\n; United States",https://www.google.com/about/careers/applicati...,False,Senior Red Team Security Consultant,"Mandiant, Google Cloud",True,"Texas, USA; United States",False,False,False
1445,"Red Team Security Consultant, Mandiant, Google...","place\nTexas, USA\n; United States",https://www.google.com/about/careers/applicati...,False,Red Team Security Consultant,"Mandiant, Google Cloud",True,"Texas, USA; United States",False,False,False


In [11]:
df['California'].value_counts()

California
True     1363
False      89
Name: count, dtype: int64

In [12]:
df['Included Role'].value_counts()


Included Role
False    1038
True      414
Name: count, dtype: int64

In [13]:
df[~df['Included Role']]['Role'].value_counts()

Role
Technical Program Manager III                   49
Senior Product Manager                          22
Technical Program Manager                       20
Product Manager II                              18
Software Engineering Manager II                 15
                                                ..
Associate Manager                                1
Real Time Operations Systems Program Manager     1
Red Team Security Consultant                     1
Senior Privacy and Security Manager              1
System Performance Power Architect               1
Name: count, Length: 561, dtype: int64

In [14]:
df_possible = df[df['Included Role'] & df['Location possible']] 
df_possible.drop(columns=['Location', 'Excluded Role', 'Included Role', 'Location possible', 'California'], inplace=True)

In [15]:
df_possible.to_csv('workspace/scrape/jobs/google/data/google_jobs.csv', index=False)

In [16]:
df_latest = df_possible

# 4) Process Enhanced

In [60]:
df = pd.read_csv('workspace/scrape/jobs/google/data/google_details_raw.csv')
print(len(df))

# Remove headers
df['about'] = df['about'].str.replace('About the job\n', '')
df['responsibilities'] = df['responsibilities'].str.replace('Responsibilities\n', '')
df['qualifications'] = df['qualifications'].str.replace(r'info_outline', "", regex=True)
df['qualifications'] = df['qualifications'].str.replace('\r\n', '\n', regex=False)
df['qualifications'] = df['qualifications'].str.replace('\n\n', '\n', regex=False)
df['qualifications'] = df['qualifications'].str.replace(' \n', '\n', regex=False)
df['qualifications'] = df['qualifications'].str.replace('\n ', '\n', regex=False)
df['qualifications'] = df['qualifications'].str.replace('Note:', "")

# Remove legal notices
df['qualifications'] = df['qualifications'].str.replace("This posting is for a new vacancy.", "")
df['qualifications'] = df['qualifications'].str.replace("This posting is for an existing vacancy.", "")
df['qualifications'] = df['qualifications'].str.replace("This role may also be located in our Playa Vista, CA campus.", "")
df['qualifications'] = df['qualifications'].str.replace('Applicants in the County of Los Angeles: Qualified applications with arrest or conviction records will be considered for employment in accordance with the Los Angeles County Fair Chance Ordinance for Employers and the California Fair Chance Act.', "")
df['qualifications'] = df['qualifications'].str.replace("In accordance with Washington state law, we are highlighting our comprehensive benefits package, which is available to all eligible US based employees.", "", regex=False)
df['qualifications'] = df['qualifications'].str.replace("Benefits for this role include:\nHealth, dental, vision, life, disability insurance\nRetirement Benefits: 401(k) with company match\nPaid Time Off: 20 days of vacation per year, accruing at a rate of 6.15 hours per pay period for the first five years of employment\nSick Time: 40 hours/year (statutory, where applicable); 5 days/event (discretionary)\nMaternity Leave (Short-Term Disability + Baby Bonding): 28-30 weeks\nBaby Bonding Leave: 18 weeks\nHolidays: 13 paid days per year", "", regex=False)
df['qualifications'] = df['qualifications'].str.replace("Applicants in San Francisco: Qualified applications with arrest or conviction records will be considered for employment in accordance with the San Francisco Fair Chance Ordinance for Employers and the California Fair Chance Act.", "")
df['qualifications'] = df['qualifications'].str.replace("Google's hybrid workplace includes remote and in-office roles.", "")


# Remove application date
application_window_regex = r"The application window will be open until at least ([A-Za-z0-9\s]+), 2026. This opportunity will remain online based on business needs which may be before or after the specified date."
df['application_window'] = df['qualifications'].str.extract(application_window_regex)
df['qualifications'] = df['qualifications'].str.replace(application_window_regex, "", regex=True)

# Remove location specification
# TODO: add location specification to table
df['qualifications'] = df['qualifications'].str.replace("D.C.", "DC")
location_preference_regex = r"By applying to this position you will have an opportunity to share your preferred working location from the following:([A-Za-z,;:ü\s\(\)\-]*)\."
df['Additional locations'] = df['qualifications'].str.contains("share your preferred working location")
df.loc[df['Additional locations'], 'Clean Location'] = df.loc[df['Additional locations'], 'qualifications'].str.extract(location_preference_regex, expand=False)
df['qualifications'] = df['qualifications'].str.replace(location_preference_regex, "", regex=True)
df['qualifications'] = df['qualifications'].str.replace(r"Remote location\(s\): [\w]+.", "", regex=True)
df['qualifications'] = df['qualifications'].str.replace('""', "")

# Remove leading spaces
df['qualifications'] = df['qualifications'].str.replace(r'^[\sX]+', "", regex=True)

75


In [None]:
# Details could have location info, but it does not
# Remove places where LA is the only California option
# df['Clean Location'] = df['Clean Location'].str.replace("Los Angeles, CA", "Los Angeles")
# df['Clean Location'] = df['Clean Location'].str.replace("Irvine, CA", "Los Angeles")
# df = df[df['Clean Location'].str.contains('CA')]
# df['Clean Location'] = df['Clean Location'].str.strip(' \n\r\t\u00A0')
# print(len(df))


In [61]:
df['qualifications'].str.startswith('Minimum').value_counts()

qualifications
True     72
False     3
Name: count, dtype: int64

In [62]:
df.loc[~df['qualifications'].str.startswith('Minimum'), 'qualifications']

14    For Canada, this posting is for an existing va...
58    This is a specialized role which requires phys...
67    For Canada, this posting is for an existing va...
Name: qualifications, dtype: str

In [63]:
df['qualifications'] = df['qualifications'].str.replace(r'^[\s\S]*?(?=Minimum)', '', regex=True)

In [64]:
df['qualifications'].str.startswith('Minimum').value_counts()

qualifications
True    75
Name: count, dtype: int64

In [65]:
df['minimum qual'] = df['qualifications'].str.split('Preferred qualifications:').str[0]
df['preferred qual'] = df['qualifications'].str.split('Preferred qualifications:').str[1]
df['minimum qual'] = df['minimum qual'].str.replace('Minimum qualifications:', "")
df['minimum qual'] = df['minimum qual'].str.strip(' \n\r\t\u00A0')
df['preferred qual'] = df['preferred qual'].str.strip(' \n\r\t\u00A0')
df.drop(columns=['qualifications', 'Additional locations'], inplace=True)

In [66]:
salary_range_regex = r"The US base salary range for this full\-time position is ([\$0-9,\-]+) \+ bonus \+ equity \+ benefits\." 
df['salary range'] = df['about'].str.extract(salary_range_regex)

In [67]:
df['about'] = df['about'].str.replace('Learn more about benefits at Google.', "")
df['about'] = df['about'].str.replace("Please note that the compensation details listed in US role postings reflect the base salary only, and do not include bonus, equity, or benefits.", "")
df['about'] = df['about'].str.replace("Your recruiter can share more about the specific salary range for your preferred location during the hiring process.", "")
df['about'] = df['about'].str.replace("Our salary ranges are determined by role, level, and location.", "")
df['about'] = df['about'].str.replace("Within the range, individual pay is determined by work location and additional factors, including job-related skills, experience, and relevant education or training.", "")
df['about'] = df['about'].str.replace("Google's software engineers develop the next-generation technologies that change how billions of users connect, explore, and interact with information and one another.", "")
df['about'] = df['about'].str.replace("Our products need to handle information at massive scale, and extend well beyond web search.", "")
df['about'] = df['about'].str.replace("We're looking for engineers who bring fresh ideas from all areas, including information retrieval, distributed computing, large-scale system design, networking and data storage, security, artificial intelligence, natural language processing, UI design and mobile; the list goes on and is growing every day. As a software engineer, you will work on a specific project critical to Google’s needs with opportunities to switch teams and projects as you and our fast-paced business grow and evolve. We need our engineers to be versatile, display leadership qualities and be enthusiastic to take on new problems across the full-stack as we continue to push technology forward.", "")
df['about'] = df['about'].str.replace("Google Cloud accelerates every organization’s ability to digitally transform its business and industry.", "")
df['about'] = df['about'].str.replace("We deliver enterprise-grade solutions that leverage Google’s cutting-edge technology, and tools that help developers build more sustainably.", "")
df['about'] = df['about'].str.replace("Customers in more than 200 countries and territories turn to Google Cloud as their trusted partner to enable growth and solve their most critical business problems.", "")
df['about'] = df['about'].str.replace("With your technical expertise you will manage project priorities, deadlines, and deliverables. You will design, develop, test, deploy, maintain, and enhance software solutions.", "")
df['about'] = df['about'].str.replace(salary_range_regex, "", regex=True)
df['about'] = df['about'].str.replace("Google's mission is to organize the world's information and make it universally accessible and useful.", "")
df['about'] = df['about'].str.replace("Google is an engineering company at heart. We hire people with a broad set of technical skills who are ready to take on some of technology's greatest challenges and make an impact on users around the world. At Google, engineers not only revolutionize search, they routinely work on scalability and storage solutions, large-scale applications and entirely new platforms for developers around the world. From Google Ads to Chrome, Android to YouTube, social to local, Google engineers are changing the world one technological achievement after another.", "")
df['about'] = df['about'].str.strip(' \n\r\t\u00A0')
df['about'].head()

0    Behind everything our users see online is the ...
1    Site Reliability Engineering (SRE) combines so...
2    The team's mission is to engineer and optimize...
3    For decades, the computing revolution has resh...
4    Google Ads is helping power the open internet ...
Name: about, dtype: str

In [68]:
df['responsibilities'] = df['responsibilities'].str.replace("product or system development ", "")

In [69]:
def update_quals(to_replace, replace_with="", regex=False):
    df['minimum qual'] = df['minimum qual'].str.replace(to_replace, replace_with, regex=regex)
    df['preferred qual'] = df['preferred qual'].str.replace(to_replace, replace_with, regex=regex)

update_quals('year(s)? of experience', "YOE", True)
update_quals("data structures( and | or |/)algorithms", "DS&A", True)

"Bachelor’s degree or equivalent practical experience."
bach_regex = r"Bachelor['’]s degree( in Computer Science)? or equivalent practical experience."
df['Bachelors minimum'] = df['minimum qual'].str.contains(bach_regex, regex=True)
update_quals(bach_regex, "", True)

masters_regex = r"Master['’]s degree or PhD in (Engineering, )?Computer Science(,)? or (a )?related technical field(s)?."
df['Masters pref'] = df['preferred qual'].str.contains(masters_regex, regex=True)
update_quals(masters_regex, "", True)

accessibility_literal = "Experience developing accessible technologies."
df['Accessibility pref'] = df['preferred qual'].str.contains(accessibility_literal)
update_quals(accessibility_literal, "")

df['minimum qual'] = df['minimum qual'].str.strip(' \n\r\t\u00A0')
df['preferred qual'] = df['preferred qual'].str.strip(' \n\r\t\u00A0')
df[['minimum qual', 'preferred qual']]

  df['Bachelors minimum'] = df['minimum qual'].str.contains(bach_regex, regex=True)
  df['Masters pref'] = df['preferred qual'].str.contains(masters_regex, regex=True)


Unnamed: 0,minimum qual,preferred qual
0,8 YOE in software development.\n5 YOE building...,8 YOE with DS&A.\n3 YOE in a technical leaders...
1,"Bachelor’s degree in Computer Science, a relat...",Master's degree in Computer Science or Enginee...
2,2 YOE with software development in one or more...,Master's degree or PhD in computer science or ...
3,2 YOE with software development in Kotlin.\n2 ...,2 YOE with DS&A.
4,5 YOE with software development in one or more...,5 YOE with DS&A.\n1 YOE in a technical leaders...
...,...,...
70,"2 YOE programming in C++, or 1 YOE with an adv...",Experience with database internals and diagnos...
71,"8 YOE in software development.\n5 YOE testing,...",Experience in optimizing ML models for inferen...
72,8 YOE in software development.\n5 YOE with one...,8 YOE with DS&A.\n3 YOE in a technical leaders...
73,8 YOE in software development.\n5 YOE with ful...,8 YOE with DS&A.\n3 YOE in a technical leaders...


In [70]:
team_notes = [sh for sh in sh.worksheets() if sh.title == "Team Notes"][0]
team_notes_df = pd.DataFrame(team_notes.get_all_records())
for row in team_notes_df.itertuples():
    df['about'] = df['about'].str.replace(row.Note, row.Team)

In [71]:
df.drop(columns=['Unnamed: 0'], inplace=True)

In [72]:
df.sort_values(by='salary range')
len(df)

75

In [73]:
df

Unnamed: 0,about,responsibilities,Link,application_window,Clean Location,minimum qual,preferred qual,salary range,Bachelors minimum,Masters pref,Accessibility pref
0,Behind everything our users see online is the ...,Provide technical leadership on high-impact pr...,https://www.google.com/about/careers/applicati...,,,8 YOE in software development.\n5 YOE building...,8 YOE with DS&A.\n3 YOE in a technical leaders...,"$197,000-$291,000",True,True,False
1,Site Reliability Engineering (SRE) combines so...,Write code.\nReview code developed by other de...,https://www.google.com/about/careers/applicati...,,,"Bachelor’s degree in Computer Science, a relat...",Master's degree in Computer Science or Enginee...,"$118,000-$170,000",False,False,False
2,The team's mission is to engineer and optimize...,Design and implement low-level firmware module...,https://www.google.com/about/careers/applicati...,,,2 YOE with software development in one or more...,Master's degree or PhD in computer science or ...,"$141,000-$202,000",True,False,True
3,"For decades, the computing revolution has resh...","Write code.\nParticipate in, or lead design re...",https://www.google.com/about/careers/applicati...,,,2 YOE with software development in Kotlin.\n2 ...,2 YOE with DS&A.,"$141,000-$202,000",True,True,True
4,Google Ads Team,Write and test code. \n\nCollaborate with peer...,https://www.google.com/about/careers/applicati...,,"Pittsburgh, PA, USA; Mountain View, CA, USA; ...",5 YOE with software development in one or more...,5 YOE with DS&A.\n1 YOE in a technical leaders...,"$166,000-$244,000",True,True,True
...,...,...,...,...,...,...,...,...,...,...,...
70,Spanner Batch Team empowers customers to perfo...,"Write and test code.\nParticipate in, or lead ...",https://www.google.com/about/careers/applicati...,,,"2 YOE programming in C++, or 1 YOE with an adv...",Experience with database internals and diagnos...,"$141,000-$202,000",True,True,False
71,EdgeTPU is a family of embedded machine learni...,"Work as part of the EdgeTPU compiler team, bui...",https://www.google.com/about/careers/applicati...,,"Mountain View, CA, USA; Kirkland, WA, USA","8 YOE in software development.\n5 YOE testing,...",Experience in optimizing ML models for inferen...,"$197,000-$291,000",True,True,False
72,"As a key part of Google's Cloud, Applied AI is...",Develop and implement investigative tools for ...,https://www.google.com/about/careers/applicati...,,"New York, NY, USA; Sunnyvale, CA, USA",8 YOE in software development.\n5 YOE with one...,8 YOE with DS&A.\n3 YOE in a technical leaders...,"$197,000-$291,000",True,True,False
73,Our mission is to guide users through the stor...,Work alongside area tech leads and product man...,https://www.google.com/about/careers/applicati...,,,8 YOE in software development.\n5 YOE with ful...,8 YOE with DS&A.\n3 YOE in a technical leaders...,"$197,000-$291,000",True,True,False


In [None]:
df = df.astype(str)
df['Enhanced'] = "TRUE"

In [75]:
df_main, date = load_previous_data()
df_main = df_main.set_index("Link")
df = df.set_index("Link")
df_main = df_main[~df_main.index.duplicated(keep="last")]
df = df[~df.index.duplicated(keep="last")]

# Save Clean Location before overwriting
clean_loc_backup = df_main["Clean Location"].copy()

# Overwrite all matching columns in df_main with values from df
df_main.update(df)

# Restore Clean Location where df had blank/missing values
if "Clean Location" in df.columns:
    blank = df["Clean Location"].isna() | (df["Clean Location"].astype(str).str.strip() == "")
    df_main.loc[blank.reindex(df_main.index, fill_value=True), "Clean Location"] = clean_loc_backup

df_main = df_main.reset_index()
df = df.reset_index()

Most recent sheet: '2026-02-19'


In [76]:
len(df_main)

475

In [77]:
df_main.drop(columns=['Job Title', 'Additional locations'], inplace=True)
df_main.sort_values(by='Closed', inplace=True)

In [78]:
sheet_name = date.today().isoformat()
existing = {w.title for w in sh.worksheets()}
if sheet_name in existing:
    n = 2
    while f"{sheet_name}.{n}" in existing:
        n += 1
    sheet_name = f"{sheet_name}.{n}"

ws = sh.add_worksheet(title=sheet_name, rows=len(df_main) + 1, cols=len(df_main.columns))

csv_buf = StringIO()
df_main.to_csv(csv_buf, index=False)
rows = list(csv.reader(StringIO(csv_buf.getvalue())))
ws.update(rows, value_input_option="USER_ENTERED")
print(f"Uploaded {len(rows)-1} rows to new sheet '{sheet_name}'")

Uploaded 475 rows to new sheet '2026-02-19.2'


# 2) Merge in previous

In [17]:
df_prev, prev_date = load_previous_data()

Most recent sheet: '2026-02-14'


In [18]:
df_latest["Posted By"] = date.today().isoformat()
df_latest["Posted After"] = prev_date.isoformat()

In [19]:
# Strip query params from Link columns
df_latest["Link"] = df_latest["Link"].str.split("?").str[0]
df_prev["Link"] = df_prev["Link"].str.split("?").str[0]

# Add any columns in df that are missing from df_prev
for col in df_latest.columns:
    if col not in df_prev.columns:
        df_prev[col] = pd.NA

# Mark rows in df_prev that are no longer in df as Closed
current_links = set(df_latest["Link"])
prev_links = set(df_prev["Link"])

closed_mask = ~df_prev["Link"].isin(current_links)
df_prev.loc[closed_mask, "Closed"] = "TRUE"

# Append new rows from df that don't exist in df_prev
new_mask = ~df_latest["Link"].isin(prev_links)
df_prev = pd.concat([df_prev, df_latest.loc[new_mask]], ignore_index=True)

print(f"Previous rows: {len(prev_links)}")
print(f"Closed: {closed_mask.sum()}")
print(f"New: {new_mask.sum()}")
print(f"Total rows: {len(df_prev)}")

Previous rows: 401
Closed: 62
New: 75
Total rows: 476


In [20]:

sheet_name = date.today().isoformat()
existing = {w.title for w in sh.worksheets()}
if sheet_name in existing:
    n = 2
    while f"{sheet_name}.{n}" in existing:
        n += 1
    sheet_name = f"{sheet_name}.{n}"

ws = sh.add_worksheet(title=sheet_name, rows=len(df_prev) + 1, cols=len(df_prev.columns))

csv_buf = StringIO()
df_prev.to_csv(csv_buf, index=False)
rows = list(csv.reader(StringIO(csv_buf.getvalue())))
ws.update(rows, value_input_option="USER_ENTERED")
print(f"Uploaded {len(rows)-1} rows to new sheet '{sheet_name}'")

Uploaded 476 rows to new sheet '2026-02-19'


In [24]:
df_main = df_prev

# 3) Enhance

In [21]:
from playwright.async_api import async_playwright

async def scrape_google_job(target_url, page):
    print("Fetching", target_url)
    try:
        await page.goto(target_url, wait_until="domcontentloaded")

        # Wait for actual job detail content to appear
        await page.wait_for_selector("div.KwJkGe", timeout=10000)

        # Extract Data
        job_data = {
            "qualifications": await page.locator("div.KwJkGe").inner_text(),
            "about": await page.locator("div.aG5W3").inner_text(),
            "responsibilities": await page.locator("div.BDNOWe").inner_text(),
            "Link": target_url,
        }
        return job_data
    except Exception as e:
        print(f"  Failed: {e}")
        return {"Link": target_url}

In [28]:
df, date = load_previous_data()

url = df['Link'][0]
print(url)

Most recent sheet: '2026-02-19'
https://www.google.com/about/careers/applications/jobs/results/93027319500677830-staff-software-engineer-google-photos


In [29]:
page_data = []
async with async_playwright() as p:
    browser = await p.chromium.launch(headless=True)
    context = await browser.new_context(
        user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    )
    page = await context.new_page()
    for index, row in df[~df['Enhanced'].str.contains("TRUE")].iterrows():
        url = row['Link']
        data = await scrape_google_job(url, page)
        page_data.append(data)
    await browser.close()
df_details = pd.DataFrame(page_data)

Fetching https://www.google.com/about/careers/applications/jobs/results/110640626852078278-staff-software-engineer-infrastructure-platforms-infrastructure-engineering
Fetching https://www.google.com/about/careers/applications/jobs/results/79836147521331910-software-engineer-ii-site-reliability
Fetching https://www.google.com/about/careers/applications/jobs/results/122510679507641030-software-engineer-iii-embedded-systemsfirmware-pixel-video
Fetching https://www.google.com/about/careers/applications/jobs/results/112847655926670022-software-engineer-iii-mobile-android-xr
Fetching https://www.google.com/about/careers/applications/jobs/results/126028941696606918-senior-software-engineer-aiml-search-ads
Fetching https://www.google.com/about/careers/applications/jobs/results/107803540033807046-senior-software-engineer-google-cloud-storage
Fetching https://www.google.com/about/careers/applications/jobs/results/107794365983662790-senior-software-engineer-aiml-youtube
Fetching https://www.googl

In [30]:
df_details.to_csv("workspace/scrape/jobs/google/data/google_details_raw.csv")