In [68]:
import pandas as pd
import requests
import time

from bs4 import BeautifulSoup

In [69]:
app_id = '17ef27d3'
app_key = '430750b4d0962ced389c11f197a8d2f4'
intern_keywords = ['intern', 'internship']
focus_keywords = ['full stack', 'full-stack', 'frontend', 'front-end', 'front end', 'backend', 'back end','back-end', 'SWE', 'developer', 'development', 'develop', 'software', 'product', 'engineering', 'software engineering', 'engineer', 'technology', 'data', 'data analyst', 'data science', 'data engineer', 'data engineering', 'IT', 'information technology', 'devops', 'cloud engineer', 'cloud', 'artificial intelligence', 'machine learning', 'ML', 'AI', 'AI/ML']
location = 'United States'

all_jobs = []

for intern_keyword in intern_keywords:
    for page in range(1, 101):
        print(f"Searching '{intern_keyword}' in {location} - Page {page}")
        url = f'https://api.adzuna.com/v1/api/jobs/us/search/{page}'

        params = {
            'app_id': app_id,
            'app_key': app_key,
            'what': intern_keyword,
            'where': location,
            'results_per_page': 50,
            'content-type': 'application/json'
        }

        response = requests.get(url, params=params)
        data = response.json()

        if 'results' not in data or not data['results']:
            print("No more results.")
            break

        for job in data['results']:
            title = job['title'].lower()
            has_intern = any(kw in title for kw in intern_keywords)
            has_focus = any(kw in title for kw in focus_keywords)

            if has_intern and has_focus:
                if job['redirect_url'] not in [j['redirect_url'] for j in all_jobs]:
                    all_jobs.append(job)

        time.sleep(1)  # Respect API rate limits

if all_jobs:
    for job in all_jobs:
        print(f"{job.get('title', 'N/A')}")
        print(f"{job.get('company', {}).get('display_name', 'N/A')}")
        print(f"{job.get('location', {}).get('display_name', 'N/A')}")
        print(f"{job.get('redirect_url', 'N/A')}")
        print('-' * 80)

    job_data = []
    for job in all_jobs:
        job_data.append({
            'Title': job.get('title', 'N/A'),
            'Company': job.get('company', {}).get('display_name', 'N/A'),
            'Location': job.get('location', {}).get('display_name', 'N/A'),
            'Category': job.get('category', {}).get('label', 'N/A'),
            'Created': job.get('created', 'N/A'),
            'Description': job.get('description', 'N/A'),
            'URL': job.get('redirect_url', 'N/A')
        })

    df = pd.DataFrame(job_data)
    df.to_csv('internship_jobs_us.csv', index=False)
    print("Exported results to 'internship_jobs_us.csv'")

else:
    print("No internships found nationwide with those criteria.")


Searching 'intern' in United States - Page 1
Searching 'intern' in United States - Page 2
Searching 'intern' in United States - Page 3
Searching 'intern' in United States - Page 4
Searching 'intern' in United States - Page 5
Searching 'intern' in United States - Page 6
Searching 'intern' in United States - Page 7
Searching 'intern' in United States - Page 8
Searching 'intern' in United States - Page 9
Searching 'intern' in United States - Page 10
Searching 'intern' in United States - Page 11
Searching 'intern' in United States - Page 12
Searching 'intern' in United States - Page 13
Searching 'intern' in United States - Page 14
Searching 'intern' in United States - Page 15
Searching 'intern' in United States - Page 16
Searching 'intern' in United States - Page 17
Searching 'intern' in United States - Page 18
Searching 'intern' in United States - Page 19
Searching 'intern' in United States - Page 20
Searching 'intern' in United States - Page 21
Searching 'intern' in United States - Page 

In [70]:
import re
from datetime import datetime, timedelta, timezone

df['Created'] = pd.to_datetime(df['Created'], errors='coerce')

# matching adzuna's time format
cutoff_date = datetime.now(timezone.utc) - timedelta(days=30)

# get rid of duplicates
df['unique_key'] = df['Title'] + df['Company'] + df['Location']
df = df.drop_duplicates(subset='unique_key').drop(columns='unique_key')

# get rid of rows with missing important info like title, company, location
df = df.dropna(subset=['Title', 'Company', 'Location'])

# clean description text for later analysis
def clean_description(desc):
    desc = re.sub('<[^<]+?>', '', desc) 
    desc = re.sub(r'\s+', ' ', desc) 
    return desc.strip()

df['Description'] = df['Description'].apply(clean_description)

df = df.reset_index(drop=True)
df.to_csv('internship_jobs_us_cleaned.csv', index=False)


In [71]:
import sqlite3
import pandas as pd

df = pd.read_csv('internship_jobs_us_cleaned.csv')
conn = sqlite3.connect('internships.db')
df.to_sql('internships', conn, if_exists='replace', index=False)

1845

### Find Recent Internships (last 30 days)

In [81]:
query = """
SELECT title, company, location, created
FROM internships
WHERE DATE(created) >= DATE('now', '-30 day')
ORDER BY DATE(created) DESC
LIMIT 10
"""
recent_internships = pd.read_sql_query(query, conn)
recent_internships


Unnamed: 0,Title,Company,Location,Created
0,Engineering Intern,"US Tsubaki Automotive, LLC","Portland, Sumner County",2025-04-04 01:31:26+00:00
1,Engineering Intern,The Gund Company Inc,"Overland, Saint Louis County",2025-04-04 01:31:30+00:00
2,Manufacturing Engineering Intern,"US Tsubaki Automotive, LLC","South Hadley Falls, Hampshire County",2025-04-04 01:31:27+00:00
3,Network Engineer Internship - Summer 2025,Uline,"Pleasant Prairie, Kenosha County",2025-04-04 01:31:49+00:00
4,Engineering Internship,Napa Sanitation District,"Napa, Napa County",2025-04-04 01:42:40+00:00
5,Engineering Internship,Hazen and Sawyer,"Austin, Travis County",2025-04-04 01:39:57+00:00
6,Project Engineer Internship,U.S. Engineering,"Kansas City, Jackson County",2025-04-04 01:41:29+00:00
7,Data Intern,UWGLV AmeriCorps,"Allentown, Lehigh County",2025-04-03 01:43:55+00:00
8,Software Intern,"Canon U.S.A., Inc.","Newport Beach, Orange County",2025-04-03 12:15:02+00:00
9,Data Analytics Intern,"Canon U.S.A., Inc.","Newport Beach, Orange County",2025-04-03 12:15:02+00:00


### Finding internships with keyword "Fall 2025"

In [73]:
query = """
SELECT title, company, location, created
FROM internships
WHERE LOWER(title) LIKE '%fall 2025%'
   OR LOWER(description) LIKE '%fall 2025%'
"""
fall_internships = pd.read_sql_query(query, conn)
fall_internships


Unnamed: 0,Title,Company,Location,Created


### Counting internships per company

In [74]:
query = """
SELECT company, COUNT(*) AS num_listings
FROM internships
GROUP BY company
ORDER BY num_listings DESC
LIMIT 10
"""
top_companies = pd.read_sql_query(query, conn)
top_companies


Unnamed: 0,Company,num_listings
0,Maximus,1156
1,Childhood Cancer Society,272
2,Superior Metals Manufacturing,20
3,CPP- Syracuse,14
4,"Steel Dynamics, Inc.",12
5,CVS Health,9
6,MVP Health Care,7
7,Chen Moore and Associates,7
8,Brightwell,6
9,ISG,5


### Remote internships only

In [75]:
query = """
SELECT title, company, location
FROM internships
WHERE LOWER(location) LIKE '%remote%'
"""
remote_or_ca = pd.read_sql_query(query, conn)
remote_or_ca

Unnamed: 0,Title,Company,Location


### Title contains keyword "data"

In [76]:
query = """
SELECT title, company, location
FROM internships
WHERE LOWER(title) LIKE '%data%'
"""
data_roles = pd.read_sql_query(query, conn)
data_roles


Unnamed: 0,Title,Company,Location
0,Histopathology Imaging Data Science Intern,Genmab,"Plainsboro, Middlesex County"
1,Data Intern,UWGLV AmeriCorps,"Allentown, Lehigh County"
2,Data Analytics Intern,"Canon U.S.A., Inc.","Newport Beach, Orange County"
3,Data Science Intern,Maze Therapeutics,"South San Francisco, San Mateo County"
4,‚ñ∑ High Salary: Data Analytics Intern,"Canon U.S.A., Inc.","Irvine, Orange County"
5,IT Data Analytics Internship,Meritage Homes,"Scottsdale, Maricopa County"
6,Data Analytics Internship,Brightwell,"Atlanta, Fulton County"
7,Data Scientist Internship,Prescient Edge,"Mc Lean, Fairfax County"
8,Data Analytics Internship,Brightwell,"Sandy Springs, Fulton County"
9,"Digital Data, Internship",MVP Health Care,"Upper Union, Schenectady County"


### Finding jobs that mention SQL in description

In [77]:
query = """
SELECT title, company, location, url
FROM internships
WHERE LOWER(description) LIKE '%sql%';
"""

sql_jobs = pd.read_sql_query(query, conn)
sql_jobs

Unnamed: 0,Title,Company,Location,URL
0,Data Engineer Internship,Fhlbcin,US,https://www.adzuna.com/details/5035524773?utm_...
1,Engineering Internships 2025,Cockroach Labs,"New York City, New York",https://www.adzuna.com/details/5086065339?utm_...


### Most common job titles

In [78]:
query = """
SELECT title, COUNT(*) as count
FROM internships
GROUP BY title
ORDER BY count DESC
LIMIT 10;
"""

common_jobs = pd.read_sql_query(query, conn)
common_jobs

Unnamed: 0,Title,count
0,Cloud Platform Intern,324
1,Cloud Engineering Intern,297
2,Network Engineering Intern,272
3,AI Intern- Software Engineering,272
4,Systems Engineer- Intern,261
5,Engineering Internship,36
6,Engineering Internship at Superior Metals Manu...,20
7,Metallurgy Engineering Internship at CPP - Sum...,14
8,Mechanical Engineering Internship,9
9,Electrical Engineering Internship,9


### Top locations

In [79]:
query = """
SELECT location, COUNT(*) as total
FROM internships
GROUP BY location
ORDER BY total DESC
LIMIT 10;
"""

top_locations = pd.read_sql_query(query, conn)
top_locations

Unnamed: 0,Location,total
0,US,16
1,"New York City, New York",12
2,"Atlanta, Fulton County",12
3,"Austin, Travis County",9
4,"Rochester, Monroe County",8
5,"Miami, Miami-Dade County",8
6,"Kansas City, Jackson County",8
7,"Tacoma, Pierce County",7
8,"Scottsdale, Maricopa County",7
9,"San Jose, Santa Clara County",7


### Internships emphasizing soft skills

In [80]:
query = """
SELECT *
FROM internships
WHERE LOWER(description) LIKE '%communication%'
   OR LOWER(description) LIKE '%leadership%';
"""

soft_skills = pd.read_sql_query(query, conn)
soft_skills

Unnamed: 0,Title,Company,Location,Category,Created,Description,URL
0,Embedded Engineer Intern,"Ametek, Inc.","Mount Prospect, Cook County",Engineering Jobs,2025-03-26 23:26:31+00:00,"Ametek, Inc. Job description: Come Work With U...",https://www.adzuna.com/land/ad/5112940031?se=m...
1,Leadership Development Internship,Chick-fil-A,"Woodland Hills, Los Angeles County",Manufacturing Jobs,2024-08-08 09:38:55+00:00,We are excited to give college-age men and wom...,https://www.adzuna.com/details/4816838810?utm_...
2,Internship: Information Technology,Logistics Plus,"Coppell, Dallas",IT Jobs,2025-03-30 00:30:15+00:00,Job Details LP Dallas Coppell Office - Coppell...,https://www.adzuna.com/details/5118458791?utm_...
3,2025 Internship - Engineering,Kdm Engineering,"West Loop, Chicago",Engineering Jobs,2025-03-20 19:40:58+00:00,Internship Description 2025 Summer Internship ...,https://www.adzuna.com/details/5103612049?utm_...
4,Partnership Development Internship,617MediaGroup,"Boston, Suffolk County","PR, Advertising & Marketing Jobs",2024-08-04 05:42:21+00:00,"617MediaGroup, one of the fastest-growing prog...",https://www.adzuna.com/details/4809776289?utm_...
5,Internship: Information Technology,Shenandoah Valley Organic,"Harrisonburg, Harrisonburg City",Manufacturing Jobs,2025-02-26 21:58:04+00:00,Who We Are and What We Do Farmer Focus‚Äôs missi...,https://www.adzuna.com/details/5065118940?utm_...
6,Software Engineering - Internship,"Digital Factory, Inc.","Schiller Park, Cook County",IT Jobs,2025-04-01 05:41:26+00:00,Digital Factory is searching for a passionate ...,https://www.adzuna.com/details/5121196483?utm_...
7,Mechanical Engineering Internship,Communications & Power Industries,"Kilgore, Gregg County",Engineering Jobs,2025-03-23 00:23:10+00:00,Ignite your passion for engineering with hands...,https://www.adzuna.com/details/5107248750?utm_...
8,Mechanical Engineering Internship,Communications and Power Industries LLC,"Kilgore, Gregg County",Engineering Jobs,2025-03-22 00:58:44+00:00,Job Description Job Description Ignite your pa...,https://www.adzuna.com/details/5105714592?utm_...
9,Engineering Internship - Telecommunications,Olsson,"South Sioux City, Dakota County",Engineering Jobs,2025-03-30 12:23:05+00:00,Olsson‚Äôs Telecommunications team provides clie...,https://www.adzuna.com/details/5118814298?utm_...
