In [1]:
#import libraries
import time
import pandas as pd
from google_play_scraper import reviews, Sort, app

In [2]:
#scrape settings: target app, locale, and fetch pacing
APP_ID = "com.linkedin.android"   #Google Play package id of the LinkedIn app
LANG = "en"                       #request English-language reviews
COUNTRY = "us"                    #request the US storefront
MAX_REVIEWS_TOTAL = 10_000        #stop paging once this many rows are collected
BATCH_SIZE = 200                  #rows requested per call (noted as the scraper's per-call max)
SLEEP_SEC = 0.8                   #pause between consecutive calls, in seconds

#page through the newest reviews, accumulating raw rows in all_rows
all_rows = []
token = None    #None on the first call; afterwards whatever reviews() handed back
while True:
    batch, token = reviews(
        APP_ID,
        lang = LANG,
        country = COUNTRY,
        sort = Sort.NEWEST,
        count = BATCH_SIZE,
        continuation_token = token
    )
    all_rows.extend(batch)
    print(f"Fetched so far: {len(all_rows)}")
    #stop if no more pages or we hit our target
    #an empty batch also counts as "no more pages": the returned token is not
    #guaranteed to become None when reviews run out (NOTE(review): the scraper
    #appears to return a token object every time - confirm), and without this
    #check the loop would spin forever on an app with fewer reviews than the target
    if not batch or token is None or len(all_rows) >= MAX_REVIEWS_TOTAL:
        break
    #only sleep when another request is actually going to be made
    time.sleep(SLEEP_SEC)

#turn the collected rows into a table; bail out early if nothing came back
df = pd.DataFrame(all_rows)
if df.empty:
    raise RuntimeError("No reviews were fetched. Try lowering MAX_REVIEWS_TOTAL or checking connectivity.")

#columns to retain, and the friendlier names some of them get
keep_cols = ["reviewId", "userName", "score", "at", "content", "replyContent"]
renamed_cols = {
    "score": "rating",
    "at": "date",
    "content": "review",
    "replyContent": "dev_reply",
}

#select -> rename -> tag every row with the locale that was requested
df = (
    df.loc[:, keep_cols]
    .rename(columns = renamed_cols)
    .assign(lang = LANG, country = COUNTRY)
)

#deduplicate on reviewId and drop reviews with no text
before = len(df)
df = df.drop_duplicates("reviewId")
#fill missing text with "" before stringifying: astype(str) alone does not turn
#None/NaN into an empty string, so rows without any review text would otherwise
#survive the "empty" filter
has_text = df["review"].fillna("").astype(str).str.strip().ne("")
df = df[has_text]
after = len(df)
print(f"Dedup/empty filter: {before} -> {after}")

Fetched so far: 200
Fetched so far: 400
Fetched so far: 600
Fetched so far: 800
Fetched so far: 1000
Fetched so far: 1200
Fetched so far: 1400
Fetched so far: 1600
Fetched so far: 1800
Fetched so far: 2000
Fetched so far: 2200
Fetched so far: 2400
Fetched so far: 2600
Fetched so far: 2800
Fetched so far: 3000
Fetched so far: 3200
Fetched so far: 3400
Fetched so far: 3600
Fetched so far: 3800
Fetched so far: 4000
Fetched so far: 4200
Fetched so far: 4400
Fetched so far: 4600
Fetched so far: 4800
Fetched so far: 5000
Fetched so far: 5200
Fetched so far: 5400
Fetched so far: 5600
Fetched so far: 5800
Fetched so far: 6000
Fetched so far: 6200
Fetched so far: 6400
Fetched so far: 6600
Fetched so far: 6800
Fetched so far: 7000
Fetched so far: 7200
Fetched so far: 7400
Fetched so far: 7600
Fetched so far: 7800
Fetched so far: 8000
Fetched so far: 8200
Fetched so far: 8400
Fetched so far: 8600
Fetched so far: 8800
Fetched so far: 9000
Fetched so far: 9200
Fetched so far: 9400
Fetched so far: 9

In [3]:
#write the cleaned reviews to disk: flat CSV plus JSON Lines (one record per line)
csv_path = "linkedin_reviews_raw.csv"
jsonl_path = "linkedin_reviews_raw.jsonl"
df.to_csv(csv_path, index = False)
df.to_json(jsonl_path, orient = "records", lines = True)

#row count per (country, lang) pair, then a peek at the first rows
print("Final counts:")
locale_counts = df.groupby(["country", "lang"]).size()
print(locale_counts)
df.head()

Final counts:
country  lang
us       en      10000
dtype: int64


Unnamed: 0,reviewId,userName,rating,date,review,dev_reply,lang,country
0,40cc1348-f2e3-4154-a507-3d63938bea63,abhijagan aj777,5,2025-09-16 15:04:36,it really connects people to move forward the ...,,en,us
1,b5f17747-a4e8-48f5-9de1-70f4ae674d97,Abdulrosheed Abdulazeez,5,2025-09-16 15:00:50,human resources specialist,,en,us
2,ec9cef9b-c43e-4555-9e38-16d530c638e1,Eleonora Yuabov,5,2025-09-16 14:57:36,"love the platform, Linkedin games and LinkedIn...",,en,us
3,9faf2b34-aca6-4af4-901a-e3990a37b1fd,Mayank kumar Rawat,5,2025-09-16 14:47:14,nice,,en,us
4,1002d7e6-1d93-48c6-a807-f79e5546bfbb,Siva Vishwa S,5,2025-09-16 14:46:10,good,,en,us
