In [1]:
from instagrapi import Client
from instagrapi.exceptions import LoginRequired
import pandas as pd
# python dotenv
from dotenv import load_dotenv
import os

load_dotenv()

USERNAME = os.getenv("USERNAME")
PASSWORD = os.getenv("PASSWORD")

number_of_posts = 20
number_of_influencers = 10

client = Client()

# Attempt to load session
try: 
    session = client.load_settings("session.json")
except:
    session = None

login_via_session = False
login_via_pw = False

# Attempt login via session
if session:
    try:
        client.set_settings(session)
        client.login(USERNAME, PASSWORD)

        # Check if session is valid
        try:
            client.get_timeline_feed()
        except LoginRequired:
            print("Session is invalid, need to login via username and password")

            old_session = client.get_settings()

            # Use the same device uuids across logins
            client.set_settings({})
            client.set_uuids(old_session["uuids"])

            client.login(USERNAME, PASSWORD)
            client.dump_settings("session.json")
            
        login_via_session = True
    except Exception as e:
        print(f"Couldn't login user using session information: {e}")

# Attempt login via username and password if session login fails
if not login_via_session:
    try:
        print(f"Attempting to login via username and password. Username: {USERNAME}")
        if client.login(USERNAME, PASSWORD):
            client.dump_settings("session.json")
            login_via_pw = True
    except Exception as e:
        print(f"Couldn't login user using username and password: {e}")
else:
    print("Logged in via session")

if not login_via_pw and not login_via_session:
    print("Couldn't login user with either password or session")


def get_user_id(username):
    try:
        return client.user_id_from_username(username)
    except:
        return None

def get_user_medias(user_id):
    try: 
        return client.user_medias(user_id, number_of_posts)
    except:
        return None
        

Couldn't login user using session information: challenge_required
Attempting to login via username and password. Username: cocha.app


In [2]:
# Read influencer data from CSV
influencer = pd.read_csv("influencer.csv")


In [3]:
round = 144
number_of_influencers = 15

# create df_all to store all the data
df_all = pd.DataFrame()

for index, row in influencer.iterrows():
    if index < round:
        continue
    if index >= number_of_influencers + round:
        break

    print(f"### {index} ###")

    display_name = row["display_name"]
    username = row["username"]
    user_id = get_user_id(username)
    if user_id is None:
        print(f"User {username} not found")
        continue
    medias = get_user_medias(user_id)
    if medias is None:
        print(f"Media for {username} not found")
        continue

    print(f"### {username} ###")

    # Create DataFrame from media data
    df = pd.DataFrame(
        {
            "taken_at": [media.taken_at for media in medias],
            "media_type": [media.media_type for media in medias],
            # "image_versions2": [str(media.image_versions2) for media in medias],
            "resources": [
                ", ".join(
                    [
                        str(resource.thumbnail_url)
                        for resource in getattr(media, "resources", [])
                    ]
                )
                for media in medias
            ],
            "comment_count": [
                getattr(media, "comment_count", None) for media in medias
            ],
            "like_count": [getattr(media, "like_count", None) for media in medias],
            "play_count": [getattr(media, "play_count", None) for media in medias],
            "caption_text": [getattr(media, "caption_text", None) for media in medias],
            "accessibility_caption": [
                getattr(media, "accessibility_caption", None) for media in medias
            ],
            "thumbnail_url": [
                getattr(media, "thumbnail_url", None) for media in medias
            ],
            # "usertags": [
            #     ", ".join(
            #         [
            #             usertag.user.username
            #             for usertag in getattr(media, "usertags", [])
            #         ]
            #     )
            #     for media in medias
            # ],
            # "sponsor_tags": [
            #     ", ".join(
            #         [
            #             sponsor_tag.user.username
            #             for sponsor_tag in getattr(media, "sponsor_tags", [])
            #         ]
            #     )
            #     for media in medias
            # ],
            "usertags": [
                    ", ".join(
                        [
                            usertag.user.username
                            for usertag in getattr(media, "usertags", [])
                            if hasattr(usertag, 'user')
                        ]
                    )
                    for media in medias
                ],
            "sponsor_tags": [
                ", ".join(
                    [
                        sponsor_tag.username                            
                        for sponsor_tag in getattr(media, "sponsor_tags", [])
                        if hasattr(sponsor_tag, 'username')
                    ]
                )
                for media in medias
            ],
            
            "video_url": [getattr(media, "video_url", None) for media in medias],
            "view_count": [getattr(media, "view_count", None) for media in medias],
            # "title": [getattr(media, "title", None) for media in medias],
        }
    )

    # concatenate df to df_all
    df["username"] = username
    df["display_name"] = display_name

    # changee taken_at to datetime
    # 確保 'taken_at' 是 datetime 格式，如果不是可以轉換為 datetime
    df["taken_at"] = pd.to_datetime(df["taken_at"])

    # 移除時區信息
    df["taken_at"] = df["taken_at"].dt.tz_localize(None)
    
    df_all = pd.concat([df_all, df])
    


    # Display the DataFrame
    # display(df)

# Save the DataFrame to a xlsx file, with round number in file name
df_all.to_excel(f"influencer_data{round}.xlsx", index=False)



### 144 ###


Status 201: JSONDecodeError in public_request (url=https://www.instagram.com/ts_melody1101/?__a=1&__d=dis) >>> 
Status 201: JSONDecodeError in public_request (url=https://www.instagram.com/ts_melody1101/?__a=1&__d=dis) >>> 
Status 201: JSONDecodeError in public_request (url=https://www.instagram.com/ts_melody1101/?__a=1&__d=dis) >>> 


User ts_melody1101 not found
### 145 ###


Status 201: JSONDecodeError in public_request (url=https://www.instagram.com/miandbanban/?__a=1&__d=dis) >>> 
Status 201: JSONDecodeError in public_request (url=https://www.instagram.com/miandbanban/?__a=1&__d=dis) >>> 
Status 201: JSONDecodeError in public_request (url=https://www.instagram.com/miandbanban/?__a=1&__d=dis) >>> 


User miandbanban not found
### 146 ###


Status 201: JSONDecodeError in public_request (url=https://www.instagram.com/sonia_0929/?__a=1&__d=dis) >>> 
Status 201: JSONDecodeError in public_request (url=https://www.instagram.com/sonia_0929/?__a=1&__d=dis) >>> 
Status 201: JSONDecodeError in public_request (url=https://www.instagram.com/sonia_0929/?__a=1&__d=dis) >>> 


User sonia_0929 not found
### 147 ###


Status 201: JSONDecodeError in public_request (url=https://www.instagram.com/somethingstudiotaiwan/?__a=1&__d=dis) >>> 
Status 201: JSONDecodeError in public_request (url=https://www.instagram.com/somethingstudiotaiwan/?__a=1&__d=dis) >>> 
Status 201: JSONDecodeError in public_request (url=https://www.instagram.com/somethingstudiotaiwan/?__a=1&__d=dis) >>> 


User somethingstudiotaiwan not found
### 148 ###


Status 201: JSONDecodeError in public_request (url=https://www.instagram.com/jimmy97713/?__a=1&__d=dis) >>> 
Status 201: JSONDecodeError in public_request (url=https://www.instagram.com/jimmy97713/?__a=1&__d=dis) >>> 
Status 201: JSONDecodeError in public_request (url=https://www.instagram.com/jimmy97713/?__a=1&__d=dis) >>> 


In [None]:
# # for all sponser tags in df, print len
# for index, row in df.iterrows():
#     if row["sponsor_tags"]:
#         print(len(row["sponsor_tags"]))