<a href="https://colab.research.google.com/github/chenchenga07/Python-Data-Analysis/blob/master/Instagram%20API/Pulling_Data_through_Instagram_API.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
import json
import pandas as pd
import yaml


def business_discovery(
        target_account: str,
        limit_posts: int = 500,
        timeout: float = 60.0,
) -> dict:
    """Query the Graph API Business Discovery endpoint for an Instagram account.

    Credentials are read from ``api_keys.yml`` on the mounted Google Drive
    (keys ``ig_api.user_id`` and ``ig_api.access_token``).

    Args:
        target_account: Username of the Instagram account to look up.
        limit_posts: Value placed in ``media.limit(...)`` of the query.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The decoded JSON response as a dict. On success it contains a
        ``business_discovery`` entry with ``followers_count``,
        ``media_count`` and ``media``.

    Raises:
        requests.HTTPError: If the API answers with an error status. The
            failed response is available as the exception's ``response``.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # Load the Instagram API keys
    with open(
        '/content/drive/My Drive/Colab Notebooks/Instagram Data Analysis/api_keys.yml',
        encoding='utf-8',
    ) as api_keys_config:
        api_cred = yaml.safe_load(api_keys_config)
    api_user_id = api_cred['ig_api']['user_id']
    api_access_token = api_cred['ig_api']['access_token']

    base_url = 'https://graph.facebook.com/v7.0/'
    url = f'{base_url}{api_user_id}'

    # Doubled braces are literal braces in the Graph API field-expansion syntax
    fields = (
        f'business_discovery.username({target_account})'
        f'{{followers_count,media_count,media.limit({limit_posts})'
        '{media_type,caption,comments_count,like_count,timestamp}}'
    )
    params = {
        'fields': fields,
        'access_token': api_access_token,
    }

    # Without a timeout, requests would wait forever on a stalled connection
    response = requests.get(url, params=params, timeout=timeout)

    if not response.ok:
        # The message deliberately omits the request URL, which carries the
        # access token as a query parameter.
        raise requests.HTTPError(
            f'Instagram Graph API request failed with status '
            f'{response.status_code}: {response.text}',
            response=response,
        )

    return response.json()


In [2]:
# Smoke test: request a single post from the 'bluebottle' account
business_discovery('bluebottle',1) 

{'business_discovery': {'followers_count': 387563,
  'id': '17841401441775531',
  'media': {'data': [{'caption': 'Here’s to the peace and comfort of connecting over coffee. 📸hangrygirlz',
     'comments_count': 1,
     'id': '17843235818346024',
     'like_count': 1193,
     'media_type': 'IMAGE',
     'timestamp': '2020-08-29T14:30:46+0000'}],
   'paging': {'cursors': {'after': 'QVFIUnZA3WHcyb2lEeXlJWkp6NndYc1haWXdKSmlMT1FWdUxzY19aR2d2WHpFXzFYcmRWZAmtYVGxTSGVhWnVyck9SNXhxOU5UVGVsbnpGNExjRGZAsa1FKNENR'}}},
  'media_count': 1629},
 'id': '17841400840307442'}

In [3]:
def post_statistics(
        target_account: str,
        limit_posts: int = 50,
) -> pd.DataFrame:
    """Return the posts of an Instagram account as a DataFrame.

    Args:
        target_account: Username of the Instagram account to look up.
        limit_posts: Post limit forwarded to ``business_discovery``.

    Returns:
        A DataFrame built from the ``media.data`` records of the
        Business Discovery response (one row per post).
    """
    response = business_discovery(target_account, limit_posts)
    post_records = response['business_discovery']['media']['data']
    return pd.DataFrame(post_records)

# Try pulling the latest 5 posts of the 'jimmychoo' account
posts = post_statistics('jimmychoo',5)
posts

# Optional exports of the sample (uncomment to write the files):
#posts.to_csv('posts.csv', index=True)
#posts.to_excel('posts.xlsx', index = True)

Unnamed: 0,media_type,caption,comments_count,like_count,timestamp,id
0,IMAGE,Escape high summer afternoons and lounge in st...,55,22540,2020-08-29T14:01:08+0000,17885022049701442
1,IMAGE,Summer whites that last a lifetime: pair the L...,45,12764,2020-08-27T15:53:12+0000,17876236183821109
2,VIDEO,Coming soon: the latest #INMYCHOOS campaign fo...,102,12937,2020-08-25T16:02:50+0000,17959864612343903
3,IMAGE,Garden gatherings never looked so stylish than...,82,28575,2020-08-23T14:00:34+0000,17893325662599619
4,IMAGE,Taking a pause with the AUNA mules. Exuding a ...,70,18293,2020-08-22T13:59:38+0000,17922105067439662


In [4]:
def followers_c(
        target_account: str,
        limit_posts: int = 50,
) -> int:
    """Return the total follower count of an Instagram account.

    Args:
        target_account: Username of the Instagram account to look up.
        limit_posts: Post limit forwarded to ``business_discovery``. The
            posts themselves are not used here, so a small value such as
            1 keeps the request light.

    Returns:
        The ``followers_count`` value of the Business Discovery response.
    """
    timeline = business_discovery(target_account, limit_posts)
    return timeline['business_discovery']['followers_count']

# Try pulling the total follower count of the 'stuartweitzman' account
total_followers = followers_c('stuartweitzman',1)
total_followers

1420535

In [5]:
def media_c(
        target_account: str,
        limit_posts: int = 50,
) -> int:
    """Return the total number of posts of an Instagram account.

    Args:
        target_account: Username of the Instagram account to look up.
        limit_posts: Post limit forwarded to ``business_discovery``. The
            posts themselves are not used here, so a small value such as
            1 keeps the request light.

    Returns:
        The ``media_count`` value of the Business Discovery response.
    """
    timeline = business_discovery(target_account, limit_posts)
    return timeline['business_discovery']['media_count']

# Try pulling the total post count of the 'stuartweitzman' account
total_media = media_c('stuartweitzman',1)
total_media

4132

In [6]:
# Instagram accounts to analyze
target_usernames = ['stuartweitzman','aquazzura','gianvitorossi','manoloblahnik',
                    'tamaramellon','tods','sergiorossi','mgemi',
                    'jimmychoo','louboutinworld','ferragamo',
                    'gucci','byfar_official']

# Build one DataFrame per brand, then combine them into a single export
posts_all = []

for name in target_usernames:
    # A single Business Discovery response already carries the posts and
    # both account totals, so one request per brand is enough.
    account = business_discovery(name, 300)['business_discovery']

    posts = pd.DataFrame(account['media']['data'])
    posts['brand'] = name
    posts['total_followers'] = account['followers_count']
    posts['total_media'] = account['media_count']

    posts_all.append(posts)


# Stack the per-brand frames with a fresh 0..n-1 index
posts_all_df = pd.concat(posts_all, ignore_index=True)
posts_all_df.head()

Unnamed: 0,media_type,caption,comments_count,like_count,timestamp,id,brand,total_followers,total_media
0,IMAGE,Vote for yourself. Vote for your community. Vo...,11,1180,2020-08-28T13:24:35+0000,18160097662007249,stuartweitzman,1420535,4132
1,VIDEO,Live footage from checking our voter registrat...,8,541,2020-08-26T20:50:00+0000,18160751644038539,stuartweitzman,1420535,4132
2,IMAGE,From our partner iamavoter: Four million young...,11,1325,2020-08-25T20:35:10+0000,17975347864313438,stuartweitzman,1420535,4132
3,IMAGE,Decisions are made by those who show up. Your ...,11,1409,2020-08-25T16:32:45+0000,17892158104606097,stuartweitzman,1420535,4132
4,IMAGE,"Our limited-edition #5050VOTE boot, designed f...",12,1814,2020-08-24T16:45:05+0000,17903527630504083,stuartweitzman,1420535,4132


In [None]:
# Write the combined posts of all brands to Google Drive (row index included)
posts_all_df.to_csv('/content/drive/My Drive/Colab Notebooks/Instagram Data Analysis/All_Brands_Posts.csv', index=True)

In [None]:
#export 1 file for 1 brand - if needed
#for name in target_usernames:
#  posts = post_statistics(name,20)
#  posts.to_csv(f'Export files/{name}_posts.csv', index=True)