In [None]:
# ! pip install git+https://git@github.com/ping/instagram_private_api.git@1.6.0 --upgrade
# ! pip install instaloader

In [1]:
from instagram_private_api import Client, ClientCompatPatch
from instagram_private_api.errors import ClientError  
import time
class Instagram(object):
    """Convenience wrapper around ``instagram_private_api.Client``.

    Every query goes through ``Client.user_detail_info(user)`` and picks the
    relevant part out of the returned dictionary, so ``user`` must be whatever
    identifier that call accepts.
    """

    def __init__(self, username = None, password = None):
        """Store the credentials and open an API session immediately."""
        self.username = username
        self.password = password
        self.api = self.login()

    def login(self):
        """Return a new ``Client`` built from the stored credentials."""
        return Client(self.username, self.password)

    def _friendship_status(self, user):
        """Return the ``friendship_status`` dictionary of ``user``."""
        return self.api.user_detail_info(user)['user_detail']['user']['friendship_status']

    def get_userId(self, user):
        """Return the id needed to send a follow / unfollow request for ``user``."""
        return self.api.user_detail_info(user)['reel_feed']['id']

    def get_mediaId(self, user):
        """Return the media ids found in the feed of a public account.

        Note: only the top 12 media ids are returned when the account has more
        than 12 posts; otherwise the ids of all posts are returned.
        """
        return [item['id'] for item in self.api.user_detail_info(user)['feed']['items']]

    def follow(self, user):
        """Send a follow request to ``user``."""
        self.api.friendships_create(self.get_userId(user))

    def unfollow(self, user):
        """Stop following ``user``."""
        self.api.friendships_destroy(self.get_userId(user))

    def likeUserPhotos(self, user, amount = None, sleepTime = 2):
        """Like the first ``amount`` media items in the feed of ``user``.

        Parameters
        ----------
        user : identifier forwarded to ``get_mediaId``.
        amount : int or None
            Maximum number of items to like. ``None`` (the default) or a value
            larger than the number of available items likes all of them;
            zero or a negative value likes nothing.
        sleepTime : number
            Seconds to pause after every like; change it if required.

        Example:
            >>> insta.likeUserPhotos(user, amount = None, sleepTime = 2)
        """
        media_ids = self.get_mediaId(user)
        if amount is not None:
            # Clamp at zero so a negative amount likes nothing instead of
            # being interpreted as "all but the last N" by the slice.
            media_ids = media_ids[:max(amount, 0)]
        for media_id in media_ids:
            self.api.post_like(media_id)
            time.sleep(sleepTime)

    def privateAccount(self, user):
        """Return True if the account is private, else False."""
        return self.api.user_detail_info(user)['user_detail']['user']['is_private']

    def following(self, user):
        """Return True if the logged-in account follows ``user``, else False."""
        return self._friendship_status(user)['following']

    def followed_by(self, user):
        """Return True if the logged-in account is followed by ``user``, else False."""
        return self._friendship_status(user)['followed_by']


In [2]:
import pandas as pd
import numpy as np
from joblib import load

class featureExtraction():
    """Turn raw profile counts into the feature table used by the model.

    The input frame must provide the columns ``username``, ``post``,
    ``privacy``, ``followings`` and ``followers``. ``followings`` and
    ``followers`` are log-transformed and divided by each other, so callers
    should drop rows where either is zero beforehand.
    """

    # (exclusive lower bound, category) pairs, checked from largest to smallest.
    _COUNT_BUCKETS = ((10000, 5), (1000, 4), (500, 3), (100, 2))

    def __init__(self, dataframe):
        """Copy the needed columns out of ``dataframe`` as numpy arrays."""
        self.username = dataframe['username'].values
        self.post = dataframe['post'].values
        self.privacy = dataframe['privacy'].values
        self.following = dataframe['followings'].values
        self.followers = dataframe['followers'].values
        self.ln_followings = np.log(dataframe['followings'].values)
        self.ln_followers = np.log(dataframe['followers'].values)

        # For modelling we use output. While using model disable it
        # self.output = dataframe['Output'].values

    @staticmethod
    def _bucket(values, buckets, default):
        """Map each value to the category of the first bound it exceeds.

        ``buckets`` is a sequence of ``(bound, category)`` pairs tested in
        order with a strict ``>``; values exceeding no bound get ``default``.
        """
        categories = []
        for value in values:
            for bound, category in buckets:
                if value > bound:
                    categories.append(category)
                    break
            else:
                categories.append(default)
        return categories

    def _following(self):
        """Return the following-count category (1-5) of every row."""
        return self._bucket(self.following, self._COUNT_BUCKETS, 1)

    def _follower(self):
        """Return the follower-count category (1-5) of every row."""
        return self._bucket(self.followers, self._COUNT_BUCKETS, 1)

    def _post(self):
        """Return the post-count category (0-5) of every row.

        0 means fewer than one post; 1-5 correspond to at least 1 and more
        than 10, 50, 100 and 500 posts respectively.
        """
        category = []
        for count in self.post:
            if count > 500:
                category.append(5)
            elif count > 100:
                category.append(4)
            elif count > 50:
                category.append(3)
            elif count > 10:
                category.append(2)
            elif count >= 1:
                category.append(1)
            else:
                category.append(0)
        return category

    def _privacy(self):
        """Encode the privacy column.

        2: "Open" account
        1: "Private" account
        0: anything else (unknown)
        """
        category = []
        # Iterate over the privacy labels; the post counts never match
        # the string labels and would map every row to 0.
        for privacy in self.privacy:
            if privacy == "Open":
                category.append(2)
            elif privacy == "Private":
                category.append(1)
            else:
                category.append(0)
        return category

    def _ratioFollowingFollower(self):
        """Return followings divided by followers, element-wise."""
        return self.following / self.followers

    def exeFeatureExtraction(self):
        """Build and return the feature DataFrame.

        Columns, in order: ``ln_following``, ``ln_follower``,
        ``post_category``, ``privacy_category``, ``follower_category``,
        ``following_category``, ``ratio_Follower_Following``.
        """
        df = pd.DataFrame(data = self.ln_followings, columns = ['ln_following'])
        df['ln_follower'] = self.ln_followers
        df['post_category'] = self._post()
        df['privacy_category'] = self._privacy()
        df['follower_category'] = self._follower()
        df['following_category'] = self._following()
        # NOTE(review): despite its name this column holds followings / followers;
        # the name is kept so the column layout stays unchanged for consumers.
        df['ratio_Follower_Following'] = self._ratioFollowingFollower()

        # while modelling for a new user we use output,
        # df['output'] = self.output
        return df

In [7]:
from instaloader import Instaloader, Profile
from instaloader.exceptions import *
from itertools import dropwhile, takewhile
import time
from datetime import datetime, timedelta
import numpy as np

class InstaDataExtractor(object):
    """Read-only data collection helpers built on an ``Instaloader`` session."""

    def __init__(self):
        """Create the Instaloader session used by every method."""
        self.L = self.login()

    def login(self):
        """Return a fresh ``Instaloader`` instance (no credentials are supplied)."""
        L = Instaloader()
        return L

    def get_likes(self, hashtag = 'travel'):
        """Return up to 100 unique usernames that liked recent ``hashtag`` posts.

        Posts are taken from ``Instaloader.get_hashtag_posts``; posts dated
        later than two days from now are skipped and iteration stops at the
        first post older than one day. The result is a sorted numpy array
        and is empty when no post falls inside that window.
        """
        assert isinstance(hashtag, str)
        L = self.L
        data = []
        posts = L.get_hashtag_posts(hashtag)

        # NOTE(review): these are naive local timestamps; confirm that
        # ``post.date`` uses the same clock.
        # Since is big | less past
        SINCE = datetime.today() + timedelta(days = 2)
        # Until is small | past
        UNTIL = datetime.today() + timedelta(days = -1)

        # Bound before the loop so an empty window returns an empty array
        # instead of raising UnboundLocalError.
        users = np.array([], dtype = str)

        # Only recently active users are collected.
        recent_posts = takewhile(lambda p: p.date > UNTIL,
                                 dropwhile(lambda p: p.date > SINCE, posts))
        for post in recent_posts:
            data.extend(like.username for like in post.get_likes())
            users = np.unique(data)
            if len(users) >= 100:
                break

        return users[:100]

    def get_followers(self, user):
        """Return the usernames of all followers of ``user`` as a list."""
        profile = Profile.from_username(self.L.context, user)
        return [follower.username for follower in profile.get_followers()]

    def get_followees(self, user):
        """Return the usernames of all accounts ``user`` follows as a list."""
        profile = Profile.from_username(self.L.context, user)
        return [followee.username for followee in profile.get_followees()]

    def get_post(self, user, path = ''):
        """Download every post of ``user`` via ``download_post`` with target ``path``."""
        L = self.L
        profile = Profile.from_username(L.context, user)
        for post in profile.get_posts():
            L.download_post(post, target = path)

    def users_data(self, users):
        """Collect profile metadata for every username in ``users``.

        Returns a list of tuples in the order
        ``(username, followers, followees, private_account, post_media,
        post_igtv, full_name, biography)``. Usernames whose profile does not
        exist are skipped silently.

        Suggested to pass about 100 users at a time.
        """
        L = self.L
        data = []
        for count, user in enumerate(users):
            try:
                profile = Profile.from_username(L.context, user)
                # Short pause on every fifth profile to space out requests.
                if count % 5 == 0:
                    time.sleep(1)
                data.append((
                    user,
                    profile.followers,
                    profile.followees,
                    profile.is_private,
                    profile.mediacount,
                    profile.igtvcount,
                    profile.full_name,
                    profile.biography,
                ))
            except ProfileNotExistsException:
                # Deliberate best effort: missing profiles are left out.
                continue
        return data

In [None]:
# Credentials must never be stored in the notebook: it is shared and versioned
# together with its outputs. They are read from the environment when set,
# otherwise prompted for interactively (the password is not echoed).
import os
from getpass import getpass

insta = Instagram(
    username = os.environ.get('INSTAGRAM_USERNAME') or input('Instagram username: '),
    password = os.environ.get('INSTAGRAM_PASSWORD') or getpass('Instagram password: '),
)

In [9]:
import pandas as pd
from datetime import datetime, timedelta

# Shared scraper instance used by the data-collection cell below; its
# constructor creates an Instaloader session without supplying credentials.
dataExtractor = InstaDataExtractor()


In [None]:
# Collect, for every hashtag, the profiles of users who recently liked posts
# under it and store them as one CSV per hashtag:
#   <path><hashtag><position in list><date>.csv
path = 'data/'
# hashtags = ['fcbarcelona', 'cairo', 'weedmemes']
hashtags = ['malaga', 'granada', 'bilbao']
# hashtags = ['gay', 'nigeria', 'blackmemes']
# hashtags = ['meme', 'coronavirus', 'jesus']

columns = ['username', 'followers', 'followees', 'private_account', 'post_media', 'post_igtv', 'full_name', 'biography']

today = datetime.today()
for count, hashtag in enumerate(hashtags):
    print(hashtag)
    start = time.time()
    # Query the hashtag of this iteration, so the data matches the label and
    # file name written below.
    users = dataExtractor.get_likes(hashtag = hashtag)
    print(len(users))
    time.sleep(2)
    data = dataExtractor.users_data(users)
    print(len(data))
    dataframe = pd.DataFrame(data = data, columns = columns)
    dataframe['hashtag'] = hashtag
    dataframe['date'] = today.date()
    dataframe.to_csv(path + hashtag + str(count) + str(today.date()) + '.csv', index = False)
    end = time.time()
    print(hashtag + str(count), end - start)

fcbarcelona
100
100
fcbarcelona0 299.4555633068085
cairo
100

Too many queries in the last time. Need to wait 196 seconds, until 15:04.


In [None]:
# Score previously scraped public profiles with the stored model and keep the
# usernames the model flags with 0.

# NOTE(review): joblib.load unpickles the file and can execute arbitrary code;
# only load model files from a trusted source.
model = load('model/lgm.model')
df = pd.read_csv('data/tokyo22020-04-22.csv')

# .copy() makes the filtered frame independent, so the column assignments
# below neither warn about nor silently miss a chained assignment.
dataframe = df[df['private_account'] == False].copy()
dataframe['private_account'] = 'Open'
dataframe['post'] = dataframe['post_media'] + dataframe['post_igtv']
data = dataframe.rename(columns = {'private_account': 'privacy', 'followees': 'followings'})

data = data[['username', 'privacy', 'post', 'followings', 'followers']]
# Zero counts would break the log / ratio features, so those rows are dropped.
data = data[(data.followings != 0) & (data.followers != 0) & (data.privacy == "Open")]

if len(data) == 0:
    # Nothing to score: keep model_flag defined so the selection below yields
    # an empty result instead of raising NameError.
    model_flag = np.array([], dtype = int)
else:
    feature = featureExtraction(data)
    df = feature.exeFeatureExtraction()
    X = df.iloc[:, :7].values
    model_flag = model.predict(X)  #[0]

toFollow = pd.DataFrame()
toFollow['username'] = data['username']
toFollow['Flag'] = model_flag
toFollow = toFollow[toFollow['Flag'] == 0]

users = toFollow['username'].values

In [None]:
# Send a follow request to every selected user that has no existing
# relationship with the logged-in account, pausing between API calls.
# time.sleep(650)
print('total users to follow here is ', len(users))
RequestSent = 0
privateAccount = 0
Error = 0
for user in users:
    try:
        if (insta.following(user) == False and insta.followed_by(user) == False ):
            time.sleep(2)
            insta.follow(user)
            time.sleep(2)

            if insta.privateAccount(user):
                # Media ids are only documented as available for public
                # accounts, so private ones are counted and not liked.
                privateAccount += 1
            else:
                insta.likeUserPhotos(user, amount = 4, sleepTime = 2)
            print('Sent Request to : ', user)
            time.sleep(2)
            RequestSent += 1
        else:
            print('already know this user : ', user)

    except ClientError:
        # Best effort: report the failure and continue with the next user.
        print('Error in requesting : ', user)
        time.sleep(2)
        Error += 1

print('Total Request Sent :', RequestSent)
print('Total Error Happened:', Error)
print('Total Private Account:', privateAccount)
print(len(users))