In [None]:
from instaloader import Instaloader, Profile, Post 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
import os
from time import sleep
from tqdm.notebook import tqdm


In [None]:
def reali_login_context(login_path):
    """Log in to Instagram with credentials read from a text file.

    The file is expected to contain ``username,password``. Surrounding
    whitespace (including the trailing newline most editors add) is stripped
    from both fields so it does not end up inside the password.

    Args:
        login_path: Path to the credentials file.

    Returns:
        The ``Instaloader`` instance on which ``login`` was called.
    """
    # Read-only access is sufficient; the context manager closes the handle
    # (the previous version opened the file with 'r+' and never closed it).
    with open(login_path, 'r') as login_file:
        load_login = [field.strip() for field in login_file.read().split(",")]

    L = Instaloader()
    L.login(load_login[0], load_login[1])
    print(f'Login realizado com sucesso na conta: {load_login[0]}!!!')

    return L

In [None]:
def analys_post(shortcode, context):
    """Fetch a single post by shortcode and return it as a one-row DataFrame.

    Args:
        shortcode: Shortcode of the post to load.
        context: Either an Instaloader context or an object exposing one as
            ``.context`` (e.g. the logged-in ``Instaloader`` instance).

    Returns:
        DataFrame with one row holding the post's shortcode, author, likes,
        comments, video view count, type, video flag and date, plus the
        derived ``year``, ``month``, ``day`` (weekday number) and ``hour``.
    """
    # The previous version ignored `context` and read the notebook-level `L`,
    # which only exists after the login cell has been executed.
    ctx = getattr(context, 'context', context)
    post = Post.from_shortcode(ctx, shortcode)

    columns_data = ['shortcode', 'autor', 'likes', 'comments', 'video_view_count', 'type', 'is_video', 'date']
    list_post = [post.shortcode, post.owner_username, post.likes, post.comments, post.video_view_count,
                 post.typename, post.is_video, post.date_local]

    # Build the frame directly: DataFrame.append was removed in pandas 2.0.
    data = pd.DataFrame([list_post], columns=columns_data)
    data = data.fillna(0)

    # np.int was removed in NumPy 1.24; the builtin int is the same dtype request.
    count_cols = ['likes', 'comments', 'video_view_count']
    data[count_cols] = data[count_cols].astype(int, errors='ignore')

    dates = pd.DatetimeIndex(data['date'])
    data['year'] = dates.year
    data['month'] = dates.month
    data['day'] = dates.weekday
    data['hour'] = dates.hour

    return data

In [None]:
def create_data(name, context, num_posts = 30, save = False, pin = True):
    """Collect the most recent posts of a profile into a DataFrame.

    Args:
        name: Profile username; also used as the folder name under ``csv/``.
        context: Instaloader context passed to ``Profile.from_username``.
        num_posts: Maximum number of posts pulled from the profile feed
            (pinned posts that get skipped still count towards this limit).
        save: When True, write the result to ``csv/<name>/`` as a CSV file.
        pin: When False, pinned posts are skipped.

    Returns:
        DataFrame with one row per collected post (unique 0..n-1 index) plus
        the derived ``year``, ``month``, ``day`` (weekday number) and ``hour``.
        When ``save`` is True the returned frame is sorted by date.
    """
    profile = Profile.from_username(context, name)
    columns_data = ['shortcode', 'autor', 'likes', 'comments', 'video_view_count', 'followers', 'type', 'is_video', 'date']

    # Rows are accumulated in a list and turned into a frame once:
    # DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
    rows = []
    count = 0
    # The bounded counter goes first so zip() stops *before* requesting one
    # more post from the feed once num_posts has been reached.
    for _, post in zip(tqdm(range(num_posts)), profile.get_posts()):
        if not post.is_pinned or pin:
            rows.append([post.shortcode, post.owner_username, post.likes, post.comments, post.video_view_count,
                         profile.followers, post.typename, post.is_video, post.date_local])
            count += 1
            sleep(2)
            # Longer pause after every 200 collected posts.
            if count > 199:
                sleep(4*60)
                count = 0

    data = pd.DataFrame(rows, columns=columns_data)
    data = data.fillna(0)

    # np.int was removed in NumPy 1.24; the builtin int is the same dtype request.
    count_cols = ['likes', 'comments', 'followers', 'video_view_count']
    data[count_cols] = data[count_cols].astype(int, errors='ignore')

    dates = pd.DatetimeIndex(data['date'])
    data['year'] = dates.year
    data['month'] = dates.month
    data['day'] = dates.weekday
    data['hour'] = dates.hour

    if save and data.empty:
        # Nothing to derive a file name from; previously this raised IndexError.
        print('Nenhum post coletado; nada foi salvo.')
    elif save:
        folder = 'csv/' + name
        if os.path.isdir(folder):
            print('Salvo na pasta csv/' + name)
        else:
            os.makedirs(folder, exist_ok=True)
            print('Pasta criada csv/'+name+', e salva.')

        # NOTE(review): the file name is taken from the last *collected* row,
        # i.e. before sorting by date. Kept as-is because existing files and
        # callers rely on the current naming - confirm whether the newest
        # post date was intended instead.
        date_time = data['date'].iloc[-1]
        date_arq = str(date_time.year) + '_' + str(date_time.month) + '_' + str(date_time.day)
        new_path = data['autor'].iloc[-1] + '_data_' + date_arq + '.csv'
        data = data.sort_values(by = 'date').reset_index(drop = True)
        data.to_csv('csv/'+name+'/'+new_path, index = False)

    print("Concluído com sucesso !!!")

    return data

In [None]:
def update_data(name, context, last_up):
    """Refresh a profile's saved post history and write a new CSV snapshot.

    Reads ``csv/<name>/<name>_data_<last_up>.csv``, fetches the latest posts
    with ``create_data``, refreshes likes/comments/views of the 20 most
    recent stored posts that are still present in the fetched batch, appends
    posts newer than the last stored one and saves the combined history.

    Args:
        name: Profile username (folder and file prefix under ``csv/``).
        context: Instaloader context forwarded to ``create_data``.
        last_up: Date tag of the previous snapshot; ``/`` is replaced by ``_``
            to build the file name.

    Returns:
        The ``year_month_day`` tag used in the name of the file just written.
    """
    # Missing values are filled before parsing dates so the date column keeps
    # its datetime dtype (the previous `data.fillna(0)` discarded its result).
    data = pd.read_csv('csv/'+name+'/'+name+'_data_'+last_up.replace('/', '_')+'.csv').fillna(0)
    data['date'] = pd.to_datetime(data['date'])

    # create_data already derives year/month/day/hour, so only sorting is needed.
    mem0 = create_data(name, context, num_posts = 30, save = False, pin = True)
    mem0 = mem0.sort_values(by = 'date')

    # Refresh counters of the most recent stored posts. The lookup is keyed by
    # shortcode and written with .loc: the former chained assignment on
    # `data.iloc[-20:][...]` did not reliably modify `data` and raised whenever
    # fewer than 20 stored posts were found in the fetched batch.
    metric_cols = ['likes', 'comments', 'video_view_count']
    fresh = mem0.drop_duplicates(subset = 'shortcode', keep = 'last').set_index('shortcode')
    recent_codes = data['shortcode'].iloc[-20:]
    matched = recent_codes[recent_codes.isin(fresh.index)]
    if not matched.empty:
        data.loc[matched.index, metric_cols] = fresh.loc[matched.to_numpy(), metric_cols].to_numpy()

    if data.empty:
        # No stored history: every fetched post is new.
        mem1 = mem0.copy()
    else:
        mem1 = mem0[mem0['date'] > data['date'].iloc[-1]].copy()

    data_updated = pd.concat([data, mem1]).reset_index(drop = True)

    folder = 'csv/' + name
    if os.path.isdir(folder):
        print('Salvo na pasta csv/' + name)
    else:
        os.makedirs(folder, exist_ok=True)
        print('Pasta criada csv/'+name+', e salva.')

    date_time = data_updated['date'].iloc[-1]
    date_arq = str(date_time.year) + '_' + str(date_time.month) + '_' + str(date_time.day)
    new_path = data_updated['autor'].iloc[-1] + '_data_' + date_arq + '.csv'
    data_updated.to_csv('csv/'+name+'/'+new_path, index = False)

    print("Concluído com sucesso !!!")
    return date_arq

In [None]:
def metric_insta_reels(data):
    """Add a ``reels_viral`` column to ``data`` and return the same object.

    The value is ``video_view_count`` divided by 12% of ``followers``,
    rounded to two decimals. ``data`` is modified in place.
    """
    view_baseline = 0.12*data['followers']
    ratio = data['video_view_count']/view_baseline
    data['reels_viral'] = round(ratio, 2)
    return data

In [None]:
def metric_insta_publi(data):
    """Add a ``publi_viral`` column to ``data`` and return the same object.

    The value is ``likes`` divided by 1.2% of ``followers``, rounded to two
    decimals. ``data`` is modified in place.
    """
    like_baseline = 0.012*data['followers']
    ratio = data['likes']/like_baseline
    data['publi_viral'] = round(ratio, 2)
    return data

In [None]:
def analysis_conc(list_con, context, date, exc_pin = False):
    """Collect posts newer than ``date`` for several profiles and rank them.

    Args:
        list_con: Sized collection of profile usernames to scan.
        context: Instaloader context passed to ``Profile.from_username``.
        date: Cut-off date as a ``dd/mm/YYYY`` string; it is parsed as UTC and
            only posts whose ``date_local`` is later are kept.
        exc_pin: When True, pinned posts are skipped entirely.

    Returns:
        DataFrame of the collected posts with the ``reels_viral`` and
        ``publi_viral`` metrics, ``date`` converted to text, a ``link`` column,
        sorted by ``publi_viral`` in descending order with a fresh index.
    """
    date = pd.to_datetime(date, format = '%d/%m/%Y', utc =True)
    columns_data = ['shortcode', 'autor', 'likes', 'comments', 'video_view_count', 'followers', 'type', 'is_video', 'date']

    # Rows are accumulated in a list and turned into a frame once:
    # DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
    rows = []
    for profile_name in tqdm(list_con):
        print('Carregando dados de: ' + profile_name)
        profile = Profile.from_username(context, profile_name)
        sleep(5)
        for post in profile.get_posts():
            sleep(2)
            if exc_pin and post.is_pinned:
                continue
            elif post.date_local > date:
                rows.append([post.shortcode, post.owner_username, post.likes, post.comments, post.video_view_count,
                             profile.followers, post.typename, post.is_video, post.date_local])
            else:
                # A pinned post may be older than the cut-off without meaning
                # the scan has reached old posts, so only an unpinned old
                # post ends the scan of this profile.
                if not post.is_pinned:
                    break

    data = pd.DataFrame(rows, columns=columns_data)
    data = data.fillna(0)
    data = metric_insta_reels(data)
    data = metric_insta_publi(data)
    data['date'] = data['date'].astype(str)
    data['link'] = 'https://www.instagram.com/p/'+data['shortcode']
    data = data.sort_values(by='publi_viral', ascending = False)

    return data.reset_index(drop=True)

In [None]:
def barplot_group_data(data, col, group, hue_insert = False):
    """Draw a seaborn bar plot of ``col`` grouped by ``group``.

    When ``hue_insert`` is true the bars are additionally split by the
    ``type`` column of ``data``.
    """
    plt.figure(figsize = [15,9])
    sns.set_theme(style="whitegrid")

    # Only the hue argument differs between the two variants of the plot.
    plot_args = {'x': group, 'y': col, 'data': data}
    if hue_insert:
        plot_args['hue'] = 'type'

    axis = sns.barplot(**plot_args)
    axis.set(title = 'Gráfico de barras para ' + ' x '.join((col, group)))

In [None]:
def boxplot_group_data(data, col, group):
    """Draw a seaborn box plot of ``col`` for each value of ``group``."""
    plt.figure(figsize = [15,9])
    sns.set_theme(style="whitegrid")

    plot_title = 'Boxplot para ' + ' x '.join((col, group))
    axis = sns.boxplot(data = data, x = group, y = col)
    axis.set(title = plot_title)

In [None]:
def boxplot_hist_group_data(data, col, group, out = False):
    """Draw a horizontal box plot of ``col`` per ``group`` next to a bar chart
    of how many rows each ``group`` value has.

    Args:
        data: DataFrame containing the ``col`` and ``group`` columns.
        col: Column whose distribution is shown in the box plot.
        group: Column used for grouping in both panels.
        out: Passed to seaborn as ``showfliers``.
    """
    fig, axes = plt.subplots(1, 2, figsize = [15,9])
    sns.set_theme(style="whitegrid")

    sns.boxplot(ax = axes[0], orient = 'h', data = data, y = group, x = col, showfliers = out)
    axes[0].set(title = 'Boxplot para '+col+' x '+group)

    # The counts table is built explicitly: the column names produced by
    # value_counts().reset_index() changed in pandas 2.0, which broke the
    # 'index' / group lookup below.
    counts = data[group].value_counts().sort_index()
    at = pd.DataFrame({'index': counts.index, group: counts.to_numpy()})
    sns.barplot(ax = axes[1], data = at, y = 'index', x = group, orient = 'h')
    # The bar-chart title belongs on the second panel; it used to be set on
    # axes[0], overwriting the box-plot title and leaving this panel untitled.
    axes[1].set(title = 'Gráfico de barras para '+group)

In [None]:
# Builds or refreshes the per-day log of a profile.
def log_daily_update(last_up, name, today, arq_type = 'csv',new = False):
    """Aggregate a profile's posts per day and append them to its daily log.

    Reads ``csv/<name>/<name>_data_<today>.csv``, sums the posts per calendar
    day and either starts a new log (``new=True``) or extends the previous
    one: the last 20 logged days are refreshed with the recomputed totals,
    days after the last logged date are appended, and the weekly / monthly
    mean columns are (re)computed for the affected rows.

    Args:
        last_up: Date tag of the previous log file (``/`` is replaced by
            ``_``); only read when ``new`` is False.
        name: Profile username (folder and file prefix).
        today: Date tag of the post-data CSV to aggregate.
        arq_type: ``'csv'`` reads/writes the log under ``csv/``; any other
            value reads/writes an ``.xlsx`` file under ``excel/``.
        new: Start a fresh log instead of extending an existing one.

    Returns:
        The ``year_month_day`` tag used in the name of the log file written.
    """
    log_columns = ['autor', 'followers','reels', 't_posts','likes','video_view_count',  'taxa_likes',
                   'taxa_views', 'date', 'mean_LLW', 'mean_LLM', 'mean_VLW', 'mean_VLM']
    daily_cols = ['reels', 't_posts', 'likes', 'video_view_count']
    mean_cols = ['mean_LLW', 'mean_LLM', 'mean_VLW', 'mean_VLM']

    mem0 = pd.read_csv('csv/'+name+'/'+name+'_data_'+today.replace('/', '_')+'.csv')
    mem0['date'] = pd.to_datetime(mem0['date'])

    if new:
        data = pd.DataFrame([], columns=log_columns)
        # No previous log: every aggregated day is new. (The cut-off used to
        # be an undefined name in this branch, raising NameError.)
        last_logged = None
    else:
        if arq_type == 'csv':
            data = pd.read_csv('csv/'+name+'/'+name+'_log_'+last_up.replace('/', '_')+'.csv')
        else:
            data = pd.read_excel('excel/'+name+'/'+name+'_log_'+last_up.replace('/', '_')+'.xlsx')

        # The previous `data.fillna(0)` discarded its result.
        data = data.fillna(0)
        last_logged = pd.to_datetime(data['date'].iloc[-1], format = '%d/%m/%Y', utc =True)
        # Only the 30 days before the last logged date are re-aggregated. An
        # ISO string is used for the comparison because a 'd/m/Y' string is
        # parsed month-first whenever the day is <= 12.
        date_update = (last_logged - pd.Timedelta(days= 30)).strftime('%Y-%m-%d')
        mem0 = mem0[mem0['date']>= date_update].copy()

    mem0['t_posts'] = 1
    # From here on 'day' is the day of the month (the stored column holds the weekday).
    mem0['day']= pd.DatetimeIndex(mem0['date']).day

    # numeric_only avoids summing text/datetime columns, which raises on pandas >= 2.0.
    mem1 = mem0.groupby(['year', 'month', 'day']).sum(numeric_only=True).reset_index()

    # Summing the boolean is_video flag gives the number of videos per day.
    mem1['reels'] = mem1['is_video']
    mem1['date'] = mem1['day'].astype(str) + '/' + mem1['month'].astype(str) + '/' + mem1['year'].astype(str)
    mem1['date_aux'] = pd.to_datetime(mem1['date'], format = '%d/%m/%Y', utc =True)
    mem1['autor'] = name

    if not new:
        # Refresh the most recent logged days with the recomputed totals. The
        # lookup is keyed by date and written with .loc: the former chained
        # assignment on `data.iloc[-20:][...]` did not reliably modify `data`
        # and raised when some of those days had no match.
        recent_idx = data.index[-20:]
        daily = mem1.set_index('date')
        recent_dates = data.loc[recent_idx, 'date']
        matched = recent_dates[recent_dates.isin(daily.index)]
        if not matched.empty:
            data.loc[matched.index, daily_cols] = daily.loc[matched.to_numpy(), daily_cols].to_numpy()

        # Float dtype so NaN markers and ratios can be stored without an
        # incompatible-dtype assignment.
        for float_col in ('mean_LLW', 'taxa_likes', 'taxa_views'):
            data[float_col] = data[float_col].astype(float)

        # NaN in mean_LLW marks the rows whose means are recomputed below.
        data.loc[recent_idx, 'mean_LLW'] = np.nan
        data.loc[recent_idx, 'taxa_likes'] = data.loc[recent_idx, 'likes']/data.loc[recent_idx, 'followers']
        data.loc[recent_idx, 'taxa_views'] = data.loc[recent_idx, 'video_view_count']/data.loc[recent_idx, 'followers']

    if last_logged is None:
        mem2 = mem1
    else:
        mem2 = mem1[mem1['date_aux'] > last_logged]

    if mem2.shape[0] > 0:
        # Follower count of the last row of the post data is used for all new days.
        followers_now = mem0['followers'].iloc[-1]
        data_update = pd.DataFrame({
            'likes': mem2['likes'],
            'reels': mem2['reels'],
            't_posts': mem2['t_posts'],
            'followers': followers_now,
            'video_view_count': mem2['video_view_count'],
            'taxa_likes': mem2['likes']/followers_now,
            'taxa_views': mem2['video_view_count']/followers_now,
            'date': mem2['date'],
            'autor': name,
        })
    else:
        data_update = pd.DataFrame([], columns=[])

    data_updated = pd.concat([data, data_update]).reset_index(drop = True)

    if new:
        data_updated = preenche_col_metric_log(data_updated)
        data_updated = preenche_col_metric_log(data_updated, week = False)
        data_updated = preenche_col_metric_log(data_updated, mean_col = 'video_view_count', L = 'V')
        data_updated = preenche_col_metric_log(data_updated, mean_col = 'video_view_count', L = 'V', week = False)
    else:
        for index in data_updated[data_updated['mean_LLW'].isna()].index:
            data_updated.loc[index-8:index] = mean_log_week(data_updated.loc[index-8:index], col = 'likes', col2 = 'mean_LLW')
            data_updated.loc[index-30:index] = mean_log_month(data_updated.loc[index-30:index], col = 'likes', col2 = 'mean_LLM')
            data_updated.loc[index-8:index] = mean_log_week(data_updated.loc[index-8:index], col = 'video_view_count', col2 = 'mean_VLW')
            data_updated.loc[index-30:index] = mean_log_month(data_updated.loc[index-30:index], col = 'video_view_count', col2 = 'mean_VLM')

    # np.int was removed in NumPy 1.24; the builtin int is the same dtype request.
    for mean_col in mean_cols:
        data_updated[mean_col] = data_updated[mean_col].astype(int)

    if arq_type == 'csv':
        folder, extension = 'csv/' + name, '.csv'
    else:
        folder, extension = 'excel/' + name, '.xlsx'

    if os.path.isdir(folder):
        print('Salvo na pasta ' + folder)
    else:
        os.makedirs(folder, exist_ok=True)
        print('Pasta criada ' + folder + ', e salva.')

    # Log dates are stored as 'd/m/Y'; the file tag is 'Y_m_d'.
    date_time = data_updated['date'].iloc[-1].split('/')
    date_tag = f'{date_time[-1]}_{date_time[1]}_{date_time[0]}'
    new_path = data_updated['autor'].iloc[-1] + '_log_' + date_tag + extension
    if arq_type == 'csv':
        data_updated.to_csv(folder + '/' + new_path, index = False)
    else:
        data_updated.to_excel(folder + '/' + new_path, index = False)

    print("Concluído com sucesso !!!")
    return date_tag

In [None]:
def preenche_col_metric_log(data_original, mean_col = 'likes', L = 'L', week = True):
    """Return a copy of ``data_original`` with a trailing-mean column filled in.

    The column is named ``mean_<L>LW`` with a 7-row window when ``week`` is
    true, otherwise ``mean_<L>LM`` with a 30-row window. Rows that do not yet
    have a full window keep 0; every other row receives the mean of
    ``mean_col`` over the window ending at that row, truncated to an integer.
    The index of the returned frame is reset.
    """
    window, suffix = (7, 'LW') if week else (30, 'LM')
    target = 'mean_' + L + suffix

    filled = data_original.copy()
    filled[target] = 0
    filled = filled.reset_index(drop = True)

    for last in range(window - 1, filled.shape[0]):
        first = last - window + 1
        # Label slicing with .loc includes both ends, so this spans `window` rows.
        filled.loc[last, target] = int(filled.loc[first:last, mean_col].mean())

    return filled

In [None]:
def mean_log_week(data_original, col, col2):
    """Return a copy of ``data_original`` whose last row of ``col2`` holds the
    mean of the last 7 values of ``col``, truncated to an integer.

    The input frame is not modified.
    """
    data = data_original.copy()
    # A single positional .iloc write replaces the chained
    # `data[col2].iloc[-1] = ...`, which writes to a temporary object under
    # pandas Copy-on-Write and leaves `data` unchanged.
    data.iloc[-1, data.columns.get_loc(col2)] = int(data[col].iloc[-7:].mean())
    return data

In [None]:
def mean_log_month(data_original, col, col2):
    """Return a copy of ``data_original`` whose last row of ``col2`` holds the
    mean of the last 30 values of ``col``, truncated to an integer.

    The input frame is not modified.
    """
    data = data_original.copy()
    # A single positional .iloc write replaces the chained
    # `data[col2].iloc[-1] = ...`, which writes to a temporary object under
    # pandas Copy-on-Write and leaves `data` unchanged.
    data.iloc[-1, data.columns.get_loc(col2)] = int(data[col].iloc[-30:].mean())
    return data

In [None]:
def save_data_con_excel(data_con, date):
    """Write the competitor table to ``excel/tabela_concorrentes<date>.xlsx``.

    Args:
        data_con: DataFrame containing at least the exported columns listed
            below.
        date: Date tag appended to the file name; ``/`` is replaced by ``_``.
    """
    export_cols = ['autor', 'likes', 'video_view_count', 'followers',
                   'date', 'publi_viral', 'reels_viral', 'link']
    # Uses the `data_con` argument; the previous body referenced an undefined
    # notebook-level name (`data_con_t`) and ignored the parameter.
    data_con[export_cols].to_excel('excel/tabela_concorrentes'+ date.replace('/','_') + '.xlsx', index = False)

In [None]:
def transform_csv_to_excel(path):
    """Read the CSV at ``path`` and write it next to it as an ``.xlsx`` file.

    The output name is ``path`` with its extension replaced by ``.xlsx``.
    """
    data_csv = pd.read_csv(path)
    # splitext replaces whatever extension is present instead of assuming it
    # is exactly four characters long.
    base, _ = os.path.splitext(path)
    data_csv.to_excel(base + '.xlsx', index = False)

In [None]:
def transform_excel_to_csv(path):
    """Read the Excel file at ``path`` and write it next to it as a ``.csv`` file.

    The output name is ``path`` with its extension replaced by ``.csv``.
    """
    data_excel = pd.read_excel(path)
    # splitext handles '.xls' as well as '.xlsx'; slicing off a fixed five
    # characters cut into the file name for shorter extensions.
    base, _ = os.path.splitext(path)
    data_excel.to_csv(base + '.csv', index = False)

In [None]:
def change_date(date):
    """Return ``date`` with every underscore replaced by a slash."""
    pieces = date.split('_')
    return '/'.join(pieces)

# Análise de perfil

In [None]:
# Log in with the credentials stored in login/login.txt; the returned loader
# is kept in the notebook-level name L.
L = reali_login_context('login/login.txt')